From 8764ecd0c14f9f3f083b84de893ab8e6f71100bc Mon Sep 17 00:00:00 2001 From: Zalathar Date: Sat, 10 May 2025 21:09:16 +1000 Subject: [PATCH 1/5] Add a searchable tag `PTR_LEN_STR` to explain `*const c_uchar` bindings This module comment describes why it's OK for LLVM bindings to declare a parameter type of `*const c_uchar` for pointer/length strings, even though the corresponding parameter on the C/C++ side uses `const char *`. Adding a searchable term to each such parameter should make it easier for future maintainers to understand why `*const c_uchar` is being used instead of `*const c_char`. --- compiler/rustc_codegen_llvm/src/llvm/ffi.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs index a249cb86ed42e..feaae19f0cd1c 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs @@ -1,7 +1,7 @@ //! Bindings to the LLVM-C API (`LLVM*`), and to our own `extern "C"` wrapper //! functions around the unstable LLVM C++ API (`LLVMRust*`). //! -//! ## Passing pointer/length strings as `*const c_uchar` +//! ## Passing pointer/length strings as `*const c_uchar` (PTR_LEN_STR) //! //! Normally it's a good idea for Rust-side bindings to match the corresponding //! C-side function declarations as closely as possible. But when passing `&str` @@ -1766,7 +1766,7 @@ unsafe extern "C" { pub(crate) fn LLVMDIBuilderCreateNameSpace<'ll>( Builder: &DIBuilder<'ll>, ParentScope: Option<&'ll Metadata>, - Name: *const c_uchar, + Name: *const c_uchar, // See "PTR_LEN_STR". NameLen: size_t, ExportSymbols: llvm::Bool, ) -> &'ll Metadata; From d1bb310a7aa54b4bfc68c6960e1de0976ff447a5 Mon Sep 17 00:00:00 2001 From: Zalathar Date: Sat, 10 May 2025 18:54:45 +1000 Subject: [PATCH 2/5] Use `LLVMGetInlineAsm` This LLVM-C binding replaces the existing `LLVMRustInlineAsm` function. --- compiler/rustc_codegen_llvm/src/asm.rs | 6 ++--- compiler/rustc_codegen_llvm/src/llvm/ffi.rs | 27 ++++++++++--------- .../rustc_llvm/llvm-wrapper/RustWrapper.cpp | 27 ------------------- 3 files changed, 17 insertions(+), 43 deletions(-) diff --git a/compiler/rustc_codegen_llvm/src/asm.rs b/compiler/rustc_codegen_llvm/src/asm.rs index e481b99afcc67..c3851dc7762b5 100644 --- a/compiler/rustc_codegen_llvm/src/asm.rs +++ b/compiler/rustc_codegen_llvm/src/asm.rs @@ -488,11 +488,11 @@ pub(crate) fn inline_asm_call<'ll>( debug!("constraint verification result: {:?}", constraints_ok); if constraints_ok { let v = unsafe { - llvm::LLVMRustInlineAsm( + llvm::LLVMGetInlineAsm( fty, - asm.as_c_char_ptr(), + asm.as_ptr(), asm.len(), - cons.as_c_char_ptr(), + cons.as_ptr(), cons.len(), volatile, alignstack, diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs index feaae19f0cd1c..2b71e8954bbda 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs @@ -471,7 +471,7 @@ pub(crate) enum MetadataType { MD_kcfi_type = 36, } -/// LLVMRustAsmDialect +/// Must match the layout of `LLVMInlineAsmDialect`. #[derive(Copy, Clone, PartialEq)] #[repr(C)] pub(crate) enum AsmDialect { @@ -1017,6 +1017,19 @@ unsafe extern "C" { /// See Module::setModuleInlineAsm. pub(crate) fn LLVMAppendModuleInlineAsm(M: &Module, Asm: *const c_char, Len: size_t); + /// Create the specified uniqued inline asm string. See `InlineAsm::get()`. + pub(crate) fn LLVMGetInlineAsm<'ll>( + Ty: &'ll Type, + AsmString: *const c_uchar, // See "PTR_LEN_STR". + AsmStringSize: size_t, + Constraints: *const c_uchar, // See "PTR_LEN_STR". + ConstraintsSize: size_t, + HasSideEffects: llvm::Bool, + IsAlignStack: llvm::Bool, + Dialect: AsmDialect, + CanThrow: llvm::Bool, + ) -> &'ll Value; + // Operations on integer types pub(crate) fn LLVMInt1TypeInContext(C: &Context) -> &Type; pub(crate) fn LLVMInt8TypeInContext(C: &Context) -> &Type; @@ -1994,18 +2007,6 @@ unsafe extern "C" { /// Prints the statistics collected by `-Zprint-codegen-stats`. pub(crate) fn LLVMRustPrintStatistics(OutStr: &RustString); - /// Prepares inline assembly. - pub(crate) fn LLVMRustInlineAsm( - Ty: &Type, - AsmString: *const c_char, - AsmStringLen: size_t, - Constraints: *const c_char, - ConstraintsLen: size_t, - SideEffects: Bool, - AlignStack: Bool, - Dialect: AsmDialect, - CanThrow: Bool, - ) -> &Value; pub(crate) fn LLVMRustInlineAsmVerify( Ty: &Type, Constraints: *const c_char, diff --git a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp index 72369ab7b692a..90aa9188c8300 100644 --- a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp +++ b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp @@ -622,37 +622,10 @@ extern "C" LLVMValueRef LLVMRustBuildAtomicStore(LLVMBuilderRef B, return wrap(SI); } -enum class LLVMRustAsmDialect { - Att, - Intel, -}; - -static InlineAsm::AsmDialect fromRust(LLVMRustAsmDialect Dialect) { - switch (Dialect) { - case LLVMRustAsmDialect::Att: - return InlineAsm::AD_ATT; - case LLVMRustAsmDialect::Intel: - return InlineAsm::AD_Intel; - default: - report_fatal_error("bad AsmDialect."); - } -} - extern "C" uint64_t LLVMRustGetArrayNumElements(LLVMTypeRef Ty) { return unwrap(Ty)->getArrayNumElements(); } -extern "C" LLVMValueRef -LLVMRustInlineAsm(LLVMTypeRef Ty, char *AsmString, size_t AsmStringLen, - char *Constraints, size_t ConstraintsLen, - LLVMBool HasSideEffects, LLVMBool IsAlignStack, - LLVMRustAsmDialect Dialect, LLVMBool CanThrow) { - return wrap(InlineAsm::get( - unwrap(Ty), StringRef(AsmString, AsmStringLen), - StringRef(Constraints, ConstraintsLen), HasSideEffects, IsAlignStack, - fromRust(Dialect), CanThrow)); -} - extern "C" bool LLVMRustInlineAsmVerify(LLVMTypeRef Ty, char *Constraints, size_t ConstraintsLen) { // llvm::Error converts to true if it is an error. From b1094f6a0a489bb1bc2be6ca17d2bec269bd9364 Mon Sep 17 00:00:00 2001 From: Zalathar Date: Sat, 10 May 2025 18:26:57 +1000 Subject: [PATCH 3/5] Add a safe wrapper for `LLVMAppendModuleInlineAsm` This patch also changes the Rust-side declaration to take `*const c_uchar` instead of `*const c_char`, to avoid the need for `AsCCharPtr`. --- compiler/rustc_codegen_llvm/src/asm.rs | 8 +------- compiler/rustc_codegen_llvm/src/back/write.rs | 4 ++-- compiler/rustc_codegen_llvm/src/llvm/ffi.rs | 8 ++++++-- compiler/rustc_codegen_llvm/src/llvm/mod.rs | 8 ++++++++ 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/compiler/rustc_codegen_llvm/src/asm.rs b/compiler/rustc_codegen_llvm/src/asm.rs index c3851dc7762b5..15bd1f6901dc6 100644 --- a/compiler/rustc_codegen_llvm/src/asm.rs +++ b/compiler/rustc_codegen_llvm/src/asm.rs @@ -435,13 +435,7 @@ impl<'tcx> AsmCodegenMethods<'tcx> for CodegenCx<'_, 'tcx> { template_str.push_str("\n.att_syntax\n"); } - unsafe { - llvm::LLVMAppendModuleInlineAsm( - self.llmod, - template_str.as_c_char_ptr(), - template_str.len(), - ); - } + llvm::append_module_inline_asm(self.llmod, template_str.as_bytes()); } fn mangled_name(&self, instance: Instance<'tcx>) -> String { diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs index 4ac77c8f7f165..20721c7460878 100644 --- a/compiler/rustc_codegen_llvm/src/back/write.rs +++ b/compiler/rustc_codegen_llvm/src/back/write.rs @@ -1148,9 +1148,9 @@ unsafe fn embed_bitcode( // We need custom section flags, so emit module-level inline assembly. let section_flags = if cgcx.is_pe_coff { "n" } else { "e" }; let asm = create_section_with_flags_asm(".llvmbc", section_flags, bitcode); - llvm::LLVMAppendModuleInlineAsm(llmod, asm.as_c_char_ptr(), asm.len()); + llvm::append_module_inline_asm(llmod, &asm); let asm = create_section_with_flags_asm(".llvmcmd", section_flags, cmdline.as_bytes()); - llvm::LLVMAppendModuleInlineAsm(llmod, asm.as_c_char_ptr(), asm.len()); + llvm::append_module_inline_asm(llmod, &asm); } } } diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs index 2b71e8954bbda..507a29c15c8d0 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs @@ -1014,8 +1014,12 @@ unsafe extern "C" { pub(crate) fn LLVMGetDataLayoutStr(M: &Module) -> *const c_char; pub(crate) fn LLVMSetDataLayout(M: &Module, Triple: *const c_char); - /// See Module::setModuleInlineAsm. - pub(crate) fn LLVMAppendModuleInlineAsm(M: &Module, Asm: *const c_char, Len: size_t); + /// Append inline assembly to a module. See `Module::appendModuleInlineAsm`. + pub(crate) fn LLVMAppendModuleInlineAsm( + M: &Module, + Asm: *const c_uchar, // See "PTR_LEN_STR". + Len: size_t, + ); /// Create the specified uniqued inline asm string. See `InlineAsm::get()`. pub(crate) fn LLVMGetInlineAsm<'ll>( diff --git a/compiler/rustc_codegen_llvm/src/llvm/mod.rs b/compiler/rustc_codegen_llvm/src/llvm/mod.rs index d14aab060731a..7dbcfb508cefc 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/mod.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/mod.rs @@ -440,3 +440,11 @@ pub(crate) fn set_dso_local<'ll>(v: &'ll Value) { LLVMRustSetDSOLocal(v, true); } } + +/// Safe wrapper for `LLVMAppendModuleInlineAsm`, which delegates to +/// `Module::appendModuleInlineAsm`. +pub(crate) fn append_module_inline_asm<'ll>(llmod: &'ll Module, asm: &[u8]) { + unsafe { + LLVMAppendModuleInlineAsm(llmod, asm.as_ptr(), asm.len()); + } +} From b6300294a852f9a14ab9eb1f706d4a966aeb18ed Mon Sep 17 00:00:00 2001 From: Zalathar Date: Sun, 11 May 2025 14:09:50 +1000 Subject: [PATCH 4/5] Make `LLVMRustInlineAsmVerify` take `*const c_uchar` This avoids the need for an explicit `as_c_char_ptr` conversion. --- compiler/rustc_codegen_llvm/src/asm.rs | 5 ++--- compiler/rustc_codegen_llvm/src/llvm/ffi.rs | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/compiler/rustc_codegen_llvm/src/asm.rs b/compiler/rustc_codegen_llvm/src/asm.rs index 15bd1f6901dc6..c0b75fa43b724 100644 --- a/compiler/rustc_codegen_llvm/src/asm.rs +++ b/compiler/rustc_codegen_llvm/src/asm.rs @@ -14,7 +14,7 @@ use smallvec::SmallVec; use tracing::debug; use crate::builder::Builder; -use crate::common::{AsCCharPtr, Funclet}; +use crate::common::Funclet; use crate::context::CodegenCx; use crate::type_::Type; use crate::type_of::LayoutLlvmExt; @@ -477,8 +477,7 @@ pub(crate) fn inline_asm_call<'ll>( debug!("Asm Output Type: {:?}", output); let fty = bx.cx.type_func(&argtys, output); // Ask LLVM to verify that the constraints are well-formed. - let constraints_ok = - unsafe { llvm::LLVMRustInlineAsmVerify(fty, cons.as_c_char_ptr(), cons.len()) }; + let constraints_ok = unsafe { llvm::LLVMRustInlineAsmVerify(fty, cons.as_ptr(), cons.len()) }; debug!("constraint verification result: {:?}", constraints_ok); if constraints_ok { let v = unsafe { diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs index 507a29c15c8d0..67a66e6ec795f 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs @@ -2013,7 +2013,7 @@ unsafe extern "C" { pub(crate) fn LLVMRustInlineAsmVerify( Ty: &Type, - Constraints: *const c_char, + Constraints: *const c_uchar, // See "PTR_LEN_STR". ConstraintsLen: size_t, ) -> bool; From eccf0647d3dfcef826a40fdcc7cde279ea154eaf Mon Sep 17 00:00:00 2001 From: Zalathar Date: Sat, 10 May 2025 21:25:17 +1000 Subject: [PATCH 5/5] Flatten control-flow in `inline_asm_call` after verification --- compiler/rustc_codegen_llvm/src/asm.rs | 105 +++++++++++++------------ 1 file changed, 53 insertions(+), 52 deletions(-) diff --git a/compiler/rustc_codegen_llvm/src/asm.rs b/compiler/rustc_codegen_llvm/src/asm.rs index c0b75fa43b724..9e3893d5314ae 100644 --- a/compiler/rustc_codegen_llvm/src/asm.rs +++ b/compiler/rustc_codegen_llvm/src/asm.rs @@ -476,66 +476,67 @@ pub(crate) fn inline_asm_call<'ll>( debug!("Asm Output Type: {:?}", output); let fty = bx.cx.type_func(&argtys, output); + // Ask LLVM to verify that the constraints are well-formed. let constraints_ok = unsafe { llvm::LLVMRustInlineAsmVerify(fty, cons.as_ptr(), cons.len()) }; debug!("constraint verification result: {:?}", constraints_ok); - if constraints_ok { - let v = unsafe { - llvm::LLVMGetInlineAsm( - fty, - asm.as_ptr(), - asm.len(), - cons.as_ptr(), - cons.len(), - volatile, - alignstack, - dia, - can_throw, - ) - }; + if !constraints_ok { + // LLVM has detected an issue with our constraints, so bail out. + return None; + } - let call = if !labels.is_empty() { - assert!(catch_funclet.is_none()); - bx.callbr(fty, None, None, v, inputs, dest.unwrap(), labels, None, None) - } else if let Some((catch, funclet)) = catch_funclet { - bx.invoke(fty, None, None, v, inputs, dest.unwrap(), catch, funclet, None) - } else { - bx.call(fty, None, None, v, inputs, None, None) - }; + let v = unsafe { + llvm::LLVMGetInlineAsm( + fty, + asm.as_ptr(), + asm.len(), + cons.as_ptr(), + cons.len(), + volatile, + alignstack, + dia, + can_throw, + ) + }; - // Store mark in a metadata node so we can map LLVM errors - // back to source locations. See #17552. - let key = "srcloc"; - let kind = bx.get_md_kind_id(key); + let call = if !labels.is_empty() { + assert!(catch_funclet.is_none()); + bx.callbr(fty, None, None, v, inputs, dest.unwrap(), labels, None, None) + } else if let Some((catch, funclet)) = catch_funclet { + bx.invoke(fty, None, None, v, inputs, dest.unwrap(), catch, funclet, None) + } else { + bx.call(fty, None, None, v, inputs, None, None) + }; - // `srcloc` contains one 64-bit integer for each line of assembly code, - // where the lower 32 bits hold the lo byte position and the upper 32 bits - // hold the hi byte position. - let mut srcloc = vec![]; - if dia == llvm::AsmDialect::Intel && line_spans.len() > 1 { - // LLVM inserts an extra line to add the ".intel_syntax", so add - // a dummy srcloc entry for it. - // - // Don't do this if we only have 1 line span since that may be - // due to the asm template string coming from a macro. LLVM will - // default to the first srcloc for lines that don't have an - // associated srcloc. - srcloc.push(llvm::LLVMValueAsMetadata(bx.const_u64(0))); - } - srcloc.extend(line_spans.iter().map(|span| { - llvm::LLVMValueAsMetadata( - bx.const_u64(u64::from(span.lo().to_u32()) | (u64::from(span.hi().to_u32()) << 32)), - ) - })); - let md = unsafe { llvm::LLVMMDNodeInContext2(bx.llcx, srcloc.as_ptr(), srcloc.len()) }; - let md = bx.get_metadata_value(md); - llvm::LLVMSetMetadata(call, kind, md); + // Store mark in a metadata node so we can map LLVM errors + // back to source locations. See #17552. + let key = "srcloc"; + let kind = bx.get_md_kind_id(key); - Some(call) - } else { - // LLVM has detected an issue with our constraints, bail out - None + // `srcloc` contains one 64-bit integer for each line of assembly code, + // where the lower 32 bits hold the lo byte position and the upper 32 bits + // hold the hi byte position. + let mut srcloc = vec![]; + if dia == llvm::AsmDialect::Intel && line_spans.len() > 1 { + // LLVM inserts an extra line to add the ".intel_syntax", so add + // a dummy srcloc entry for it. + // + // Don't do this if we only have 1 line span since that may be + // due to the asm template string coming from a macro. LLVM will + // default to the first srcloc for lines that don't have an + // associated srcloc. + srcloc.push(llvm::LLVMValueAsMetadata(bx.const_u64(0))); } + srcloc.extend(line_spans.iter().map(|span| { + llvm::LLVMValueAsMetadata( + bx.const_u64(u64::from(span.lo().to_u32()) | (u64::from(span.hi().to_u32()) << 32)), + ) + })); + let md = unsafe { llvm::LLVMMDNodeInContext2(bx.llcx, srcloc.as_ptr(), srcloc.len()) }; + let md = bx.get_metadata_value(md); + llvm::LLVMSetMetadata(call, kind, md); + + Some(call) } /// If the register is an xmm/ymm/zmm register then return its index.