diff --git a/Cargo.lock b/Cargo.lock index 69137192..422075db 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -344,9 +344,9 @@ dependencies = [ [[package]] name = "errno" -version = "0.3.12" +version = "0.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cea14ef9355e3beab063703aa9dab15afd25f0667c341310c1e5274bb1d0da18" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", "windows-sys 0.59.0", diff --git a/src/bin/bpf-linker.rs b/src/bin/bpf-linker.rs index 635f8a3a..06f8f24d 100644 --- a/src/bin/bpf-linker.rs +++ b/src/bin/bpf-linker.rs @@ -14,7 +14,7 @@ use std::{ feature = "rust-llvm-21" ))] use aya_rustc_llvm_proxy as _; -use bpf_linker::{Cpu, Linker, LinkerOptions, OptLevel, OutputType}; +use bpf_linker::{Cpu, Linker, LinkerInput, LinkerOptions, OptLevel, OutputType}; use clap::{ builder::{PathBufValueParser, TypedValueParser as _}, error::ErrorKind, @@ -117,9 +117,9 @@ struct CommandLine { #[clap(long)] allow_bpf_trap: bool, - /// Add a directory to the library search path + /// UNUSED: it only exists for compatibility with rustc #[clap(short = 'L', number_of_values = 1)] - libs: Vec, + _libs: Vec, /// Optimization level. 0-3, s, or z #[clap(short = 'O', default_value = "2")] @@ -212,7 +212,6 @@ fn main() -> anyhow::Result<()> { emit, btf, allow_bpf_trap, - libs, optimize, export_symbols, log_file, @@ -227,6 +226,7 @@ fn main() -> anyhow::Result<()> { export, fatal_errors, _debug, + _libs, } = match Parser::try_parse_from(args) { Ok(command_line) => command_line, Err(err) => match err.kind() { @@ -271,15 +271,11 @@ fn main() -> anyhow::Result<()> { let export_symbols = export_symbols.map(fs::read_to_string).transpose()?; - // TODO: the data is owned by this call frame; we could make this zero-alloc. 
let export_symbols = export_symbols .as_deref() .into_iter() .flat_map(str::lines) - .map(str::to_owned) - .chain(export) - .map(Into::into) - .collect(); + .chain(export.iter().map(String::as_str)); let output_type = match *emit.as_slice() { [] => unreachable!("emit has a default value"), @@ -294,15 +290,9 @@ fn main() -> anyhow::Result<()> { target, cpu, cpu_features, - inputs, - output, - output_type, - libs, optimize, - export_symbols, unroll_loops, ignore_inline_never, - dump_module, llvm_args, disable_expand_memcpy_in_order, disable_memory_builtins, @@ -310,7 +300,15 @@ fn main() -> anyhow::Result<()> { allow_bpf_trap, }); - linker.link()?; + if let Some(path) = dump_module { + linker.set_dump_module_path(path); + } + + let inputs = inputs + .iter() + .map(|p| LinkerInput::new_from_file(p.as_path())); + + linker.link_to_file(inputs, &output, output_type, export_symbols)?; if fatal_errors && linker.has_errors() { return Err(anyhow::anyhow!( diff --git a/src/linker.rs b/src/linker.rs index 9962dd57..489e22f0 100644 --- a/src/linker.rs +++ b/src/linker.rs @@ -3,29 +3,22 @@ use std::{ collections::HashSet, ffi::{CStr, CString, OsStr}, fs::File, - io::{self, Read, Seek as _}, + io::{self, Read, Seek}, + ops::Deref, os::unix::ffi::OsStrExt as _, path::{Path, PathBuf}, - pin::Pin, - ptr, str::{self, FromStr}, }; use ar::Archive; use llvm_sys::{ - bit_writer::LLVMWriteBitcodeToFile, - core::{ - LLVMContextCreate, LLVMContextDispose, LLVMContextSetDiagnosticHandler, LLVMDisposeModule, - LLVMGetTarget, - }, error_handling::{LLVMEnablePrettyStackTrace, LLVMInstallFatalErrorHandler}, - prelude::{LLVMContextRef, LLVMModuleRef}, - target_machine::{LLVMCodeGenFileType, LLVMDisposeTargetMachine, LLVMTargetMachineRef}, + target_machine::LLVMCodeGenFileType, }; use thiserror::Error; use tracing::{debug, error, info, warn}; -use crate::llvm; +use crate::llvm::{self, LLVMContext, LLVMModule, LLVMTargetMachine, MemoryBuffer}; /// Linker error #[derive(Debug, Error)] @@ -63,8 
+56,8 @@ pub enum LinkerError { EmitCodeError(String), /// Writing the bitcode failed. - #[error("LLVMWriteBitcodeToFile failed")] - WriteBitcodeError, + #[error("LLVMWriteBitcodeToFile failed: {0}")] + WriteBitcodeError(io::Error), /// Writing the LLVM IR failed. #[error("LLVMPrintModuleToFile failed: {0}")] @@ -77,6 +70,10 @@ pub enum LinkerError { /// The input object file does not have embedded bitcode. #[error("no bitcode section found in {0}")] MissingBitcodeSection(PathBuf), + + /// LLVM cannot create a module for linking. + #[error("failed to create module")] + CreateModuleError, } /// BPF Cpu type @@ -145,6 +142,59 @@ pub enum OptLevel { SizeMin, } +pub struct FileInput<'a> { + path: &'a Path, +} + +pub struct BufferInput<'a> { + name: &'a str, + bytes: &'a [u8], +} + +pub enum LinkerInput<'a> { + File(FileInput<'a>), + Buffer(BufferInput<'a>), +} + +impl<'a> LinkerInput<'a> { + pub fn new_from_file(path: &'a Path) -> Self { + LinkerInput::File(FileInput { path }) + } + + pub fn new_from_buffer(name: &'a str, bytes: &'a [u8]) -> Self { + LinkerInput::Buffer(BufferInput { name, bytes }) + } +} + +enum InputReader<'a> { + File { + path: &'a Path, + file: File, + }, + Buffer { + name: &'a str, + cursor: io::Cursor<&'a [u8]>, + }, +} + +impl Seek for InputReader<'_> { + fn seek(&mut self, pos: io::SeekFrom) -> io::Result { + match self { + InputReader::File { file, .. } => file.seek(pos), + InputReader::Buffer { cursor, .. } => cursor.seek(pos), + } + } +} + +impl Read for InputReader<'_> { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + match self { + InputReader::File { file, .. } => file.read(buf), + InputReader::Buffer { cursor, .. } => cursor.read(buf), + } + } +} + /// Linker input type #[derive(Clone, Copy, Debug, PartialEq)] enum InputType { @@ -196,24 +246,13 @@ pub struct LinkerOptions { pub cpu: Cpu, /// Cpu features. pub cpu_features: CString, - /// Input files. Can be bitcode, object files with embedded bitcode or archive files. 
- pub inputs: Vec, - /// Where to save the output. - pub output: PathBuf, - /// The format to output. - pub output_type: OutputType, - pub libs: Vec, /// Optimization level. pub optimize: OptLevel, - /// Set of symbol names to export. - pub export_symbols: HashSet>, /// Whether to aggressively unroll loops. Useful for older kernels that don't support loops. pub unroll_loops: bool, /// Remove `noinline` attributes from functions. Useful for kernels before 5.8 that don't /// support function calls. pub ignore_inline_never: bool, - /// Write the linked module IR before and after optimization. - pub dump_module: Option, /// Extra command line args to pass to LLVM. pub llvm_args: Vec, /// Disable passing --bpf-expand-memcpy-in-order to LLVM. @@ -232,367 +271,558 @@ pub struct LinkerOptions { /// BPF Linker pub struct Linker { options: LinkerOptions, - context: LLVMContextRef, - module: LLVMModuleRef, - target_machine: LLVMTargetMachineRef, - diagnostic_handler: Pin>, + context: LLVMContext, + diagnostic_handler: llvm::InstalledDiagnosticHandler, + dump_module: Option, } impl Linker { /// Create a new linker instance with the given options. pub fn new(options: LinkerOptions) -> Self { + let (context, diagnostic_handler) = llvm_init(&options); + Self { options, - context: ptr::null_mut(), - module: ptr::null_mut(), - target_machine: ptr::null_mut(), - diagnostic_handler: Box::pin(DiagnosticHandler::default()), + context, + diagnostic_handler, + dump_module: None, } } + /// Set the directory where the linker will dump the linked LLVM IR before and after + /// optimization, for debugging and inspection purposes. + /// + /// When set: + /// - The directory is created if it does not already exist. + /// - A "pre-opt.ll" file is written with the IR before optimization. + /// - A "post-opt.ll" file is written with the IR after optimization. 
+ pub fn set_dump_module_path(&mut self, path: impl AsRef) { + self.dump_module = Some(path.as_ref().to_path_buf()) + } + + /// Link and generate the output code to file. + /// + /// # Example + /// + /// ```rust,no_run + /// # use std::{collections::HashSet, path::Path, borrow::Cow, ffi::CString}; + /// # use bpf_linker::{Cpu, Linker, LinkerInput, LinkerOptions, OptLevel, OutputType}; + /// # fn main() -> Result<(), Box> { + /// let path = Path::new("/path/to/object-or-bitcode"); + /// let bytes: &[u8] = &[]; // An in memory object/bitcode + /// # let options = LinkerOptions { + /// # target: None, + /// # cpu: Cpu::Generic, + /// # cpu_features: CString::default(), + /// # optimize: OptLevel::Default, + /// # unroll_loops: false, + /// # ignore_inline_never: false, + /// # llvm_args: vec![], + /// # disable_expand_memcpy_in_order: false, + /// # disable_memory_builtins: false, + /// # allow_bpf_trap: false, + /// # btf: false, + /// # }; + /// # let linker = Linker::new(options); + /// + /// let export_symbols = ["my_sym_1", "my_sym_2"]; + /// + /// linker.link_to_file( + /// [ + /// LinkerInput::new_from_file(path), + /// LinkerInput::new_from_buffer("my buffer", bytes), // In memory buffer needs a name + /// ], + /// "/path/to/output", + /// OutputType::Object, + /// export_symbols, + /// )?; + /// # Ok(()) + /// # } + /// ``` + pub fn link_to_file<'i, 'a, I, P, E>( + &self, + inputs: I, + output: P, + output_type: OutputType, + export_symbols: E, + ) -> Result<(), LinkerError> + where + I: IntoIterator>, + E: IntoIterator, + P: AsRef, + { + let (linked_module, target_machine) = self.link(inputs, export_symbols)?; + codegen_to_file( + &linked_module, + &target_machine, + output.as_ref(), + output_type, + )?; + Ok(()) + } + + /// Link and generate the output code to an in-memory buffer. 
+ /// + /// # Example + /// + /// ```rust,no_run + /// # use std::{collections::HashSet, path::Path, borrow::Cow, ffi::CString}; + /// # use bpf_linker::{Cpu, Linker, LinkerInput, LinkerOptions, OptLevel, OutputType}; + /// # fn main() -> Result<(), Box> { + /// let path = Path::new("/path/to/object-or-bitcode"); + /// let bytes: &[u8] = &[]; // An in memory object/bitcode + /// # let options = LinkerOptions { + /// # target: None, + /// # cpu: Cpu::Generic, + /// # cpu_features: CString::default(), + /// # optimize: OptLevel::Default, + /// # unroll_loops: false, + /// # ignore_inline_never: false, + /// # llvm_args: vec![], + /// # disable_expand_memcpy_in_order: false, + /// # disable_memory_builtins: false, + /// # allow_bpf_trap: false, + /// # btf: false, + /// # }; + /// # let linker = Linker::new(options); + /// + /// let export_symbols = ["my_sym_1", "my_sym_2"]; + /// + /// let out_buf = linker.link_to_buffer( + /// [ + /// LinkerInput::new_from_file(path), + /// LinkerInput::new_from_buffer("my buffer", bytes), // In memory buffer needs a name + /// ], + /// OutputType::Bitcode, + /// export_symbols, + /// )?; + /// + /// // Use the buffer as a slice of u8 + /// let bytes = out_buf.as_slice(); + /// println!("Linked {} bytes into memory", bytes.len()); + /// + /// # Ok(()) + /// # } + /// ``` + pub fn link_to_buffer<'i, 'a, I, E>( + &self, + inputs: I, + output_type: OutputType, + export_symbols: E, + ) -> Result + where + I: IntoIterator>, + E: IntoIterator, + { + let (linked_module, target_machine) = self.link(inputs, export_symbols)?; + codegen_to_buffer(&linked_module, &target_machine, output_type) + } + /// Link and generate the output code. 
- pub fn link(&mut self) -> Result<(), LinkerError> { - self.llvm_init(); - self.link_modules()?; - self.create_target_machine()?; - if let Some(path) = &self.options.dump_module { - std::fs::create_dir_all(path).map_err(|err| LinkerError::IoError(path.clone(), err))?; + fn link<'ctx, 'i, 'a, I, E>( + &'ctx self, + inputs: I, + export_symbols: E, + ) -> Result<(LLVMModule<'ctx>, LLVMTargetMachine), LinkerError> + where + I: IntoIterator>, + E: IntoIterator, + { + let Self { + options, + context, + dump_module, + .. + } = self; + + let inputs = inputs + .into_iter() + .map(|value| match value { + LinkerInput::File(file_input) => { + let FileInput { path } = file_input; + + let file = File::open(path) + .map_err(|err| LinkerError::IoError(path.to_owned(), err))?; + Ok(InputReader::File { path, file }) + } + LinkerInput::Buffer(buffer_input) => { + let BufferInput { name, bytes } = buffer_input; + + Ok(InputReader::Buffer { + name, + cursor: io::Cursor::new(bytes), + }) + } + }) + .collect::, _>>()?; + + let mut module = link_modules(context, inputs)?; + + let target_machine = create_target_machine(options, &module)?; + + if let Some(path) = dump_module { + std::fs::create_dir_all(path) + .map_err(|err| LinkerError::IoError(path.to_owned(), err))?; } - if let Some(path) = &self.options.dump_module { + if let Some(path) = dump_module { // dump IR before optimization let path = path.join("pre-opt.ll"); - let path = CString::new(path.as_os_str().as_bytes()).unwrap(); - self.write_ir(&path)?; + let path = CString::new(path.as_os_str().as_encoded_bytes()).unwrap(); + module + .write_ir_to_path(&path) + .map_err(LinkerError::WriteIRError)?; }; - self.optimize()?; - if let Some(path) = &self.options.dump_module { + optimize( + options, + context, + &target_machine, + &mut module, + export_symbols, + )?; + if let Some(path) = dump_module { // dump IR before optimization let path = path.join("post-opt.ll"); - let path = CString::new(path.as_os_str().as_bytes()).unwrap(); - 
self.write_ir(&path)?; + let path = CString::new(path.as_os_str().as_encoded_bytes()).unwrap(); + module + .write_ir_to_path(&path) + .map_err(LinkerError::WriteIRError)?; }; - self.codegen()?; - Ok(()) + + Ok((module, target_machine)) } pub fn has_errors(&self) -> bool { - self.diagnostic_handler.has_errors + self.diagnostic_handler.with_view(|h| h.has_errors) } +} - fn link_modules(&mut self) -> Result<(), LinkerError> { - // buffer used to perform file type detection - let mut buf = [0u8; 8]; - for path in self.options.inputs.clone() { - let mut file = File::open(&path).map_err(|e| LinkerError::IoError(path.clone(), e))?; - - // determine whether the input is bitcode, ELF with embedded bitcode, an archive file - // or an invalid file - file.read_exact(&mut buf) - .map_err(|e| LinkerError::IoError(path.clone(), e))?; - file.rewind() - .map_err(|e| LinkerError::IoError(path.clone(), e))?; - let in_type = detect_input_type(&buf) - .ok_or_else(|| LinkerError::InvalidInputType(path.clone()))?; - - match in_type { - InputType::Archive => { - info!("linking archive {:?}", path); - - // Extract the archive and call link_reader() for each item. 
- let mut archive = Archive::new(file); - while let Some(Ok(item)) = archive.next_entry() { - let name = PathBuf::from(OsStr::from_bytes(item.header().identifier())); - info!("linking archive item {:?}", name); - - match self.link_reader(&name, item, None) { - Ok(_) => continue, - Err(LinkerError::InvalidInputType(_)) => { - info!("ignoring archive item {:?}: invalid type", name); - continue; - } - Err(LinkerError::MissingBitcodeSection(_)) => { - warn!("ignoring archive item {:?}: no embedded bitcode", name); - continue; - } - Err(_) => return Err(LinkerError::LinkArchiveModuleError(path, name)), - }; - } - } - ty => { - info!("linking file {:?} type {}", path, ty); - match self.link_reader(&path, file, Some(ty)) { - Ok(_) => {} +fn link_modules<'ctx, 'i, I>( + context: &'ctx LLVMContext, + inputs: I, +) -> Result, LinkerError> +where + I: IntoIterator>, +{ + let mut module = context + .create_module(c"linked_module") + .ok_or(LinkerError::CreateModuleError)?; + + // buffer used to perform file type detection + let mut buf = [0u8; 8]; + for mut input in inputs { + let path = match input { + InputReader::File { path, .. } => path.into(), + InputReader::Buffer { name, .. } => PathBuf::from(format!("in_memory::{}", name)), + }; + + // determine whether the input is bitcode, ELF with embedded bitcode, an archive file + // or an invalid file + input + .read_exact(&mut buf) + .map_err(|e| LinkerError::IoError(path.clone(), e))?; + input + .rewind() + .map_err(|e| LinkerError::IoError(path.clone(), e))?; + let in_type = + detect_input_type(&buf).ok_or_else(|| LinkerError::InvalidInputType(path.clone()))?; + + match in_type { + InputType::Archive => { + info!("linking archive {:?}", path); + + // Extract the archive and call link_reader() for each item. 
+ let mut archive = Archive::new(input); + while let Some(Ok(item)) = archive.next_entry() { + let name = PathBuf::from(OsStr::from_bytes(item.header().identifier())); + info!("linking archive item {:?}", name); + + match link_reader(context, &mut module, &name, item, None) { + Ok(_) => continue, Err(LinkerError::InvalidInputType(_)) => { - info!("ignoring file {:?}: invalid type", path); + info!("ignoring archive item {:?}: invalid type", name); continue; } Err(LinkerError::MissingBitcodeSection(_)) => { - warn!("ignoring file {:?}: no embedded bitcode", path); + warn!("ignoring archive item {:?}: no embedded bitcode", name); + continue; } - err => return err, + Err(_) => return Err(LinkerError::LinkArchiveModuleError(path, name)), + }; + } + } + ty => { + info!("linking file {:?} type {}", path, ty); + match link_reader(context, &mut module, &path, input, Some(ty)) { + Ok(_) => {} + Err(LinkerError::InvalidInputType(_)) => { + info!("ignoring file {:?}: invalid type", path); + continue; + } + Err(LinkerError::MissingBitcodeSection(_)) => { + warn!("ignoring file {:?}: no embedded bitcode", path); } + Err(err) => return Err(err), } } } - - Ok(()) } - // link in a `Read`-er, which can be a file or an archive item - fn link_reader( - &mut self, - path: &Path, - mut reader: impl Read, - in_type: Option, - ) -> Result<(), LinkerError> { - let mut data = Vec::new(); - let _: usize = reader - .read_to_end(&mut data) - .map_err(|e| LinkerError::IoError(path.to_owned(), e))?; - // in_type is unknown when we're linking an item from an archive file - let in_type = in_type - .or_else(|| detect_input_type(&data)) - .ok_or_else(|| LinkerError::InvalidInputType(path.to_owned()))?; - - let bitcode = match in_type { - InputType::Bitcode => data, - InputType::Elf => match llvm::find_embedded_bitcode(self.context, &data) { - Ok(Some(bitcode)) => bitcode, - Ok(None) => return Err(LinkerError::MissingBitcodeSection(path.to_owned())), - Err(e) => return 
Err(LinkerError::EmbeddedBitcodeError(e)), - }, - // we need to handle this here since archive files could contain - // mach-o files, eg somecrate.rlib containing lib.rmeta which is - // mach-o on macos - InputType::MachO => return Err(LinkerError::InvalidInputType(path.to_owned())), - // this can't really happen - InputType::Archive => panic!("nested archives not supported duh"), - }; - - if !llvm::link_bitcode_buffer(self.context, self.module, &bitcode) { - return Err(LinkerError::LinkModuleError(path.to_owned())); - } + Ok(module) +} - Ok(()) +// link in a `Read`-er, which can be a file or an archive item +fn link_reader<'ctx>( + context: &'ctx LLVMContext, + module: &mut LLVMModule<'ctx>, + path: &Path, + mut reader: impl Read, + in_type: Option, +) -> Result<(), LinkerError> { + let mut data = Vec::new(); + let _: usize = reader + .read_to_end(&mut data) + .map_err(|e| LinkerError::IoError(path.to_owned(), e))?; + // in_type is unknown when we're linking an item from an archive file + let in_type = in_type + .or_else(|| detect_input_type(&data)) + .ok_or_else(|| LinkerError::InvalidInputType(path.to_owned()))?; + + let bitcode = match in_type { + InputType::Bitcode => data, + InputType::Elf => match llvm::find_embedded_bitcode(context, &data) { + Ok(Some(bitcode)) => bitcode, + Ok(None) => return Err(LinkerError::MissingBitcodeSection(path.to_owned())), + Err(e) => return Err(LinkerError::EmbeddedBitcodeError(e)), + }, + // we need to handle this here since archive files could contain + // mach-o files, eg somecrate.rlib containing lib.rmeta which is + // mach-o on macos + InputType::MachO => return Err(LinkerError::InvalidInputType(path.to_owned())), + // this can't really happen + InputType::Archive => panic!("nested archives not supported duh"), + }; + + if !llvm::link_bitcode_buffer(context, module, &bitcode) { + return Err(LinkerError::LinkModuleError(path.to_owned())); } - fn create_target_machine(&mut self) -> Result<(), LinkerError> { - let Self { - 
options: - LinkerOptions { - target, - cpu, - cpu_features, - .. - }, - module, - target_machine, - .. - } = self; - // Here's how the output target is selected: - // - // 1) rustc with builtin BPF support: cargo build --target=bpf[el|eb]-unknown-none - // the input modules are already configured for the correct output target - // - // 2) rustc with no BPF support: cargo rustc -- -C linker-flavor=bpf-linker -C linker=bpf-linker -C link-arg=--target=bpf[el|eb] - // the input modules are configured for the *host* target, and the output target - // is configured with the `--target` linker argument - // - // 3) rustc with no BPF support: cargo rustc -- -C linker-flavor=bpf-linker -C linker=bpf-linker - // the input modules are configured for the *host* target, the output target isn't - // set via `--target`, so default to `bpf` (bpfel or bpfeb depending on the host - // endianness) - let (triple, target) = match target { - // case 1 - Some(c_triple) => (c_triple.as_c_str(), llvm::target_from_triple(c_triple)), - None => { - let c_triple = unsafe { LLVMGetTarget(*module) }; - let c_triple = unsafe { CStr::from_ptr(c_triple) }; - if c_triple.to_bytes().starts_with(b"bpf") { - // case 2 - (c_triple, llvm::target_from_module(*module)) - } else { - // case 3. 
- info!("detected non-bpf input target {:?} and no explicit output --target specified, selecting `bpf'", c_triple); - let c_triple = c"bpf"; - (c_triple, llvm::target_from_triple(c_triple)) - } - } - }; - let target = target - .map_err(|_msg| LinkerError::InvalidTarget(triple.to_string_lossy().to_string()))?; - - debug!( - "creating target machine: triple: {} cpu: {} features: {}", - triple.to_string_lossy(), - cpu, - cpu_features.to_string_lossy(), - ); - - *target_machine = llvm::create_target_machine(target, triple, cpu.as_c_str(), cpu_features) - .ok_or_else(|| LinkerError::InvalidTarget(triple.to_string_lossy().to_string()))?; - - Ok(()) - } + Ok(()) +} - fn optimize(&mut self) -> Result<(), LinkerError> { - if !self.options.disable_memory_builtins { - self.options.export_symbols.extend( - ["memcpy", "memmove", "memset", "memcmp", "bcmp"] - .into_iter() - .map(Into::into), - ); - }; - debug!( - "linking exporting symbols {:?}, opt level {:?}", - self.options.export_symbols, self.options.optimize - ); - // run optimizations. Will optionally remove noinline attributes, intern all non exported - // programs and maps and remove dead code. - - let export_symbols = self - .options - .export_symbols - .iter() - .map(|s| s.as_bytes().into()) - .collect(); - - if self.options.btf { - // if we want to emit BTF, we need to sanitize the debug information - llvm::DISanitizer::new(self.context, self.module).run(&export_symbols); - } else { - // if we don't need BTF emission, we can strip DI - let ok = llvm::strip_debug_info(self.module); - debug!("Stripping DI, changed={}", ok); +fn create_target_machine( + options: &LinkerOptions, + module: &LLVMModule<'_>, +) -> Result { + let LinkerOptions { + target, + cpu, + cpu_features, + .. 
+ } = options; + // Here's how the output target is selected: + // + // 1) rustc with builtin BPF support: cargo build --target=bpf[el|eb]-unknown-none + // the input modules are already configured for the correct output target + // + // 2) rustc with no BPF support: cargo rustc -- -C linker-flavor=bpf-linker -C linker=bpf-linker -C link-arg=--target=bpf[el|eb] + // the input modules are configured for the *host* target, and the output target + // is configured with the `--target` linker argument + // + // 3) rustc with no BPF support: cargo rustc -- -C linker-flavor=bpf-linker -C linker=bpf-linker + // the input modules are configured for the *host* target, the output target isn't + // set via `--target`, so default to `bpf` (bpfel or bpfeb depending on the host + // endianness) + let (triple, target) = match target { + // case 1 + Some(c_triple) => (c_triple.as_c_str(), llvm::target_from_triple(c_triple)), + None => { + let c_triple = module.get_target(); + let c_triple = unsafe { CStr::from_ptr(c_triple) }; + if c_triple.to_bytes().starts_with(b"bpf") { + // case 2 + (c_triple, llvm::target_from_module(module)) + } else { + // case 3. 
+ info!("detected non-bpf input target {:?} and no explicit output --target specified, selecting `bpf'", c_triple); + let c_triple = c"bpf"; + (c_triple, llvm::target_from_triple(c_triple)) + } } + }; + let target = + target.map_err(|_msg| LinkerError::InvalidTarget(triple.to_string_lossy().to_string()))?; - llvm::optimize( - self.target_machine, - self.module, - self.options.optimize, - self.options.ignore_inline_never, - &export_symbols, - ) - .map_err(LinkerError::OptimizeError)?; + debug!( + "creating target machine: triple: {} cpu: {} features: {}", + triple.to_string_lossy(), + cpu, + cpu_features.to_string_lossy(), + ); - Ok(()) - } + let target_machine = LLVMTargetMachine::new(target, triple, cpu.as_c_str(), cpu_features) + .ok_or_else(|| LinkerError::InvalidTarget(triple.to_string_lossy().to_string()))?; - fn codegen(&mut self) -> Result<(), LinkerError> { - let output = CString::new(self.options.output.as_os_str().as_bytes()).unwrap(); - match self.options.output_type { - OutputType::Bitcode => self.write_bitcode(&output), - OutputType::LlvmAssembly => self.write_ir(&output), - OutputType::Assembly => self.emit(&output, LLVMCodeGenFileType::LLVMAssemblyFile), - OutputType::Object => self.emit(&output, LLVMCodeGenFileType::LLVMObjectFile), - } + Ok(target_machine) +} + +fn optimize<'ctx, 'a, E>( + options: &LinkerOptions, + context: &'ctx LLVMContext, + target_machine: &LLVMTargetMachine, + module: &mut LLVMModule<'ctx>, + export_symbols: E, +) -> Result<(), LinkerError> +where + E: IntoIterator, +{ + let LinkerOptions { + disable_memory_builtins, + optimize, + btf, + ignore_inline_never, + .. 
+ } = options; + + let mut export_symbols: HashSet> = export_symbols + .into_iter() + .map(|s| Cow::Borrowed(s.as_bytes())) + .collect(); + + if !disable_memory_builtins { + export_symbols.extend( + ["memcpy", "memmove", "memset", "memcmp", "bcmp"] + .into_iter() + .map(|s| s.as_bytes().into()), + ); + }; + debug!( + "linking exporting symbols {:?}, opt level {:?}", + export_symbols, optimize + ); + // run optimizations. Will optionally remove noinline attributes, intern all non exported + // programs and maps and remove dead code. + + if *btf { + // if we want to emit BTF, we need to sanitize the debug information + llvm::DISanitizer::new(context, module).run(&export_symbols); + } else { + // if we don't need BTF emission, we can strip DI + let ok = module.strip_debug_info(); + debug!("Stripping DI, changed={}", ok); } - fn write_bitcode(&mut self, output: &CStr) -> Result<(), LinkerError> { - info!("writing bitcode to {:?}", output); + llvm::optimize( + target_machine, + module, + options.optimize, + *ignore_inline_never, + &export_symbols, + ) + .map_err(LinkerError::OptimizeError)?; - if unsafe { LLVMWriteBitcodeToFile(self.module, output.as_ptr()) } == 1 { - return Err(LinkerError::WriteBitcodeError); - } + Ok(()) +} - Ok(()) +fn codegen_to_file( + module: &LLVMModule<'_>, + target_machine: &LLVMTargetMachine, + output: &Path, + output_type: OutputType, +) -> Result<(), LinkerError> { + info!("writing {:?} to {:?}", output_type, output); + let output = CString::new(output.as_os_str().as_encoded_bytes()).unwrap(); + match output_type { + OutputType::Bitcode => module + .write_bitcode_to_path(&output) + .map_err(LinkerError::WriteBitcodeError), + OutputType::LlvmAssembly => module + .write_ir_to_path(&output) + .map_err(LinkerError::WriteIRError), + OutputType::Assembly => target_machine + .emit_to_file(module, &output, LLVMCodeGenFileType::LLVMAssemblyFile) + .map_err(LinkerError::EmitCodeError), + OutputType::Object => target_machine + .emit_to_file(module, 
&output, LLVMCodeGenFileType::LLVMObjectFile) + .map_err(LinkerError::EmitCodeError), } +} - fn write_ir(&mut self, output: &CStr) -> Result<(), LinkerError> { - info!("writing IR to {:?}", output); +fn codegen_to_buffer( + module: &LLVMModule<'_>, + target_machine: &LLVMTargetMachine, + output_type: OutputType, +) -> Result { + let memory_buffer = match output_type { + OutputType::Bitcode => module.write_bitcode_to_memory(), + OutputType::LlvmAssembly => module.write_ir_to_memory(), + OutputType::Assembly => target_machine + .emit_to_memory_buffer(module, LLVMCodeGenFileType::LLVMAssemblyFile) + .map_err(LinkerError::EmitCodeError)?, + OutputType::Object => target_machine + .emit_to_memory_buffer(module, LLVMCodeGenFileType::LLVMObjectFile) + .map_err(LinkerError::EmitCodeError)?, + }; + + Ok(LinkerOutput { + inner: memory_buffer, + }) +} - llvm::write_ir(self.module, output).map_err(LinkerError::WriteIRError) +fn llvm_init( + options: &LinkerOptions, +) -> ( + LLVMContext, + llvm::InstalledDiagnosticHandler, +) { + let mut args = Vec::>::new(); + args.push(c"bpf-linker".into()); + // Disable cold call site detection. Many accessors in aya-ebpf return Result + // where the layout is larger than 64 bits, but the LLVM BPF target only supports + // up to 64 bits return values. Since the accessors are tiny in terms of code, we + // avoid the issue by annotating them with #[inline(always)]. If they are classified + // as cold though - and they often are starting from LLVM17 - #[inline(always)] + // is ignored and the BPF target fails codegen. + args.push(c"--cold-callsite-rel-freq=0".into()); + if options.unroll_loops { + // setting cmdline arguments is the only way to customize the unroll pass with the + // C API. 
+ args.extend([ + c"--unroll-runtime".into(), + c"--unroll-runtime-multi-exit".into(), + CString::new(format!("--unroll-max-upperbound={}", u32::MAX)) + .unwrap() + .into(), + CString::new(format!("--unroll-threshold={}", u32::MAX)) + .unwrap() + .into(), + ]); } + if !options.disable_expand_memcpy_in_order { + args.push(c"--bpf-expand-memcpy-in-order".into()); + } + if !options.allow_bpf_trap { + // TODO: Remove this once ksyms support is guaranteed. + // LLVM introduces __bpf_trap calls at points where __builtin_trap would normally be + // emitted. This is currently not supported by aya because __bpf_trap requires a .ksyms + // section, but this is not trivial to support. In the meantime, using this flag + // returns LLVM to the old behaviour, which did not introduce these calls and therefore + // does not require the .ksyms section. + args.push(c"--bpf-disable-trap-unreachable".into()); + } + args.extend(options.llvm_args.iter().map(Into::into)); + info!("LLVM command line: {:?}", args); + llvm::init(args.as_slice(), c"BPF linker"); - fn emit(&mut self, output: &CStr, output_type: LLVMCodeGenFileType) -> Result<(), LinkerError> { - info!("emitting {:?} to {:?}", output_type, output); + let mut context = LLVMContext::new(); - llvm::codegen(self.target_machine, self.module, output, output_type) - .map_err(LinkerError::EmitCodeError) - } + let diagnostic_handler = context.set_diagnostic_handler(DiagnosticHandler::default()); - fn llvm_init(&mut self) { - let mut args = Vec::>::new(); - args.push(c"bpf-linker".into()); - // Disable cold call site detection. Many accessors in aya-ebpf return Result - // where the layout is larger than 64 bits, but the LLVM BPF target only supports - // up to 64 bits return values. Since the accessors are tiny in terms of code, we - // avoid the issue by annotating them with #[inline(always)]. 
If they are classified - // as cold though - and they often are starting from LLVM17 - #[inline(always)] - // is ignored and the BPF target fails codegen. - args.push(c"--cold-callsite-rel-freq=0".into()); - if self.options.unroll_loops { - // setting cmdline arguments is the only way to customize the unroll pass with the - // C API. - args.extend([ - c"--unroll-runtime".into(), - c"--unroll-runtime-multi-exit".into(), - CString::new(format!("--unroll-max-upperbound={}", u32::MAX)) - .unwrap() - .into(), - CString::new(format!("--unroll-threshold={}", u32::MAX)) - .unwrap() - .into(), - ]); - } - if !self.options.disable_expand_memcpy_in_order { - args.push(c"--bpf-expand-memcpy-in-order".into()); - } - if !self.options.allow_bpf_trap { - // TODO: Remove this once ksyms support is guaranteed. - // LLVM introduces __bpf_trap calls at points where __builtin_trap would normally be - // emitted. This is currently not supported by aya because __bpf_trap requires a .ksyms - // section, but this is not trivial to support. In the meantime, using this flag - // returns LLVM to the old behaviour, which did not introduce these calls and therefore - // does not require the .ksyms section. - args.push(c"--bpf-disable-trap-unreachable".into()); - } - args.extend(self.options.llvm_args.iter().map(Into::into)); - info!("LLVM command line: {:?}", args); - llvm::init(args.as_slice(), c"BPF linker"); - - let context = unsafe { LLVMContextCreate() }; - self.context = context; - - unsafe { - let handler_ptr = { - // SAFETY: `diagnostic_handler` is pinned for the lifetime of `Linker`, and we use - // the mutable reference only to obtain a stable raw pointer for LLVM’s callback. 
- let handler = self.diagnostic_handler.as_mut().get_unchecked_mut(); - ptr::from_mut(handler).cast() - }; - LLVMContextSetDiagnosticHandler( - context, - Some(llvm::diagnostic_handler::), - handler_ptr, - ); - LLVMInstallFatalErrorHandler(Some(llvm::fatal_error)); - LLVMEnablePrettyStackTrace(); - } - self.module = llvm::create_module( - CString::new(self.options.output.file_stem().unwrap().as_bytes()) - .unwrap() - .as_c_str(), - context, - ) - .unwrap(); + unsafe { + LLVMInstallFatalErrorHandler(Some(llvm::fatal_error)); + LLVMEnablePrettyStackTrace(); } -} -impl Drop for Linker { - fn drop(&mut self) { - unsafe { - if !self.target_machine.is_null() { - LLVMDisposeTargetMachine(self.target_machine); - } - if !self.module.is_null() { - LLVMDisposeModule(self.module); - } - if !self.context.is_null() { - LLVMContextDispose(self.context); - } - } - } + (context, diagnostic_handler) } #[derive(Default)] @@ -655,3 +885,27 @@ fn detect_input_type(data: &[u8]) -> Option { } } } + +pub struct LinkerOutput { + inner: MemoryBuffer, +} + +impl LinkerOutput { + pub fn as_slice(&self) -> &[u8] { + self.inner.as_slice() + } +} + +impl AsRef<[u8]> for LinkerOutput { + fn as_ref(&self) -> &[u8] { + self.as_slice() + } +} + +impl Deref for LinkerOutput { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + self.as_slice() + } +} diff --git a/src/llvm/di.rs b/src/llvm/di.rs index 21505a3c..6f405b22 100644 --- a/src/llvm/di.rs +++ b/src/llvm/di.rs @@ -3,6 +3,7 @@ use std::{ collections::{hash_map::DefaultHasher, HashMap, HashSet}, hash::Hasher as _, io::Write as _, + marker::PhantomData, ptr, }; @@ -14,20 +15,22 @@ use super::types::{ di::DIType, ir::{Function, MDNode, Metadata, Value}, }; -use crate::llvm::{iter::*, types::di::DISubprogram}; +use crate::llvm::{iter::*, types::di::DISubprogram, LLVMContext, LLVMModule}; // KSYM_NAME_LEN from linux kernel intentionally set // to lower value found across kernel versions to ensure // backward compatibility const 
MAX_KSYM_NAME_LEN: usize = 128; -pub(crate) struct DISanitizer { +pub(crate) struct DISanitizer<'ctx> { context: LLVMContextRef, module: LLVMModuleRef, builder: LLVMDIBuilderRef, visited_nodes: HashSet, replace_operands: HashMap, skipped_types_lossy: Vec, + // TODO: use references of safe wrappers instead of PhantomData + _marker: PhantomData>, } // Sanitize Rust type names to be valid C type names. @@ -55,15 +58,16 @@ fn sanitize_type_name(name: &[u8]) -> Vec { sanitized } -impl DISanitizer { - pub(crate) fn new(context: LLVMContextRef, module: LLVMModuleRef) -> Self { - Self { - context, - module, - builder: unsafe { LLVMCreateDIBuilder(module) }, +impl<'ctx> DISanitizer<'ctx> { + pub(crate) fn new(context: &'ctx LLVMContext, module: &mut LLVMModule<'ctx>) -> Self { + DISanitizer { + context: context.as_mut_ptr(), + module: module.as_mut_ptr(), + builder: unsafe { LLVMCreateDIBuilder(module.as_mut_ptr()) }, visited_nodes: HashSet::new(), replace_operands: HashMap::new(), skipped_types_lossy: Vec::new(), + _marker: PhantomData, } } diff --git a/src/llvm/mod.rs b/src/llvm/mod.rs index 84436853..ee4069c9 100644 --- a/src/llvm/mod.rs +++ b/src/llvm/mod.rs @@ -5,7 +5,7 @@ mod types; use std::{ borrow::Cow, collections::HashSet, - ffi::{c_void, CStr, CString}, + ffi::{CStr, CString}, os::raw::c_char, ptr, slice, str, }; @@ -16,12 +16,10 @@ use llvm_sys::{ bit_reader::LLVMParseBitcodeInContext2, core::{ LLVMCreateMemoryBufferWithMemoryRange, LLVMDisposeMemoryBuffer, LLVMDisposeMessage, - LLVMGetDiagInfoDescription, LLVMGetDiagInfoSeverity, LLVMGetEnumAttributeKindForName, - LLVMGetMDString, LLVMGetModuleInlineAsm, LLVMGetTarget, LLVMGetValueName2, - LLVMModuleCreateWithNameInContext, LLVMPrintModuleToFile, LLVMRemoveEnumAttributeAtIndex, - LLVMSetLinkage, LLVMSetModuleInlineAsm2, LLVMSetVisibility, + LLVMGetEnumAttributeKindForName, LLVMGetMDString, LLVMGetModuleInlineAsm, LLVMGetTarget, + LLVMGetValueName2, LLVMRemoveEnumAttributeAtIndex, LLVMSetLinkage, 
LLVMSetModuleInlineAsm2, + LLVMSetVisibility, }, - debuginfo::LLVMStripModuleDebugInfo, error::{ LLVMDisposeErrorMessage, LLVMGetErrorMessage, LLVMGetErrorTypeId, LLVMGetStringErrorTypeId, }, @@ -31,23 +29,25 @@ use llvm_sys::{ LLVMGetSectionName, LLVMGetSectionSize, LLVMMoveToNextSection, LLVMObjectFileCopySectionIterator, LLVMObjectFileIsSectionIteratorAtEnd, }, - prelude::{LLVMContextRef, LLVMDiagnosticInfoRef, LLVMModuleRef, LLVMValueRef}, + prelude::{LLVMModuleRef, LLVMValueRef}, support::LLVMParseCommandLineOptions, target::{ LLVMInitializeBPFAsmParser, LLVMInitializeBPFAsmPrinter, LLVMInitializeBPFDisassembler, LLVMInitializeBPFTarget, LLVMInitializeBPFTargetInfo, LLVMInitializeBPFTargetMC, }, - target_machine::{ - LLVMCodeGenFileType, LLVMCodeGenOptLevel, LLVMCodeModel, LLVMCreateTargetMachine, - LLVMGetTargetFromTriple, LLVMRelocMode, LLVMTargetMachineEmitToFile, LLVMTargetMachineRef, - LLVMTargetRef, - }, + target_machine::{LLVMGetTargetFromTriple, LLVMTargetRef}, transforms::pass_builder::{ LLVMCreatePassBuilderOptions, LLVMDisposePassBuilderOptions, LLVMRunPasses, }, LLVMAttributeFunctionIndex, LLVMLinkage, LLVMVisibility, }; use tracing::{debug, error}; +pub(crate) use types::{ + context::{InstalledDiagnosticHandler, LLVMContext}, + memory_buffer::MemoryBuffer, + module::LLVMModule, + target_machine::LLVMTargetMachine, +}; use crate::OptLevel; @@ -65,18 +65,8 @@ pub(crate) fn init(args: &[Cow<'_, CStr>], overview: &CStr) { unsafe { LLVMParseCommandLineOptions(c_ptrs.len() as i32, c_ptrs.as_ptr(), overview.as_ptr()) }; } -pub(crate) fn create_module(name: &CStr, context: LLVMContextRef) -> Option { - let module = unsafe { LLVMModuleCreateWithNameInContext(name.as_ptr(), context) }; - - if module.is_null() { - return None; - } - - Some(module) -} - pub(crate) fn find_embedded_bitcode( - context: LLVMContextRef, + context: &LLVMContext, data: &[u8], ) -> Result>, String> { let buffer_name = c"mem_buffer"; @@ -90,7 +80,7 @@ pub(crate) fn 
find_embedded_bitcode( }; let (bin, message) = - Message::with(|message| unsafe { LLVMCreateBinary(buffer, context, message) }); + Message::with(|message| unsafe { LLVMCreateBinary(buffer, context.as_mut_ptr(), message) }); if bin.is_null() { return Err(message.as_string_lossy().to_string()); } @@ -118,9 +108,9 @@ pub(crate) fn find_embedded_bitcode( } #[must_use] -pub(crate) fn link_bitcode_buffer( - context: LLVMContextRef, - module: LLVMModuleRef, +pub(crate) fn link_bitcode_buffer<'ctx>( + context: &'ctx LLVMContext, + module: &mut LLVMModule<'ctx>, buffer: &[u8], ) -> bool { let mut linked = false; @@ -136,8 +126,8 @@ pub(crate) fn link_bitcode_buffer( let mut temp_module = ptr::null_mut(); - if unsafe { LLVMParseBitcodeInContext2(context, buffer, &mut temp_module) } == 0 { - linked = unsafe { LLVMLinkModules2(module, temp_module) } == 0; + if unsafe { LLVMParseBitcodeInContext2(context.as_mut_ptr(), buffer, &mut temp_module) } == 0 { + linked = unsafe { LLVMLinkModules2(module.as_mut_ptr(), temp_module) } == 0; } unsafe { LLVMDisposeMemoryBuffer(buffer) }; @@ -157,54 +147,30 @@ pub(crate) fn target_from_triple(triple: &CStr) -> Result } } -pub(crate) fn target_from_module(module: LLVMModuleRef) -> Result { - let triple = unsafe { LLVMGetTarget(module) }; +pub(crate) fn target_from_module(module: &LLVMModule<'_>) -> Result { + let triple = unsafe { LLVMGetTarget(module.as_mut_ptr()) }; unsafe { target_from_triple(CStr::from_ptr(triple)) } } -pub(crate) fn create_target_machine( - target: LLVMTargetRef, - triple: &CStr, - cpu: &CStr, - features: &CStr, -) -> Option { - let tm = unsafe { - LLVMCreateTargetMachine( - target, - triple.as_ptr(), - cpu.as_ptr(), - features.as_ptr(), - LLVMCodeGenOptLevel::LLVMCodeGenLevelAggressive, - LLVMRelocMode::LLVMRelocDefault, - LLVMCodeModel::LLVMCodeModelDefault, - ) - }; - if tm.is_null() { - None - } else { - Some(tm) - } -} - pub(crate) fn optimize( - tm: LLVMTargetMachineRef, - module: LLVMModuleRef, + tm: 
&LLVMTargetMachine, + module: &mut LLVMModule<'_>, opt_level: OptLevel, ignore_inline_never: bool, export_symbols: &HashSet>, ) -> Result<(), String> { - if module_asm_is_probestack(module) { - unsafe { LLVMSetModuleInlineAsm2(module, ptr::null_mut(), 0) }; + if module_asm_is_probestack(module.as_mut_ptr()) { + unsafe { LLVMSetModuleInlineAsm2(module.as_mut_ptr(), ptr::null_mut(), 0) }; } - for sym in module.globals_iter() { + for sym in module.as_mut_ptr().globals_iter() { internalize(sym, symbol_name(sym), export_symbols); } - for sym in module.global_aliases_iter() { + for sym in module.as_mut_ptr().global_aliases_iter() { internalize(sym, symbol_name(sym), export_symbols); } - for function in module.functions_iter() { + for function in module.as_mut_ptr().functions_iter() { let name = symbol_name(function); if !name.starts_with(b"llvm.") { if ignore_inline_never { @@ -235,7 +201,14 @@ pub(crate) fn optimize( debug!("running passes: {passes}"); let passes = CString::new(passes).unwrap(); let options = unsafe { LLVMCreatePassBuilderOptions() }; - let error = unsafe { LLVMRunPasses(module, passes.as_ptr(), tm, options) }; + let error = unsafe { + LLVMRunPasses( + module.as_mut_ptr(), + passes.as_ptr(), + tm.as_mut_ptr(), + options, + ) + }; unsafe { LLVMDisposePassBuilderOptions(options) }; // Handle the error and print it to stderr. 
if !error.is_null() { @@ -253,11 +226,6 @@ pub(crate) fn optimize( Ok(()) } -/// strips debug information, returns true if DI got stripped -pub(crate) fn strip_debug_info(module: LLVMModuleRef) -> bool { - unsafe { LLVMStripModuleDebugInfo(module) != 0 } -} - pub(crate) fn module_asm_is_probestack(module: LLVMModuleRef) -> bool { let mut len = 0; let ptr = unsafe { LLVMGetModuleInlineAsm(module, &mut len) }; @@ -281,32 +249,6 @@ pub(crate) fn remove_attribute(function: *mut llvm_sys::LLVMValue, name: &str) { unsafe { LLVMRemoveEnumAttributeAtIndex(function, LLVMAttributeFunctionIndex, attr_kind) }; } -pub(crate) fn write_ir(module: LLVMModuleRef, output: &CStr) -> Result<(), String> { - let (ret, message) = - Message::with(|message| unsafe { LLVMPrintModuleToFile(module, output.as_ptr(), message) }); - if ret == 0 { - Ok(()) - } else { - Err(message.as_string_lossy().to_string()) - } -} - -pub(crate) fn codegen( - tm: LLVMTargetMachineRef, - module: LLVMModuleRef, - output: &CStr, - output_type: LLVMCodeGenFileType, -) -> Result<(), String> { - let (ret, message) = Message::with(|message| unsafe { - LLVMTargetMachineEmitToFile(tm, module, output.as_ptr().cast_mut(), output_type, message) - }); - if ret == 0 { - Ok(()) - } else { - Err(message.as_string_lossy().to_string()) - } -} - pub(crate) fn internalize( value: LLVMValueRef, name: &[u8], @@ -326,18 +268,6 @@ pub(crate) trait LLVMDiagnosticHandler { ); } -pub(crate) extern "C" fn diagnostic_handler( - info: LLVMDiagnosticInfoRef, - handler: *mut c_void, -) { - let severity = unsafe { LLVMGetDiagInfoSeverity(info) }; - let message = Message { - ptr: unsafe { LLVMGetDiagInfoDescription(info) }, - }; - let handler = handler.cast::(); - unsafe { &mut *handler }.handle_diagnostic(severity, message.as_string_lossy()); -} - pub(crate) extern "C" fn fatal_error(reason: *const c_char) { error!("fatal error: {:?}", unsafe { CStr::from_ptr(reason) }) } diff --git a/src/llvm/types/context.rs b/src/llvm/types/context.rs new 
file mode 100644 index 00000000..52a38689 --- /dev/null +++ b/src/llvm/types/context.rs @@ -0,0 +1,134 @@ +use std::{ + any::Any, + ffi::{c_void, CStr}, + marker::PhantomData, + pin::Pin, + ptr, + rc::Rc, +}; + +use llvm_sys::{ + core::{ + LLVMContextCreate, LLVMContextDispose, LLVMContextSetDiagnosticHandler, + LLVMGetDiagInfoDescription, LLVMGetDiagInfoSeverity, LLVMModuleCreateWithNameInContext, + }, + prelude::{LLVMContextRef, LLVMDiagnosticInfoRef}, +}; + +use crate::llvm::{types::module::LLVMModule, LLVMDiagnosticHandler, Message}; + +pub(crate) struct LLVMContext { + context: LLVMContextRef, + /// Optional diagnostic handler set for the context. + /// + /// The diagnostic handler pointer must remain valid until either + /// a new handler is installed or the context is disposed. + /// To guarantee this, we keep a strong reference to the handler + /// inside the wrapper. + /// The type of the diagnostic handler is erased to make the + /// context wrapper non generic. + diagnostic_handler: Option, +} + +impl LLVMContext { + pub(crate) fn new() -> Self { + let context = unsafe { LLVMContextCreate() }; + Self { + context, + diagnostic_handler: None, + } + } + + /// Returns an unsafe mutable pointer to the LLVM context. + /// + /// The caller must ensure that the [`LLVMContext`] outlives the pointer this + /// function returns, or else it will end up dangling. + pub(in crate::llvm) const fn as_mut_ptr(&self) -> LLVMContextRef { + self.context + } + + pub(crate) fn create_module<'ctx>(&'ctx self, name: &CStr) -> Option> { + let module = unsafe { LLVMModuleCreateWithNameInContext(name.as_ptr(), self.context) }; + + if module.is_null() { + return None; + } + + Some(LLVMModule { + module, + _marker: PhantomData, + }) + } + + /// Install a context-local diagnostic handler. 
+ pub(crate) fn set_diagnostic_handler(&mut self, handler: T) -> InstalledDiagnosticHandler + where + T: LLVMDiagnosticHandler + 'static, + { + // Heap-allocate and pin the handler so its address is stable + // for the C API + let pinrc = Rc::pin(handler); + + // Get a opaque raw pointer to the new memory stable object + let handler_ptr = ptr::from_ref(Pin::as_ref(&pinrc).get_ref()) as *mut c_void; + + unsafe { + LLVMContextSetDiagnosticHandler( + self.context, + Some(diagnostic_handler::), + handler_ptr, + ) + }; + + // Keep the handler alive for at least as long as the context + // by storing a type-erased pinned clone in the context. This + // guards against the handler being dropped while LLVM still + // holds the callback pointer. + self.diagnostic_handler = Some(StoredHandler { + _handler: pinrc.clone(), + }); + + // Return a typed handle that keeps a strong, pinned reference to `T`. + // + // This lets the caller interact with the installed diagnostic handler + // directly (via `with_view`) without needing to query the context or + // deal with an Option. It also contributes to keeping the handler alive + // for as long as the handle (or the context-held clone) exists. 
+ InstalledDiagnosticHandler { inner: pinrc } + } +} + +impl Drop for LLVMContext { + fn drop(&mut self) { + unsafe { + LLVMContextDispose(self.context); + } + } +} + +struct StoredHandler { + _handler: Pin>, +} + +#[derive(Clone)] +pub(crate) struct InstalledDiagnosticHandler { + inner: Pin>, +} + +impl InstalledDiagnosticHandler { + pub(crate) fn with_view R>(&self, f: F) -> R { + f(Pin::as_ref(&self.inner).get_ref()) + } +} + +extern "C" fn diagnostic_handler( + info: LLVMDiagnosticInfoRef, + handler: *mut c_void, +) { + let severity = unsafe { LLVMGetDiagInfoSeverity(info) }; + let message = Message { + ptr: unsafe { LLVMGetDiagInfoDescription(info) }, + }; + let handler = handler.cast::(); + unsafe { &mut *handler }.handle_diagnostic(severity, message.as_string_lossy()); +} diff --git a/src/llvm/types/memory_buffer.rs b/src/llvm/types/memory_buffer.rs new file mode 100644 index 00000000..a9b150e4 --- /dev/null +++ b/src/llvm/types/memory_buffer.rs @@ -0,0 +1,34 @@ +use core::slice; + +use llvm_sys::{ + core::{LLVMDisposeMemoryBuffer, LLVMGetBufferSize, LLVMGetBufferStart}, + prelude::LLVMMemoryBufferRef, +}; + +pub(crate) struct MemoryBuffer { + pub(super) memory_buffer: LLVMMemoryBufferRef, +} + +impl MemoryBuffer { + /// Gets a byte slice of this `MemoryBuffer`. + pub(crate) fn as_slice(&self) -> &[u8] { + unsafe { + let start = LLVMGetBufferStart(self.memory_buffer); + + slice::from_raw_parts(start.cast(), self.get_size()) + } + } + + /// Gets the byte size of this `MemoryBuffer`. 
+ pub(crate) fn get_size(&self) -> usize { + unsafe { LLVMGetBufferSize(self.memory_buffer) } + } +} + +impl Drop for MemoryBuffer { + fn drop(&mut self) { + unsafe { + LLVMDisposeMemoryBuffer(self.memory_buffer); + } + } +} diff --git a/src/llvm/types/mod.rs b/src/llvm/types/mod.rs index 783868ba..f585bb04 100644 --- a/src/llvm/types/mod.rs +++ b/src/llvm/types/mod.rs @@ -1,2 +1,6 @@ +pub(super) mod context; pub(super) mod di; pub(super) mod ir; +pub(super) mod memory_buffer; +pub(super) mod module; +pub(super) mod target_machine; diff --git a/src/llvm/types/module.rs b/src/llvm/types/module.rs new file mode 100644 index 00000000..fc45317f --- /dev/null +++ b/src/llvm/types/module.rs @@ -0,0 +1,92 @@ +use std::{ffi::CStr, marker::PhantomData}; + +use libc::c_char; +use llvm_sys::{ + bit_writer::LLVMWriteBitcodeToFile, + core::{ + LLVMCreateMemoryBufferWithMemoryRangeCopy, LLVMDisposeMessage, LLVMDisposeModule, + LLVMGetTarget, LLVMPrintModuleToFile, LLVMPrintModuleToString, + }, + debuginfo::LLVMStripModuleDebugInfo, + prelude::LLVMModuleRef, +}; + +use crate::llvm::{types::context::LLVMContext, MemoryBuffer, Message}; + +pub(crate) struct LLVMModule<'ctx> { + pub(super) module: LLVMModuleRef, + pub(super) _marker: PhantomData<&'ctx LLVMContext>, +} + +impl LLVMModule<'_> { + /// Returns an unsafe mutable pointer to the LLVM module. + /// + /// The caller must ensure that the [`LLVMModule`] outlives the pointer this + /// function returns, or else it will end up dangling. 
+ pub(in crate::llvm) const fn as_mut_ptr(&self) -> LLVMModuleRef { + self.module + } + + pub(crate) fn get_target(&self) -> *const c_char { + unsafe { LLVMGetTarget(self.module) } + } + + pub(crate) fn write_bitcode_to_path(&self, path: &CStr) -> Result<(), std::io::Error> { + if unsafe { LLVMWriteBitcodeToFile(self.module, path.as_ptr()) } != 0 { + return Err(std::io::Error::last_os_error()); + } + + Ok(()) + } + + pub(crate) fn write_bitcode_to_memory(&self) -> MemoryBuffer { + let buf = unsafe { llvm_sys::bit_writer::LLVMWriteBitcodeToMemoryBuffer(self.module) }; + + MemoryBuffer { memory_buffer: buf } + } + + pub(crate) fn write_ir_to_path(&self, path: &CStr) -> Result<(), String> { + let (ret, message) = unsafe { + Message::with(|message| LLVMPrintModuleToFile(self.module, path.as_ptr(), message)) + }; + + if ret == 0 { + Ok(()) + } else { + Err(message.as_string_lossy().to_string()) + } + } + + pub(crate) fn write_ir_to_memory(&self) -> MemoryBuffer { + // Format the module to a string, then copy into a MemoryBuffer. We do the extra copy to keep the + // internal API simpler, as all the other codegen methods output a MemoryBuffer. + unsafe { + let ptr = LLVMPrintModuleToString(self.module); + let cstr = CStr::from_ptr(ptr); + let bytes = cstr.to_bytes(); + + let buffer_name = c"mem_buffer"; + + // Copy bytes into a new LLVMMemoryBuffer so we can safely dispose the message. 
+ let memory_buffer = LLVMCreateMemoryBufferWithMemoryRangeCopy( + bytes.as_ptr().cast(), + bytes.len(), + buffer_name.as_ptr(), + ); + LLVMDisposeMessage(ptr); + + MemoryBuffer { memory_buffer } + } + } + + /// strips debug information, returns true if DI got stripped + pub(crate) fn strip_debug_info(&mut self) -> bool { + unsafe { LLVMStripModuleDebugInfo(self.module) != 0 } + } +} + +impl Drop for LLVMModule<'_> { + fn drop(&mut self) { + unsafe { LLVMDisposeModule(self.module) }; + } +} diff --git a/src/llvm/types/target_machine.rs b/src/llvm/types/target_machine.rs new file mode 100644 index 00000000..46871002 --- /dev/null +++ b/src/llvm/types/target_machine.rs @@ -0,0 +1,103 @@ +use std::ffi::CStr; + +use llvm_sys::target_machine::{ + LLVMCodeGenFileType, LLVMCodeGenOptLevel, LLVMCodeModel, LLVMCreateTargetMachine, + LLVMDisposeTargetMachine, LLVMRelocMode, LLVMTargetMachineEmitToFile, + LLVMTargetMachineEmitToMemoryBuffer, LLVMTargetMachineRef, LLVMTargetRef, +}; + +use crate::llvm::{types::module::LLVMModule, MemoryBuffer, Message}; + +pub(crate) struct LLVMTargetMachine { + target_machine: LLVMTargetMachineRef, +} + +impl LLVMTargetMachine { + pub(crate) fn new( + target: LLVMTargetRef, + triple: &CStr, + cpu: &CStr, + features: &CStr, + ) -> Option { + let tm = unsafe { + LLVMCreateTargetMachine( + target, + triple.as_ptr(), + cpu.as_ptr(), + features.as_ptr(), + LLVMCodeGenOptLevel::LLVMCodeGenLevelAggressive, + LLVMRelocMode::LLVMRelocDefault, + LLVMCodeModel::LLVMCodeModelDefault, + ) + }; + if tm.is_null() { + None + } else { + Some(Self { target_machine: tm }) + } + } + + /// Returns an unsafe mutable pointer to the LLVM target machine. + /// + /// The caller must ensure that the [`LLVMTargetMachine`] outlives the pointer this + /// function returns, or else it will end up dangling. 
+ pub(in crate::llvm) const fn as_mut_ptr(&self) -> LLVMTargetMachineRef { + self.target_machine + } + + pub(crate) fn emit_to_file( + &self, + module: &LLVMModule<'_>, + path: &CStr, + output_type: LLVMCodeGenFileType, + ) -> Result<(), String> { + let (ret, message) = unsafe { + Message::with(|message| { + LLVMTargetMachineEmitToFile( + self.target_machine, + module.module, + path.as_ptr(), + output_type, + message, + ) + }) + }; + if ret == 0 { + Ok(()) + } else { + Err(message.as_string_lossy().to_string()) + } + } + + pub(crate) fn emit_to_memory_buffer( + &self, + module: &LLVMModule<'_>, + output_type: LLVMCodeGenFileType, + ) -> Result { + let mut out_buf = std::ptr::null_mut(); + let (ret, message) = Message::with(|message| unsafe { + LLVMTargetMachineEmitToMemoryBuffer( + self.target_machine, + module.module, + output_type, + message, + &mut out_buf, + ) + }); + if ret != 0 { + return Err(message.as_string_lossy().to_string()); + } + + Ok(MemoryBuffer { + memory_buffer: out_buf, + }) + } +} + +impl Drop for LLVMTargetMachine { + fn drop(&mut self) { + unsafe { + LLVMDisposeTargetMachine(self.target_machine); + } + } +}