diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e0f2d3d9..18242e6b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## Unreleased + +**Features** + +- Extract srcsrv data from PDB for file mapping by @mujajica and @loewenheim. + This includes a bump of the SymCache format to version 9. ([#943](https://github.com/getsentry/symbolic/pull/943)) + ## 12.18.3 ### Internal Changes 🔧 diff --git a/symbolic-debuginfo/src/base.rs b/symbolic-debuginfo/src/base.rs index 92ea3a2a2..dcbadb982 100644 --- a/symbolic-debuginfo/src/base.rs +++ b/symbolic-debuginfo/src/base.rs @@ -450,13 +450,34 @@ pub struct FileInfo<'data> { name: Cow<'data, [u8]>, /// Path to the file. dir: Cow<'data, [u8]>, + /// The base name on the source server. + /// + /// This only exists if we have a debug file containing + /// source server information. + srcsrv_name: Option>, + /// The path to the file on the source server. + /// + /// This only exists if we have a debug file containing + /// source server information. + srcsrv_dir: Option>, + /// The optional VCS revision (e.g., Perforce changelist, git commit hash). + /// + /// This only exists if we have a debug file containing + /// source server information. + srcsrv_revision: Option>, } impl<'data> FileInfo<'data> { /// Creates a `FileInfo` with a given directory and the file name. #[cfg(feature = "dwarf")] pub fn new(dir: Cow<'data, [u8]>, name: Cow<'data, [u8]>) -> Self { - FileInfo { name, dir } + FileInfo { + name, + dir, + srcsrv_name: None, + srcsrv_dir: None, + srcsrv_revision: None, + } } /// Creates a `FileInfo` from a joined path by trying to split it. @@ -470,12 +491,14 @@ impl<'data> FileInfo<'data> { Some(dir) => Cow::Borrowed(dir), None => Cow::default(), }, + srcsrv_name: None, + srcsrv_dir: None, + srcsrv_revision: None, } } /// Creates a `FileInfo` from a joined path by trying to split it. /// Unlike from_path(), copies the given data instead of referencing it. - #[cfg(feature = "ppdb")] pub(crate) fn from_path_owned(path: &[u8]) -> Self { let (dir, name) = symbolic_common::split_path_bytes(path); @@ -485,6 +508,9 @@ impl<'data> FileInfo<'data> { Some(dir) => Cow::Owned(dir.to_vec()), None => Cow::default(), }, + srcsrv_name: None, + srcsrv_dir: None, + srcsrv_revision: None, } } @@ -493,6 +519,9 @@ impl<'data> FileInfo<'data> { FileInfo { name: Cow::Borrowed(name), dir: Cow::default(), + srcsrv_name: None, + srcsrv_dir: None, + srcsrv_revision: None, } } @@ -511,6 +540,53 @@ impl<'data> FileInfo<'data> { let joined = join_path(&self.dir_str(), &self.name_str()); clean_path(&joined).into_owned() } + + /// The file name on the source server as UTF-8 string. + /// + /// This only exists if we have a debug file containing + /// source server information. + pub fn srcsrv_name_str(&self) -> Option> { + self.srcsrv_name.as_ref().map(from_utf8_cow_lossy) + } + + /// Path to the file on the source server. + /// + /// This only exists if we have a debug file containing + /// source server information. + pub fn srcsrv_dir_str(&self) -> Option> { + self.srcsrv_dir.as_ref().map(from_utf8_cow_lossy) + } + + /// The full path to the file on the source server. + /// + /// This only exists if we have a debug file containing + /// source server information. + pub fn srcsrv_path_str(&self) -> Option { + let joined = join_path( + &self.srcsrv_dir_str().unwrap_or_default(), + &self.srcsrv_name_str()?, + ); + Some(clean_path(&joined).into_owned()) + } + + /// The optional VCS revision (e.g., Perforce changelist, git commit hash). + /// + /// This only exists if we have a debug file containing + /// source server information. + pub fn srcsrv_revision(&self) -> Option<&str> { + self.srcsrv_revision.as_deref() + } + + pub(crate) fn set_srcsrv_path(&mut self, path: &[u8]) { + let (dir, name) = symbolic_common::split_path_bytes(path); + + self.srcsrv_name = Some(Cow::Owned(name.to_owned())); + self.srcsrv_dir = dir.map(|d| Cow::Owned(d.to_owned())); + } + + pub(crate) fn set_srcsrv_revision(&mut self, revision: Option) { + self.srcsrv_revision = revision.map(Cow::Owned); + } } #[allow(clippy::ptr_arg)] // false positive https://github.com/rust-lang/rust-clippy/issues/9218 @@ -562,6 +638,14 @@ impl<'data> FileEntry<'data> { let joined = join_path(&self.compilation_dir_str(), &joined_path); clean_path(&joined).into_owned() } + + /// The full path to the file on the source server. + /// + /// This only exists if we have a debug file containing + /// source server information. + pub fn srcsrv_path_str(&self) -> Option { + self.info.srcsrv_path_str() + } } impl fmt::Debug for FileEntry<'_> { diff --git a/symbolic-debuginfo/src/pdb.rs b/symbolic-debuginfo/src/pdb/mod.rs similarity index 93% rename from symbolic-debuginfo/src/pdb.rs rename to symbolic-debuginfo/src/pdb/mod.rs index a13c423c6..cb78d4112 100644 --- a/symbolic-debuginfo/src/pdb.rs +++ b/symbolic-debuginfo/src/pdb/mod.rs @@ -16,7 +16,6 @@ use pdb_addr2line::pdb::{ }; use pdb_addr2line::ModuleProvider; use smallvec::SmallVec; -use srcsrv; use thiserror::Error; use symbolic_common::{ @@ -25,8 +24,11 @@ use symbolic_common::{ use crate::base::*; use crate::function_stack::FunctionStack; +use crate::pdb::srcsrv::{SourceServerInfo, SourceServerMappings}; use crate::sourcebundle::SourceFileDescriptor; +mod srcsrv; + type Pdb<'data> = pdb::PDB<'data, Cursor<&'data [u8]>>; const MAGIC_BIG: &[u8] = b"Microsoft C/C++ MSF 7.00\r\n\x1a\x44\x53\x00\x00\x00"; @@ -201,6 +203,19 @@ impl<'data> PdbObject<'data> { .unwrap_or_default() } + /// Returns true if this object contains source server information. + pub fn has_source_server_data(&self) -> Result { + let mut pdb = self.pdb.write(); + match pdb.named_stream(b"srcsrv") { + Ok(_) => Ok(true), + Err(pdb::Error::StreamNameNotFound) => { + // No source server info is normal for many PDBs + Ok(false) + } + Err(e) => Err(e.into()), + } + } + /// The kind of this object, which is always `Debug`. pub fn kind(&self) -> ObjectKind { ObjectKind::Debug @@ -252,31 +267,6 @@ impl<'data> PdbObject<'data> { false } - /// Returns the SRCSRV VCS integration name if available. - /// - /// This extracts the version control system identifier from the SRCSRV stream, - /// if present. Common values include "perforce", "tfs", "git", etc. - /// Returns `None` if no SRCSRV stream exists or if it cannot be parsed. - pub fn srcsrv_vcs_name(&self) -> Option { - let mut pdb = self.pdb.write(); - - // Try to open the "srcsrv" named stream - let stream = match pdb.named_stream(b"srcsrv") { - Ok(stream) => stream, - Err(_) => return None, - }; - - // Parse the stream to extract VCS name - let stream_data = stream.as_slice(); - if let Ok(parsed_stream) = srcsrv::SrcSrvStream::parse(stream_data) { - parsed_stream - .version_control_description() - .map(|s| s.to_string()) - } else { - None - } - } - /// Determines whether this object is malformed and was only partially parsed pub fn is_malformed(&self) -> bool { false @@ -500,6 +490,7 @@ struct PdbStreams<'d> { type_info: pdb::TypeInformation<'d>, id_info: pdb::IdInformation<'d>, string_table: Option>, + srcsrv: Option>, pdb: Arc>>, @@ -523,11 +514,22 @@ impl<'d> PdbStreams<'d> { Err(e) => return Err(e.into()), }; + // Try to open the "srcsrv" named stream + let srcsrv = match p.named_stream(b"srcsrv") { + Ok(stream) => Some(stream.as_slice().to_vec()), + Err(pdb::Error::StreamNameNotFound) => { + // No source server info is normal for many PDBs + None + } + Err(e) => return Err(e.into()), + }; + Ok(Self { string_table, debug_info: pdb.debug_info.clone(), type_info: p.type_information()?, id_info: p.id_information()?, + srcsrv, pdb: pdb.pdb.clone(), module_infos: FrozenMap::new(), }) @@ -561,6 +563,7 @@ struct PdbDebugInfo<'d> { string_table: Option<&'d pdb::StringTable<'d>>, /// Type formatter for function name strings. type_formatter: pdb_addr2line::TypeFormatter<'d, 'd>, + srcsrv: Option>, } impl<'d> PdbDebugInfo<'d> { @@ -574,10 +577,20 @@ impl<'d> PdbDebugInfo<'d> { drop(p); + let srcsrv = streams + .srcsrv + .as_deref() + // We don't want to exit on error here so we can still use the PDB + // file even if we fail to parse the source server part. + // TODO: It would be nice to surface this error to users, if and + // when we add logging to this crate. + .and_then(|stream| SourceServerMappings::parse(stream).ok()); + Ok(PdbDebugInfo { address_map, streams, string_table: streams.string_table.as_ref(), + srcsrv, type_formatter: pdb_addr2line::TypeFormatter::new_from_parts( streams, modules, @@ -638,6 +651,7 @@ pub struct PdbDebugSession<'d> { impl<'d> PdbDebugSession<'d> { fn build(pdb: &PdbObject<'d>) -> Result { let streams = PdbStreams::from_pdb(pdb)?; + let cell = SelfCell::try_new(Box::new(streams), |streams| { PdbDebugInfo::build(pdb, unsafe { &*streams }) })?; @@ -671,6 +685,19 @@ impl<'d> PdbDebugSession<'d> { ) -> Result>, PdbError> { Ok(None) } + + /// Returns the SRCSRV VCS integration name if available. + /// + /// This extracts the version control system identifier from the SRCSRV stream, + /// if present. Common values include "perforce", "tfs", "git", etc. + /// Returns `None` if no SRCSRV stream exists or if it cannot be parsed. + pub fn srcsrv_vcs_name(&self) -> Option { + self.cell + .get() + .srcsrv + .as_ref() + .map(|srcsrv| srcsrv.vcs_name().to_owned()) + } } impl<'session> DebugSession<'session> for PdbDebugSession<'_> { @@ -735,11 +762,22 @@ impl<'s> Unit<'s> { } let file_info = program.get_file_info(line_info.file_index)?; + let mut file = self.debug_info.file_info(file_info)?; + + // Fill in source server information if available + if let Some(mappings) = self.debug_info.srcsrv.as_ref() { + let original_path = file.path_str(); + let info = mappings.get_info(&original_path); + if let Some(SourceServerInfo { path, revision }) = info { + file.set_srcsrv_path(path.as_bytes()); + file.set_srcsrv_revision(revision); + } + } lines.push(LineInfo { address: rva, size, - file: self.debug_info.file_info(file_info)?, + file, line: line_info.line_start.into(), }); } @@ -1062,7 +1100,19 @@ impl<'s> Iterator for PdbFileIterator<'s> { let result = file_result .map_err(|err| err.into()) .and_then(|i| self.debug_info.file_info(i)) - .map(|info| FileEntry::new(Cow::default(), info)); + .map(|mut file| { + // Fill in source server information if available + if let Some(mappings) = &self.debug_info.srcsrv { + let original_path = file.path_str(); + let info = mappings.get_info(&original_path); + if let Some(SourceServerInfo { path, revision }) = info { + file.set_srcsrv_path(path.as_bytes()); + file.set_srcsrv_revision(revision); + } + } + + FileEntry::new(Cow::default(), file) + }); return Some(result); } diff --git a/symbolic-debuginfo/src/pdb/srcsrv.rs b/symbolic-debuginfo/src/pdb/srcsrv.rs new file mode 100644 index 000000000..ef7cadcca --- /dev/null +++ b/symbolic-debuginfo/src/pdb/srcsrv.rs @@ -0,0 +1,177 @@ +//! Support for source server information in PDB files. + +use std::cell::RefCell; +use std::collections::HashMap; +use std::convert::Infallible; +use std::str::FromStr; + +use crate::pdb::{PdbError, PdbErrorKind}; + +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum SourceServerError { + /// The srcsrv stream doesn't contain a VCS name. + #[error("missing VCS name in srcsrv stream")] + MissingSourceServerVcs, +} + +/// Source server information for a particular path. +#[derive(Debug, Clone)] +pub struct SourceServerInfo { + /// The file's path on the source server. + pub path: String, + /// The file's revision. + pub revision: Option, +} + +/// VCS schemas for which symbolic can +/// process source server information. +#[derive(Debug, Clone)] +enum SourceServerVcs { + /// Perforce. + /// + /// For perforce, we require the following layout: + /// * `var3` contains the depot path + /// * `var4` contains the changelist + Perforce, + /// Any other VCS. + Unknown(String), +} + +impl SourceServerVcs { + /// The VCS's name. + pub fn name(&self) -> &str { + match self { + SourceServerVcs::Perforce => "Perforce", + SourceServerVcs::Unknown(s) => s, + } + } +} + +impl FromStr for SourceServerVcs { + type Err = Infallible; + + fn from_str(s: &str) -> Result { + if s.eq_ignore_ascii_case("perforce") { + Ok(Self::Perforce) + } else { + Ok(Self::Unknown(s.to_owned())) + } + } +} + +/// A parsed source server stream that can be used to look up +/// a file's revision and path on the source server. +pub struct SourceServerMappings<'s> { + /// The VCS schema in the stream. + vcs: SourceServerVcs, + /// A cache for already computed [`SourceServerInfos`](SourceServerInfo). + cache: RefCell>>, + /// The underlying `SrcSrvStream`. + stream: srcsrv::SrcSrvStream<'s>, +} + +impl<'s> SourceServerMappings<'s> { + /// Attempt to parse a slice of bytes into [`SourceServerMappings`]. + /// + /// This can fail if the `srcsrv_stream` is malformed or lacks a + /// `"verctrl"` field. + pub fn parse(srcsrv_stream: &'s [u8]) -> Result { + let stream = srcsrv::SrcSrvStream::parse(srcsrv_stream) + .map_err(|e| PdbError::new(PdbErrorKind::BadObject, e))?; + let vcs = stream + .version_control_description() + .ok_or(PdbError::new( + PdbErrorKind::BadObject, + SourceServerError::MissingSourceServerVcs, + ))? + .parse() + .unwrap_or_else(|e| match e {}); + + Ok(Self { + vcs, + cache: Default::default(), + stream, + }) + } + + /// Freshly compute source server information for a path from the underlying + /// [`SrcSrvStream`](srcsrv::SrcSrvStream). + /// + /// The computation method depends on the `vcs`. + fn compute_info( + vcs: &SourceServerVcs, + stream: &srcsrv::SrcSrvStream, + path: &str, + ) -> Option { + let Ok(Some((_method, var_map))) = stream.source_and_raw_var_values_for_path(path, "") + else { + return None; + }; + + match vcs { + // Extracts depot path (var3) and changelist (var4). + SourceServerVcs::Perforce => { + let depot_path = var_map.get("var3")?; + let changelist = var_map.get("var4"); + + // Strip leading // from depot path for code mapping compatibility + let depot = depot_path.trim_start_matches("//"); + let revision = changelist.filter(|cl| !cl.is_empty()); + + Some(SourceServerInfo { + path: depot.to_owned(), + revision: revision.map(|s| s.to_owned()), + }) + } + SourceServerVcs::Unknown(_) => None, + } + } + + /// Returns source server information for the given path. + /// + /// This caches the information internally. + pub fn get_info(&self, path: &str) -> Option { + self.cache + .borrow_mut() + .entry(path.to_owned()) + .or_insert_with(|| Self::compute_info(&self.vcs, &self.stream, path)) + .clone() + } + + /// Returns the name of the VCS in this stream. + pub fn vcs_name(&self) -> &str { + self.vcs.name() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_perforce_mapping() { + let stream = br#"SRCSRV: ini ------------------------------------------------ +VERSION=1 +VERCTRL=Perforce +SRCSRV: variables ------------------------------------------ +SRCSRVTRG=%targ%\%var2%\%fnbksl%(%var3%) +SRCSRVCMD= +SRCSRV: source files --------------------------------------- +c:\projects\breakpad-tools\deps\breakpad\src\client\windows\crash_generation\crash_generation_client.cc*P4_SERVER*depot/breakpad/src/client/windows/crash_generation/crash_generation_client.cc*12345 +c:\projects\breakpad-tools\deps\breakpad\src\common\scoped_ptr.h*P4_SERVER*depot/breakpad/src/common/scoped_ptr.h*12346 +c:\projects\breakpad-tools\deps\breakpad\src\common\windows\string_utils-inl.h*P4_SERVER*depot/breakpad/src/common/windows/string_utils-inl.h*12347 +c:\program files (x86)\microsoft visual studio\2017\community\vc\tools\msvc\14.13.26128\include\system_error*P4_SERVER*depot/msvc/2017/include/system_error*67890 +SRCSRV: end ------------------------------------------------"#; + + let mappings = SourceServerMappings::parse(stream).unwrap(); + let info = mappings.get_info(r"c:\projects\breakpad-tools\deps\breakpad\src\client\windows\crash_generation\crash_generation_client.cc").unwrap(); + + assert_eq!( + info.path, + "depot/breakpad/src/client/windows/crash_generation/crash_generation_client.cc" + ); + assert_eq!(info.revision.as_deref(), Some("12345")); + } +} diff --git a/symbolic-debuginfo/tests/test_pdb_srcsrv.rs b/symbolic-debuginfo/tests/test_pdb_srcsrv.rs index a5aae549b..9656bb781 100644 --- a/symbolic-debuginfo/tests/test_pdb_srcsrv.rs +++ b/symbolic-debuginfo/tests/test_pdb_srcsrv.rs @@ -1,29 +1,84 @@ +//! Tests for PDB SRCSRV functionality +//! +//! These tests verify that source server (SRCSRV) information embedded in PDB files +//! is properly parsed and used to fill in file information. This is commonly used in game +//! development where builds happen on different machines with Perforce. + use symbolic_common::ByteView; use symbolic_debuginfo::pdb::PdbObject; +use symbolic_debuginfo::Object; use symbolic_testutils::fixture; -type Error = Box; - #[test] -fn test_pdb_srcsrv_vcs_name() -> Result<(), Error> { - let view = ByteView::open(fixture("windows/crash_with_srcsrv.pdb"))?; - let pdb = PdbObject::parse(&view)?; +fn test_pdb_srcsrv_vcs_name() { + let view = ByteView::open(fixture("windows/crash_with_srcsrv.pdb")).unwrap(); + let pdb = PdbObject::parse(&view).unwrap(); // This PDB file contains Perforce SRCSRV information - let vcs_name = pdb.srcsrv_vcs_name(); + let vcs_name = pdb.debug_session().unwrap().srcsrv_vcs_name(); assert_eq!(vcs_name, Some("Perforce".to_string())); +} + +#[test] +fn test_pdb_file_info() { + let view = ByteView::open(fixture("windows/crash_with_srcsrv.pdb")).unwrap(); + let object = Object::parse(&view).unwrap(); + + let session = object.debug_session().unwrap(); + + let file = session + .files() + .map(|file| file.unwrap()) + .find(|file| { + // Expected specific path based on the SRCSRV data in the test PDB: + // c:\projects\breakpad-tools\deps\breakpad\src\client\windows\crash_generation\crash_generation_client.cc + // -> depot/breakpad/src/client/windows/crash_generation/crash_generation_client.cc (path) + // -> 12345 (revision) + file.srcsrv_path_str().as_deref() + == Some( + "depot/breakpad/src/client/windows/crash_generation/crash_generation_client.cc", + ) + }) + .unwrap(); + + assert_eq!(file.srcsrv_revision(), Some("12345")); +} + +#[test] +fn test_pdb_line_info() { + let view = ByteView::open(fixture("windows/crash_with_srcsrv.pdb")).unwrap(); + let object = Object::parse(&view).unwrap(); + + let session = object.debug_session().unwrap(); + + let line = session + .functions() + .map(|function| function.unwrap()) + .flat_map(|function| function.lines.into_iter()) + .find(|line| { + // Expected specific path based on the SRCSRV data in the test PDB + // Path should NOT contain @12345, revision should be separate + line.file.srcsrv_path_str().as_deref() + == Some( + "depot/breakpad/src/client/windows/crash_generation/crash_generation_client.cc", + ) + }) + .unwrap(); - Ok(()) + assert_eq!(line.file.srcsrv_revision(), Some("12345"),); } #[test] -fn test_pdb_without_srcsrv() -> Result<(), Error> { - let view = ByteView::open(fixture("windows/crash.pdb"))?; - let pdb = PdbObject::parse(&view)?; +fn test_pdb_without_srcsrv() { + let view = ByteView::open(fixture("windows/crash.pdb")).unwrap(); + let pdb = PdbObject::parse(&view).unwrap(); - // This PDB file does not contain SRCSRV information - let vcs_name = pdb.srcsrv_vcs_name(); + let vcs_name = pdb.debug_session().unwrap().srcsrv_vcs_name(); assert_eq!(vcs_name, None); - Ok(()) + let srcsrv_data = pdb.has_source_server_data().unwrap(); + assert!( + !srcsrv_data, + "Regular PDB without SRCSRV should return None" + ); } diff --git a/symbolic-symcache/src/lib.rs b/symbolic-symcache/src/lib.rs index b9c625c8e..e89ee3693 100644 --- a/symbolic-symcache/src/lib.rs +++ b/symbolic-symcache/src/lib.rs @@ -5,7 +5,7 @@ //! //! # Structure of a SymCache //! -//! A SymCache (version 8) contains the following primary kinds of data, written in the following +//! A SymCache (version 9) contains the following primary kinds of data, written in the following //! order: //! //! 1. Files @@ -25,6 +25,8 @@ //! ## Files //! //! A file contains string offsets for its file name, parent directory, and compilation directory. +//! In version 9+, files also contain optional string offsets for the name and directory on the +//! source server as well as the file revision.. //! //! ## Functions //! @@ -106,6 +108,7 @@ mod lookup; mod raw; pub mod transform; mod v7; +mod v9; mod writer; use symbolic_common::Arch; @@ -117,7 +120,8 @@ pub use error::{Error, ErrorKind}; pub use lookup::*; pub use writer::SymCacheConverter; -use crate::v7::{SymCacheV7, SymCacheV7Inner, SymCacheV8}; +use crate::v7::{SymCacheV7, SymCacheV8}; +use crate::v9::SymCacheV9; type Result = std::result::Result; @@ -133,7 +137,8 @@ type Result = std::result::Result; /// 6: PR #319: Correct line offsets and spacer line records /// 7: PR #459: A new binary format fundamentally based on addr ranges /// 8: PR #670: Use LEB128-prefixed string table -pub const SYMCACHE_VERSION: u32 = 8; +/// 9: PR #943: Add fields to File structure for source server and revision tracking +pub const SYMCACHE_VERSION: u32 = 9; /// The serialized SymCache binary format. /// @@ -150,6 +155,7 @@ impl std::fmt::Debug for SymCache<'_> { match self.inner { SymCacheInner::V7(ref sym_cache_v7) => sym_cache_v7.fmt(f), SymCacheInner::V8(ref sym_cache_v8) => sym_cache_v8.fmt(f), + SymCacheInner::V9(ref sym_cache_v9) => sym_cache_v9.fmt(f), } } } @@ -168,8 +174,9 @@ impl<'data> SymCache<'data> { } let inner = match version.version { - 7 => SymCacheInner::V7(SymCacheV7Inner::parse(rest)?), - 8 => SymCacheInner::V8(SymCacheV7Inner::parse(rest)?), + 7 => SymCacheInner::V7(SymCacheV7::parse(rest)?), + 8 => SymCacheInner::V8(SymCacheV8::parse(rest)?), + 9 => SymCacheInner::V9(SymCacheV9::parse(rest)?), _ => return Err(ErrorKind::WrongVersion.into()), }; @@ -191,16 +198,16 @@ impl<'data> SymCache<'data> { match &self.inner { SymCacheInner::V7(cache) => Arch::from_u32(cache.header.arch), SymCacheInner::V8(cache) => Arch::from_u32(cache.header.arch), + SymCacheInner::V9(cache) => Arch::from_u32(cache.header.arch), } } /// The debug identifier of the cache file. pub fn debug_id(&self) -> DebugId { - { - match &self.inner { - SymCacheInner::V7(cache) => cache.header.debug_id, - SymCacheInner::V8(cache) => cache.header.debug_id, - } + match &self.inner { + SymCacheInner::V7(cache) => cache.header.debug_id, + SymCacheInner::V8(cache) => cache.header.debug_id, + SymCacheInner::V9(cache) => cache.header.debug_id, } } } @@ -217,4 +224,5 @@ impl<'slf, 'd: 'slf> AsSelf<'slf> for SymCache<'d> { enum SymCacheInner<'data> { V7(SymCacheV7<'data>), V8(SymCacheV8<'data>), + V9(SymCacheV9<'data>), } diff --git a/symbolic-symcache/src/lookup.rs b/symbolic-symcache/src/lookup.rs index 1370f7b26..558ae3110 100644 --- a/symbolic-symcache/src/lookup.rs +++ b/symbolic-symcache/src/lookup.rs @@ -6,6 +6,7 @@ use crate::v7::lookup::{ FilesV7, FilesV8, FunctionsV7, FunctionsV8, SourceLocationV7, SourceLocationV8, SourceLocationsV7, SourceLocationsV8, }; +use crate::v9::lookup::{FilesV9, FunctionsV9, SourceLocationV9, SourceLocationsV9}; use crate::SymCacheInner; use super::SymCache; @@ -17,6 +18,7 @@ impl<'data> SymCache<'data> { match self.inner { SymCacheInner::V7(ref cache) => cache.lookup(addr).into(), SymCacheInner::V8(ref cache) => cache.lookup(addr).into(), + SymCacheInner::V9(ref cache) => cache.lookup(addr).into(), } } @@ -30,6 +32,7 @@ impl<'data> SymCache<'data> { match self.inner { SymCacheInner::V7(ref cache) => cache.functions().into(), SymCacheInner::V8(ref cache) => cache.functions().into(), + SymCacheInner::V9(ref cache) => cache.functions().into(), } } @@ -41,6 +44,7 @@ impl<'data> SymCache<'data> { match self.inner { SymCacheInner::V7(ref cache) => cache.files().into(), SymCacheInner::V8(ref cache) => cache.files().into(), + SymCacheInner::V9(ref cache) => cache.files().into(), } } } @@ -54,6 +58,21 @@ pub struct File<'data> { pub(crate) directory: Option<&'data str>, /// The file path. pub(crate) name: &'data str, + /// The base name on the source server (version 9+). + /// + /// This only exists if the symcache was created from a debug file containing + /// source server information. + pub(crate) srcsrv_name: Option<&'data str>, + /// The path to the file on the source server (version 9+). + /// + /// This only exists if the symcache was created from a debug file containing + /// source server information. + pub(crate) srcsrv_dir: Option<&'data str>, + /// The optional VCS revision (e.g., Perforce changelist, git commit hash) (version 9+). + /// + /// This only exists if the symcache was created from a debug file containing + /// source server information. + pub(crate) srcsrv_revision: Option<&'data str>, } impl File<'_> { @@ -68,6 +87,24 @@ impl File<'_> { full_path } + + /// Returns this file's full path on the source server (version 9+). + /// + /// This only exists if the symcache was created from a debug file containing + /// source server information. + pub fn full_srcsrv_path(&self) -> Option { + let path = + symbolic_common::join_path(self.srcsrv_dir.unwrap_or_default(), self.srcsrv_name?); + Some(symbolic_common::clean_path(&path).into_owned()) + } + + /// Returns the VCS revision of this file, if available (version 9+). + /// + /// This only exists if the symcache was created from a debug file containing + /// source server information. + pub fn srcsrv_revision(&self) -> Option<&str> { + self.srcsrv_revision + } } /// A Function definition as included in the SymCache. @@ -114,6 +151,7 @@ impl Default for Function<'_> { enum SourceLocationInner<'data, 'cache> { V7(SourceLocationV7<'data, 'cache>), V8(SourceLocationV8<'data, 'cache>), + V9(SourceLocationV9<'data, 'cache>), } /// A source location as included in the SymCache. @@ -131,6 +169,7 @@ impl<'data> SourceLocation<'data, '_> { match self.0 { SourceLocationInner::V7(ref loc) => loc.line(), SourceLocationInner::V8(ref loc) => loc.line(), + SourceLocationInner::V9(ref loc) => loc.line(), } } @@ -139,6 +178,7 @@ impl<'data> SourceLocation<'data, '_> { match self.0 { SourceLocationInner::V7(ref loc) => loc.file(), SourceLocationInner::V8(ref loc) => loc.file(), + SourceLocationInner::V9(ref loc) => loc.file(), } } @@ -147,6 +187,7 @@ impl<'data> SourceLocation<'data, '_> { match self.0 { SourceLocationInner::V7(ref loc) => loc.function(), SourceLocationInner::V8(ref loc) => loc.function(), + SourceLocationInner::V9(ref loc) => loc.function(), } } @@ -166,10 +207,17 @@ impl<'data, 'cache> From> for SourceLocation<'da } } +impl<'data, 'cache> From> for SourceLocation<'data, 'cache> { + fn from(value: SourceLocationV9<'data, 'cache>) -> Self { + Self(SourceLocationInner::V9(value)) + } +} + #[derive(Debug, Clone)] enum SourceLocationsInner<'data, 'cache> { V7(SourceLocationsV7<'data, 'cache>), V8(SourceLocationsV8<'data, 'cache>), + V9(SourceLocationsV9<'data, 'cache>), } /// An Iterator that yields [`SourceLocation`]s, representing an inlining hierarchy. @@ -187,6 +235,9 @@ impl<'data, 'cache> Iterator for SourceLocations<'data, 'cache> { SourceLocationsInner::V8(ref mut locations) => { locations.next().map(SourceLocation::from) } + SourceLocationsInner::V9(ref mut locations) => { + locations.next().map(SourceLocation::from) + } } } } @@ -203,10 +254,17 @@ impl<'data, 'cache> From> for SourceLocations<' } } +impl<'data, 'cache> From> for SourceLocations<'data, 'cache> { + fn from(value: SourceLocationsV9<'data, 'cache>) -> Self { + Self(SourceLocationsInner::V9(value)) + } +} + #[derive(Debug, Clone)] enum FunctionsInner<'data> { V7(FunctionsV7<'data>), V8(FunctionsV8<'data>), + V9(FunctionsV9<'data>), } /// Iterator returned by [`SymCache::functions`]; see documentation there. @@ -220,6 +278,7 @@ impl<'data> Iterator for Functions<'data> { match self.0 { FunctionsInner::V7(ref mut functions) => functions.next(), FunctionsInner::V8(ref mut functions) => functions.next(), + FunctionsInner::V9(ref mut functions) => functions.next(), } } } @@ -236,6 +295,12 @@ impl<'data> From> for Functions<'data> { } } +impl<'data> From> for Functions<'data> { + fn from(value: FunctionsV9<'data>) -> Self { + Self(FunctionsInner::V9(value)) + } +} + /// A helper struct for printing the functions contained in a symcache. /// /// This struct's `Debug` impl prints the entry pcs and names of the @@ -260,6 +325,7 @@ impl fmt::Debug for FunctionsDebug<'_> { enum FilesInner<'data> { V7(FilesV7<'data>), V8(FilesV8<'data>), + V9(FilesV9<'data>), } /// Iterator returned by [`SymCache::files`]; see documentation there. @@ -273,6 +339,7 @@ impl<'data> Iterator for Files<'data> { match self.0 { FilesInner::V7(ref mut files) => files.next(), FilesInner::V8(ref mut files) => files.next(), + FilesInner::V9(ref mut files) => files.next(), } } } @@ -289,6 +356,12 @@ impl<'data> From> for Files<'data> { } } +impl<'data> From> for Files<'data> { + fn from(value: FilesV9<'data>) -> Self { + Self(FilesInner::V9(value)) + } +} + /// A helper struct for printing the files contained in a symcache. /// /// This struct's `Debug` impl prints the full paths of the diff --git a/symbolic-symcache/src/raw/mod.rs b/symbolic-symcache/src/raw/mod.rs index f64e3a176..66f26ac5e 100644 --- a/symbolic-symcache/src/raw/mod.rs +++ b/symbolic-symcache/src/raw/mod.rs @@ -2,6 +2,7 @@ //! pub(crate) mod v7; +pub(crate) mod v9; use watto::Pod; diff --git a/symbolic-symcache/src/raw/v9.rs b/symbolic-symcache/src/raw/v9.rs new file mode 100644 index 000000000..fc68f2425 --- /dev/null +++ b/symbolic-symcache/src/raw/v9.rs @@ -0,0 +1,158 @@ +//! The raw SymCache V9 binary file format internals. +//! +//! V9 adds source server information support to the File structure. +use watto::Pod; + +use symbolic_common::DebugId; + +/// This [`SourceLocation`] is a sentinel value that says that no source location is present here. +/// This is used to push an "end" range that does not resolve to a valid source location. +/// Otherwise, the ranges would implicitly extend to infinity. +pub(crate) const NO_SOURCE_LOCATION: SourceLocation = SourceLocation { + file_idx: u32::MAX, + line: u32::MAX, + function_idx: u32::MAX, + inlined_into_idx: u32::MAX, +}; + +/// The header of a symcache file. +#[derive(Debug, Clone, PartialEq, Eq)] +#[repr(C)] +pub(crate) struct Header { + /// Debug identifier of the object file. + pub(crate) debug_id: DebugId, + /// CPU architecture of the object file. + /// + /// This cannot be [`symbolic_common::Arch`] because + /// not every bit pattern is valid for that type. + pub(crate) arch: u32, + /// Number of included [`File`]s. + pub(crate) num_files: u32, + /// Number of included [`Function`]s. + pub(crate) num_functions: u32, + /// Number of included [`SourceLocation`]s. + pub(crate) num_source_locations: u32, + /// Number of included [`Range`]s. + pub(crate) num_ranges: u32, + /// Total number of bytes used for string data. + pub(crate) string_bytes: u32, + + /// Some reserved space in the header for future extensions that would not require a + /// completely new parsing method. + pub(crate) _reserved: [u8; 16], +} + +/// Serialized Function metadata in the SymCache. +#[derive(Debug, Clone, Hash, PartialEq, Eq)] +#[repr(C)] +pub(crate) struct Function { + /// The functions name (reference to a [`String`]). + pub(crate) name_offset: u32, + /// The compilation directory (reference to a [`String`]). + /// + /// This is retained for binary compatibility; all path information + /// is contained in [`File`]. + pub(crate) _comp_dir_offset: u32, + /// The first address covered by this function. + pub(crate) entry_pc: u32, + /// The language of the function. + pub(crate) lang: u32, +} + +/// Serialized File in the SymCache (V9 format with revision support). +#[derive(Debug, Clone, Hash, PartialEq, Eq)] +#[repr(C)] +pub(crate) struct File { + /// The optional compilation directory prefix (reference to a [`String`]). + pub(crate) comp_dir_offset: u32, + /// The optional directory prefix (reference to a [`String`]). + pub(crate) directory_offset: u32, + /// The file path (reference to a [`String`]). + pub(crate) name_offset: u32, + /// The optional base name on the source server (reference to a [`String`]). + /// + /// This field was added in version 9. + pub(crate) srcsrv_name_offset: u32, + /// The optional path to the file on the source server (reference to a [`String`]). + /// + /// This field was added in version 9. + pub(crate) srcsrv_dir_offset: u32, + /// The optional VCS revision (reference to a [`String`]). + /// + /// This field was added in version 9. + pub(crate) srcsrv_revision_offset: u32, +} + +/// A location in a source file, comprising a file, a line, a function, and +/// the index of the source location this was inlined into, if any. +/// +/// Note that each time a function is inlined, as well as the non-inlined +/// version of the function, is represented by a distinct `SourceLocation`. +/// These `SourceLocation`s will all point to the same file, line, and function, +/// but have different inline information. +#[derive(Clone, Debug, Hash, PartialEq, Eq)] +#[repr(C)] +pub(crate) struct SourceLocation { + /// The optional source file (reference to a [`File`]). + pub(crate) file_idx: u32, + /// The line number. + pub(crate) line: u32, + /// The function (reference to a [`Function`]). + pub(crate) function_idx: u32, + /// The caller source location in case this location was inlined + /// (reference to another [`SourceLocation`]). + pub(crate) inlined_into_idx: u32, +} + +/// A representation of a code range in the SymCache. +/// +/// We only save the start address, the end is implicitly given +/// by the next range's start. +#[derive(Debug, Clone, Hash, PartialEq, Eq)] +#[repr(C)] +pub(crate) struct Range(pub(crate) u32); + +unsafe impl Pod for Header {} +unsafe impl Pod for Function {} +unsafe impl Pod for File {} +unsafe impl Pod for SourceLocation {} +unsafe impl Pod for Range {} + +#[cfg(test)] +mod tests { + use std::mem; + + use super::*; + + #[test] + fn test_sizeof() { + assert_eq!(mem::size_of::
(), 72); + assert_eq!(mem::align_of::
(), 4); + + assert_eq!(mem::size_of::(), 16); + assert_eq!(mem::align_of::(), 4); + + // File (version 9 format): 24 bytes with srcsrv name + srcsrv dir + revision + assert_eq!(mem::size_of::(), 24); + assert_eq!(mem::align_of::(), 4); + + assert_eq!(mem::size_of::(), 16); + assert_eq!(mem::align_of::(), 4); + + assert_eq!(mem::size_of::(), 4); + assert_eq!(mem::align_of::(), 4); + } + + #[test] + fn test_header_arch_from_bytes() { + // Create an array of `u32`s and later transmute it to a slice of `u8`. + // This is to get a `u8` slice which is 4-byte aligned. + let mut nums = [0u32; size_of::
() / 4]; + // There are 32B, or 8 `u32`, before `arch` in the header. + nums[8] = 17; + let ptr = &raw const nums as *const u8; + let bytes: &[u8] = unsafe { std::slice::from_raw_parts(ptr, size_of::
()) }; + + let _arch = Header::ref_from_bytes(bytes).unwrap().arch; + } +} diff --git a/symbolic-symcache/src/transform/bcsymbolmap.rs b/symbolic-symcache/src/transform/bcsymbolmap.rs index d554100d2..0fdb522c6 100644 --- a/symbolic-symcache/src/transform/bcsymbolmap.rs +++ b/symbolic-symcache/src/transform/bcsymbolmap.rs @@ -27,6 +27,7 @@ impl Transformer for BcSymbolMap<'_> { name: resolve_cow(self, sl.file.name), directory: sl.file.directory.map(|dir| resolve_cow(self, dir)), comp_dir: sl.file.comp_dir.map(|dir| resolve_cow(self, dir)), + ..sl.file }, line: sl.line, } diff --git a/symbolic-symcache/src/transform/mod.rs b/symbolic-symcache/src/transform/mod.rs index 453a87ec5..a641c7b38 100644 --- a/symbolic-symcache/src/transform/mod.rs +++ b/symbolic-symcache/src/transform/mod.rs @@ -25,6 +25,12 @@ pub struct File<'s> { pub directory: Option>, /// The optional compilation directory prefix. pub comp_dir: Option>, + /// The opitonal base name on the source server (version 9+). + pub srcsrv_name: Option>, + /// The optional path to the file on the source server (version 9+). + pub srcsrv_dir: Option>, + /// The optional VCS revision (e.g., Perforce changelist, git commit hash) (v9+). + pub srcsrv_revision: Option>, } /// A Source Location (File + Line) to be written to the SymCache. diff --git a/symbolic-symcache/src/v7/lookup.rs b/symbolic-symcache/src/v7/lookup.rs index 5c48910af..b9d964a0f 100644 --- a/symbolic-symcache/src/v7/lookup.rs +++ b/symbolic-symcache/src/v7/lookup.rs @@ -43,6 +43,9 @@ impl<'data, Flavor: SymCacheV7Flavor> SymCacheV7Inner<'data, Flavor> { comp_dir: self.get_string(raw_file.comp_dir_offset), directory: self.get_string(raw_file.directory_offset), name: self.get_string(raw_file.name_offset).unwrap_or_default(), + srcsrv_name: None, + srcsrv_dir: None, + srcsrv_revision: None, }) } diff --git a/symbolic-symcache/src/v9/lookup.rs b/symbolic-symcache/src/v9/lookup.rs new file mode 100644 index 000000000..223b1aa95 --- /dev/null +++ b/symbolic-symcache/src/v9/lookup.rs @@ -0,0 +1,191 @@ +use symbolic_common::Language; + +use crate::raw::v9 as raw; +use crate::v9::SymCacheV9; +use crate::{File, Function}; + +impl<'data> SymCacheV9<'data> { + /// Looks up an instruction address in the SymCacheV9, yielding an iterator of [`SourceLocationV9`]s + /// representing a hierarchy of inlined function calls. + pub(crate) fn lookup(&self, addr: u64) -> SourceLocationsV9<'data, '_> { + let addr = match u32::try_from(addr) { + Ok(addr) => addr, + Err(_) => { + return SourceLocationsV9 { + cache: self, + source_location_idx: u32::MAX, + } + } + }; + + let source_location_start = (self.source_locations.len() - self.ranges.len()) as u32; + let mut source_location_idx = match self.ranges.binary_search_by_key(&addr, |r| r.0) { + Ok(idx) => source_location_start + idx as u32, + Err(0) => u32::MAX, + Err(idx) => source_location_start + idx as u32 - 1, + }; + + if let Some(source_location) = self.source_locations.get(source_location_idx as usize) { + if *source_location == raw::NO_SOURCE_LOCATION { + source_location_idx = u32::MAX; + } + } + + SourceLocationsV9 { + cache: self, + source_location_idx, + } + } + + pub(crate) fn get_file(&self, file_idx: u32) -> Option> { + let raw_file = self.files.get(file_idx as usize)?; + Some(File { + comp_dir: self.get_string(raw_file.comp_dir_offset), + directory: self.get_string(raw_file.directory_offset), + name: self.get_string(raw_file.name_offset).unwrap_or_default(), + srcsrv_name: self.get_string(raw_file.srcsrv_name_offset), + srcsrv_dir: self.get_string(raw_file.srcsrv_dir_offset), + srcsrv_revision: self.get_string(raw_file.srcsrv_revision_offset), + }) + } + + pub(crate) fn get_function(&self, function_idx: u32) -> Option> { + let raw_function = self.functions.get(function_idx as usize)?; + Some(Function { + name: self.get_string(raw_function.name_offset).unwrap_or("?"), + entry_pc: raw_function.entry_pc, + language: Language::from_u32(raw_function.lang), + }) + } + + /// An iterator over the functions in this SymCacheV9. + /// + /// Only functions with a valid entry pc, i.e., one not equal to `u32::MAX`, + /// will be returned. + /// Note that functions are *not* returned ordered by name or entry pc, + /// but in insertion order, which is essentially random. + pub(crate) fn functions(&self) -> FunctionsV9<'data> { + FunctionsV9 { + cache: self.clone(), + function_idx: 0, + } + } + + /// An iterator over the files in this SymCacheV9. + /// + /// Note that files are *not* returned ordered by name or full path, + /// but in insertion order, which is essentially random. + pub(crate) fn files(&self) -> FilesV9<'data> { + FilesV9 { + cache: self.clone(), + file_idx: 0, + } + } +} + +/// A source location as included in the SymCacheV9. +/// +/// A `SourceLocation` represents source information about a particular instruction. +/// It always has a `[Function]` associated with it and may also have a `[File]` and a line number. +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) struct SourceLocationV9<'data, 'cache> { + pub(crate) cache: &'cache SymCacheV9<'data>, + pub(crate) source_location: &'data raw::SourceLocation, +} + +impl<'data> SourceLocationV9<'data, '_> { + /// The source line corresponding to the instruction. + /// + /// 0 denotes an unknown line number. + pub(crate) fn line(&self) -> u32 { + self.source_location.line + } + + /// The source file corresponding to the instruction. + pub(crate) fn file(&self) -> Option> { + self.cache.get_file(self.source_location.file_idx) + } + + /// The function corresponding to the instruction. + pub(crate) fn function(&self) -> Function<'data> { + self.cache + .get_function(self.source_location.function_idx) + .unwrap_or_default() + } +} + +/// An Iterator that yields [`SourceLocationV9`]s, representing an inlining hierarchy. +#[derive(Debug, Clone)] +pub(crate) struct SourceLocationsV9<'data, 'cache> { + pub(crate) cache: &'cache SymCacheV9<'data>, + pub(crate) source_location_idx: u32, +} + +impl<'data, 'cache> Iterator for SourceLocationsV9<'data, 'cache> { + type Item = SourceLocationV9<'data, 'cache>; + + fn next(&mut self) -> Option { + if self.source_location_idx == u32::MAX { + return None; + } + self.cache + .source_locations + .get(self.source_location_idx as usize) + .map(|source_location| { + self.source_location_idx = source_location.inlined_into_idx; + SourceLocationV9 { + cache: self.cache, + source_location, + } + }) + } +} + +/// Iterator returned by [`SymCacheV9::functions`]; see documentation there. +#[derive(Debug, Clone)] +pub(crate) struct FunctionsV9<'data> { + cache: SymCacheV9<'data>, + function_idx: u32, +} + +impl<'data> Iterator for FunctionsV9<'data> { + type Item = Function<'data>; + + fn next(&mut self) -> Option { + let mut function = self.cache.get_function(self.function_idx); + + while let Some(ref f) = function { + if f.entry_pc == u32::MAX { + self.function_idx += 1; + function = self.cache.get_function(self.function_idx); + } else { + break; + } + } + + if function.is_some() { + self.function_idx += 1; + } + + function + } +} + +/// Iterator returned by [`SymCacheV9::files`]; see documentation there. +#[derive(Debug, Clone)] +pub(crate) struct FilesV9<'data> { + cache: SymCacheV9<'data>, + file_idx: u32, +} + +impl<'data> Iterator for FilesV9<'data> { + type Item = File<'data>; + + fn next(&mut self) -> Option { + let file = self.cache.get_file(self.file_idx); + if file.is_some() { + self.file_idx += 1; + } + file + } +} diff --git a/symbolic-symcache/src/v9/mod.rs b/symbolic-symcache/src/v9/mod.rs new file mode 100644 index 000000000..2c2a766a5 --- /dev/null +++ b/symbolic-symcache/src/v9/mod.rs @@ -0,0 +1,85 @@ +pub(crate) mod lookup; + +// V9 uses its own raw format with revision support. +use crate::raw::v9 as raw; +use crate::{ErrorKind, Result}; + +use symbolic_common::Arch; +use watto::{align_to, Pod, StringTable}; + +/// The serialized SymCache V9 binary format. +#[derive(Clone, PartialEq, Eq)] +pub(crate) struct SymCacheV9<'data> { + pub(crate) header: &'data raw::Header, + pub(crate) files: &'data [raw::File], + pub(crate) functions: &'data [raw::Function], + pub(crate) source_locations: &'data [raw::SourceLocation], + pub(crate) ranges: &'data [raw::Range], + pub(crate) string_bytes: &'data [u8], +} + +impl std::fmt::Debug for SymCacheV9<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("SymCache") + .field("version", &9) + .field("debug_id", &self.header.debug_id) + .field("arch", &Arch::from_u32(self.header.arch)) + .field("files", &self.header.num_files) + .field("functions", &self.header.num_functions) + .field("source_locations", &self.header.num_source_locations) + .field("ranges", &self.header.num_ranges) + .field("string_bytes", &self.header.string_bytes) + .finish() + } +} + +impl<'data> SymCacheV9<'data> { + pub fn parse(buf: &'data [u8]) -> Result { + let (header, rest) = raw::Header::ref_from_prefix(buf).ok_or(ErrorKind::InvalidHeader)?; + + let (_, rest) = align_to(rest, 8).ok_or(ErrorKind::InvalidFiles)?; + let (files, rest) = raw::File::slice_from_prefix(rest, header.num_files as usize) + .ok_or(ErrorKind::InvalidFiles)?; + + let (_, rest) = align_to(rest, 8).ok_or(ErrorKind::InvalidFunctions)?; + let (functions, rest) = + raw::Function::slice_from_prefix(rest, header.num_functions as usize) + .ok_or(ErrorKind::InvalidFunctions)?; + + let (_, rest) = align_to(rest, 8).ok_or(ErrorKind::InvalidSourceLocations)?; + let (source_locations, rest) = + raw::SourceLocation::slice_from_prefix(rest, header.num_source_locations as usize) + .ok_or(ErrorKind::InvalidSourceLocations)?; + + let (_, rest) = align_to(rest, 8).ok_or(ErrorKind::InvalidRanges)?; + let (ranges, rest) = raw::Range::slice_from_prefix(rest, header.num_ranges as usize) + .ok_or(ErrorKind::InvalidRanges)?; + + let (_, rest) = align_to(rest, 8).ok_or(ErrorKind::UnexpectedStringBytes { + expected: header.string_bytes as usize, + found: 0, + })?; + if rest.len() < header.string_bytes as usize { + return Err(ErrorKind::UnexpectedStringBytes { + expected: header.string_bytes as usize, + found: rest.len(), + } + .into()); + } + + Ok(Self { + header, + files, + functions, + source_locations, + ranges, + string_bytes: rest, + }) + } + + /// Resolves a string reference to the pointed-to `&str` data. + fn get_string(&self, offset: u32) -> Option<&'data str> { + // version >= 8 (including v9): string length prefixes are LEB128 + StringTable::read(self.string_bytes, offset as usize).ok() + } +} diff --git a/symbolic-symcache/src/writer.rs b/symbolic-symcache/src/writer.rs index 3d5b1e04b..0ca55a4e6 100644 --- a/symbolic-symcache/src/writer.rs +++ b/symbolic-symcache/src/writer.rs @@ -9,8 +9,8 @@ use symbolic_common::{Arch, DebugId}; use symbolic_debuginfo::{DebugSession, FileFormat, Function, ObjectLike, Symbol}; use watto::{Pod, StringTable, Writer}; -use super::{raw, transform}; -use crate::raw::v7::NO_SOURCE_LOCATION; +use crate::raw::v9::NO_SOURCE_LOCATION; +use crate::{raw, transform}; use crate::{Error, ErrorKind}; /// The SymCache Converter. @@ -32,18 +32,18 @@ pub struct SymCacheConverter<'a> { transformers: transform::Transformers<'a>, string_table: StringTable, - /// The set of all [`raw::v7::File`]s that have been added to this `Converter`. - files: IndexSet, - /// The set of all [`raw::v7::Function`]s that have been added to this `Converter`. - functions: IndexSet, - /// The set of [`raw::v7::SourceLocation`]s used in this `Converter` that are only used as + /// The set of all [`raw::v9::File`]s that have been added to this `Converter`. + files: IndexSet, + /// The set of all [`raw::v9::Function`]s that have been added to this `Converter`. + functions: IndexSet, + /// The set of [`raw::v9::SourceLocation`]s used in this `Converter` that are only used as /// "call locations", i.e. which are only referred to from `inlined_into_idx`. - call_locations: IndexSet, - /// A map from code ranges to the [`raw::v7::SourceLocation`]s they correspond to. + call_locations: IndexSet, + /// A map from code ranges to the [`raw::v9::SourceLocation`]s they correspond to. /// /// Only the starting address of a range is saved, the end address is given implicitly /// by the start address of the next range. - ranges: BTreeMap, + ranges: BTreeMap, /// This is highest addr that we know is outside of a valid function. /// Functions have an explicit end, while Symbols implicitly extend to infinity. @@ -160,7 +160,7 @@ impl<'a> SymCacheConverter<'a> { let name_offset = string_table.insert(function_name) as u32; let lang = language as u32; - let (fun_idx, _) = self.functions.insert_full(raw::v7::Function { + let (fun_idx, _) = self.functions.insert_full(raw::v9::Function { name_offset, _comp_dir_offset: u32::MAX, entry_pc, @@ -228,6 +228,9 @@ impl<'a> SymCacheConverter<'a> { name: line.file.name_str(), directory: Some(line.file.dir_str()), comp_dir: comp_dir.map(Into::into), + srcsrv_name: line.file.srcsrv_name_str(), + srcsrv_dir: line.file.srcsrv_dir_str(), + srcsrv_revision: line.file.srcsrv_revision().map(|s| s.into()), }, line: line.line as u32, }; @@ -244,14 +247,29 @@ impl<'a> SymCacheConverter<'a> { .file .comp_dir .map_or(u32::MAX, |cd| string_table.insert(&cd) as u32); + let srcsrv_name_offset = location + .file + .srcsrv_name + .map_or(u32::MAX, |r| string_table.insert(&r) as u32); + let srcsrv_dir_offset = location + .file + .srcsrv_dir + .map_or(u32::MAX, |r| string_table.insert(&r) as u32); + let srcsrv_revision_offset = location + .file + .srcsrv_revision + .map_or(u32::MAX, |r| string_table.insert(&r) as u32); - let (file_idx, _) = self.files.insert_full(raw::v7::File { + let (file_idx, _) = self.files.insert_full(raw::v9::File { name_offset, directory_offset, comp_dir_offset, + srcsrv_name_offset, + srcsrv_dir_offset, + srcsrv_revision_offset, }); - let source_location = raw::v7::SourceLocation { + let source_location = raw::v9::SourceLocation { file_idx: file_idx as u32, line: location.line, function_idx, @@ -353,7 +371,7 @@ impl<'a> SymCacheConverter<'a> { if !function.inline { // add the bare minimum of information for the function if there isn't any. - insert_source_location(&mut self.ranges, entry_pc, || raw::v7::SourceLocation { + insert_source_location(&mut self.ranges, entry_pc, || raw::v9::SourceLocation { file_idx: u32::MAX, line: 0, function_idx, @@ -416,7 +434,7 @@ impl<'a> SymCacheConverter<'a> { // Insert a source location for the symbol, overwriting `NO_SOURCE_LOCATION` sentinel // values but not actual source locations coming from e.g. functions. insert_source_location(&mut self.ranges, symbol.address as u32, || { - let function = raw::v7::Function { + let function = raw::v9::Function { name_offset: name_idx, _comp_dir_offset: u32::MAX, entry_pc: symbol.address as u32, @@ -424,7 +442,7 @@ impl<'a> SymCacheConverter<'a> { }; let function_idx = self.functions.insert_full(function).0 as u32; - raw::v7::SourceLocation { + raw::v9::SourceLocation { file_idx: u32::MAX, line: 0, function_idx, @@ -484,17 +502,17 @@ impl<'a> SymCacheConverter<'a> { let num_ranges = self.ranges.len() as u32; let string_bytes = self.string_table.into_bytes(); - let version = raw::VersionInfo { - magic: raw::SYMCACHE_MAGIC, + // Write VersionInfo preamble + let version_info = raw::VersionInfo { + magic: crate::raw::SYMCACHE_MAGIC, version: crate::SYMCACHE_VERSION, }; + writer.write_all(version_info.as_bytes())?; - writer.write_all(version.as_bytes())?; - - let header = raw::v7::Header { + // Write v9 Header + let header = raw::v9::Header { debug_id: self.debug_id, arch: self.arch as u32, - num_files, num_functions, num_source_locations, @@ -543,12 +561,12 @@ impl<'a> SymCacheConverter<'a> { /// starting at that same address, we want to evict that sentinel, but we wouldn't want to /// evict source locations carrying actual information. fn insert_source_location( - source_locations: &mut BTreeMap, + source_locations: &mut BTreeMap, key: K, val: F, ) where K: Ord, - F: FnOnce() -> raw::v7::SourceLocation, + F: FnOnce() -> raw::v9::SourceLocation, { if source_locations .get(&key) diff --git a/symbolic-symcache/tests/test_cache.rs b/symbolic-symcache/tests/test_cache.rs index 2612fa469..ba250719f 100644 --- a/symbolic-symcache/tests/test_cache.rs +++ b/symbolic-symcache/tests/test_cache.rs @@ -1,5 +1,8 @@ +use std::io::Cursor; + use symbolic_common::ByteView; -use symbolic_symcache::{FunctionsDebug, SymCache}; +use symbolic_debuginfo::Object; +use symbolic_symcache::{FunctionsDebug, SymCache, SymCacheConverter}; use symbolic_testutils::fixture; type Error = Box; @@ -82,3 +85,68 @@ fn test_lookup() -> Result<(), Error> { Ok(()) } + +#[test] +fn test_pdb_srcsrv_remapping() -> Result<(), Error> { + let buffer = ByteView::open(fixture("windows/crash_with_srcsrv.pdb"))?; + let object = Object::parse(&buffer)?; + + let mut converter = SymCacheConverter::new(); + converter.process_object(&object)?; + let mut buffer = Vec::new(); + converter.serialize(&mut Cursor::new(&mut buffer))?; + + let cache = SymCache::parse(&buffer)?; + + let file = cache.lookup(0x1000).next().unwrap().file().unwrap(); + assert_eq!( + file.full_srcsrv_path().as_deref(), + Some("depot/breakpad/src/client/windows/crash_generation/crash_generation_client.cc") + ); + assert_eq!(file.srcsrv_revision(), Some("12345")); + + Ok(()) +} + +#[test] +fn test_backward_compatibility_v7_v8_v9() -> Result<(), Error> { + let v7_buffer = ByteView::open(fixture("symcache/current/linux.symc"))?; + let v7_cache = SymCache::parse(&v7_buffer)?; + assert_eq!(v7_cache.version(), 7); + + for file in v7_cache.files() { + assert!(file.srcsrv_revision().is_none()); + } + + let regular_pdb_buffer = ByteView::open(fixture("windows/crash.pdb"))?; + let regular_object = Object::parse(®ular_pdb_buffer)?; + + let mut converter_no_revision = SymCacheConverter::new(); + converter_no_revision.process_object(®ular_object)?; + let mut v9_no_revision_buffer = Vec::new(); + converter_no_revision.serialize(&mut Cursor::new(&mut v9_no_revision_buffer))?; + + let v9_no_revision_cache = SymCache::parse(&v9_no_revision_buffer)?; + assert_eq!(v9_no_revision_cache.version(), 9,); + + for file in v9_no_revision_cache.files() { + assert!(file.srcsrv_revision().is_none()); + } + + let pdb_buffer = ByteView::open(fixture("windows/crash_with_srcsrv.pdb"))?; + let object = Object::parse(&pdb_buffer)?; + + let mut converter = SymCacheConverter::new(); + converter.process_object(&object)?; + let mut v9_buffer = Vec::new(); + converter.serialize(&mut Cursor::new(&mut v9_buffer))?; + + let v9_cache = SymCache::parse(&v9_buffer)?; + assert_eq!(v9_cache.version(), 9, "Should create v9 symcache"); + + // Verify v9 has both files with and without revisions + assert!(v9_cache.files().any(|f| f.srcsrv_revision().is_some())); + assert!(v9_cache.files().any(|f| f.srcsrv_revision().is_none())); + + Ok(()) +} diff --git a/symbolic-symcache/tests/test_writer.rs b/symbolic-symcache/tests/test_writer.rs index 294638131..ee8caf773 100644 --- a/symbolic-symcache/tests/test_writer.rs +++ b/symbolic-symcache/tests/test_writer.rs @@ -25,7 +25,7 @@ fn test_write_header_linux() -> Result<(), Error> { let symcache = SymCache::parse(&buffer)?; insta::assert_debug_snapshot!(symcache, @r#" SymCache { - version: 8, + version: 9, debug_id: DebugId { uuid: "c0bcc3f1-9827-fe65-3058-404b2831d9e6", appendix: 0, @@ -69,7 +69,7 @@ fn test_write_header_macos() -> Result<(), Error> { let symcache = SymCache::parse(&buffer)?; insta::assert_debug_snapshot!(symcache, @r#" SymCache { - version: 8, + version: 9, debug_id: DebugId { uuid: "67e9247c-814e-392b-a027-dbde6748fcbf", appendix: 0,