diff --git a/AsaApi/AsaApi.vcxproj b/AsaApi/AsaApi.vcxproj index 2ec10f5..7192005 100644 --- a/AsaApi/AsaApi.vcxproj +++ b/AsaApi/AsaApi.vcxproj @@ -78,10 +78,11 @@ DynamicLibrary false - v143 + v145 true Unicode - 14.39.33519 + + DynamicLibrary @@ -267,11 +268,11 @@ true true true - NDEBUG;ASAAPI_EXPORTS;ARK_EXPORTS;_WINDOWS;_USRDLL;POCO_STATIC;%(PreprocessorDefinitions) + NOMINMAX;_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR;_SILENCE_ALL_CXX17_DEPRECATION_WARNINGS;NDEBUG;ASAAPI_EXPORTS;ARK_EXPORTS;_WINDOWS;_USRDLL;POCO_STATIC;%(PreprocessorDefinitions) true NotUsing pch.h - $(SolutionDir)AsaApi\Core\Public\API\UE;$(SolutionDir)AsaApi\Core\Public;%(AdditionalIncludeDirectories) + $(SolutionDir)AsaApi\Core\Public\API\UE;$(SolutionDir)AsaApi\Core\Public;$(SolutionDir)Includes\raw_pdb\src;%(AdditionalIncludeDirectories) stdcpp20 stdc17 true @@ -371,6 +372,26 @@ copy "$(SolutionDir)$(PlatformName)\$(ConfigurationName)\$(ProjectName).pdb" "F: + + + + + + + + + + + + + + + + + + + + diff --git a/AsaApi/AsaApi.vcxproj.filters b/AsaApi/AsaApi.vcxproj.filters index dcb3686..f4feb8c 100644 --- a/AsaApi/AsaApi.vcxproj.filters +++ b/AsaApi/AsaApi.vcxproj.filters @@ -109,6 +109,9 @@ {f1b840ad-6bee-4a69-b9ca-20346e129b27} + + {358771c7-78e5-4c89-8d84-58b61dfd3d7d} + @@ -177,6 +180,66 @@ Source Files\Core\Private\Tools + + Source Files\Core\Private\PDBReader\raw_pdb + + + Source Files\Core\Private\PDBReader\raw_pdb + + + Source Files\Core\Private\PDBReader\raw_pdb + + + Source Files\Core\Private\PDBReader\raw_pdb + + + Source Files\Core\Private\PDBReader\raw_pdb + + + Source Files\Core\Private\PDBReader\raw_pdb + + + Source Files\Core\Private\PDBReader\raw_pdb + + + Source Files\Core\Private\PDBReader\raw_pdb + + + Source Files\Core\Private\PDBReader\raw_pdb + + + Source Files\Core\Private\PDBReader\raw_pdb + + + Source Files\Core\Private\PDBReader\raw_pdb + + + Source Files\Core\Private\PDBReader\raw_pdb + + + Source Files\Core\Private\PDBReader\raw_pdb + + + Source 
Files\Core\Private\PDBReader\raw_pdb + + + Source Files\Core\Private\PDBReader\raw_pdb + + + Source Files\Core\Private\PDBReader\raw_pdb + + + Source Files\Core\Private\PDBReader\raw_pdb + + + Source Files\Core\Private\PDBReader\raw_pdb + + + Source Files\Core\Private\PDBReader\raw_pdb + + + Source Files\Core\Private\PDBReader\raw_pdb + diff --git a/AsaApi/Core/Private/Ark/ArkBaseApi.cpp b/AsaApi/Core/Private/Ark/ArkBaseApi.cpp index 01ff8df..b5836d8 100644 --- a/AsaApi/Core/Private/Ark/ArkBaseApi.cpp +++ b/AsaApi/Core/Private/Ark/ArkBaseApi.cpp @@ -10,6 +10,8 @@ #include "HooksImpl.h" #include "ApiUtils.h" #include +#include +#include #include "Requests.h" #include #include @@ -42,6 +44,8 @@ namespace API std::unordered_map offsets_dump; std::unordered_map bitfields_dump; + std::unordered_map fields_dump; + std::unordered_map functions_dump; try { @@ -64,11 +68,12 @@ namespace API const fs::path keyCacheFile = fs::path(exe_path).append(ArkBaseApi::GetApiName()+"/Cache/cached_key.cache"); const fs::path offsetsCacheFile = fs::path(exe_path).append(ArkBaseApi::GetApiName()+"/Cache/cached_offsets.cache"); const fs::path bitfieldsCacheFile = fs::path(exe_path).append(ArkBaseApi::GetApiName()+"/Cache/cached_bitfields.cache"); + const fs::path fieldsCacheFile = fs::path(exe_path).append(ArkBaseApi::GetApiName()+"/Cache/cached_fields.cache"); + const fs::path functionsCacheFile = fs::path(exe_path).append(ArkBaseApi::GetApiName()+"/Cache/cached_functions.cache"); const fs::path offsetsCacheFilePlain = fs::path(exe_path).append(ArkBaseApi::GetApiName() + "/Cache/cached_offsets.txt"); const std::string fileHash = Cache::calculateSHA256(filepath); std::string storedHash = Cache::readFromFile(keyCacheFile); std::unordered_set pdbIgnoreSet = Cache::readFileIntoSet(pdbIgnoreFile); - const std::string defaultCDNUrl = "https://cdn.pelayori.com/cache/"; const fs::path arkApiDir = fs::path(exe_path).append(ArkBaseApi::GetApiName()); @@ -99,32 +104,23 @@ namespace API 
Log::GetLog()->info("Added DLL search directory: {}", std::filesystem::path(w).string()); } - if (autoCacheConfig.value("Enable", true) - && autoCacheConfig.value("DownloadCacheURL", defaultCDNUrl) != "" - && (fileHash != storedHash || !fs::exists(offsetsCacheFile) || !fs::exists(bitfieldsCacheFile))) - { - const fs::path downloadFile = autoCacheConfig.value("DownloadCacheURL", defaultCDNUrl) + fileHash + ".zip"; - const fs::path localFile = fs::path(exe_path).append(ArkBaseApi::GetApiName() + "/Cache/" + fileHash + ".zip"); - - if (ArkBaseApi::DownloadCacheFiles(downloadFile, localFile)) - storedHash = Cache::readFromFile(keyCacheFile); - else - Log::GetLog()->warn("Ooops you are early, the cache has not finished cooking yet! Cache files usually take 10 minutes to be ready after an update. If more time has passed please contact developers."); - - if (fs::exists(localFile)) - fs::remove(localFile); - } - if (fileHash != storedHash || !fs::exists(offsetsCacheFile) || !fs::exists(bitfieldsCacheFile)) { - Log::GetLog()->info("Cache refresh required this will take 10-20 minutes to complete"); - pdb_reader.Read(filepath, &offsets_dump, &bitfields_dump, pdbIgnoreSet); + Log::GetLog()->info("Cache refresh required this will take few seconds to complete"); + pdb_reader.Read(filepath, &offsets_dump, &bitfields_dump, pdbIgnoreSet, &fields_dump, &functions_dump); Log::GetLog()->info("Caching offsets for faster loading next time"); Cache::serializeMap(offsets_dump, offsetsCacheFile); Log::GetLog()->info("Caching bitfields for faster loading next time"); Cache::serializeMap(bitfields_dump, bitfieldsCacheFile); + + Log::GetLog()->info("Caching field type info for faster loading next time"); + Cache::serializeMap(fields_dump, fieldsCacheFile); + + Log::GetLog()->info("Caching function info for faster loading next time"); + Cache::serializeMap(functions_dump, functionsCacheFile); + Cache::saveToFile(keyCacheFile, fileHash); Cache::saveToFilePlain(offsetsCacheFilePlain, 
offsets_dump); } @@ -136,6 +132,18 @@ namespace API Log::GetLog()->info("Reading cached bitfields"); bitfields_dump = Cache::deserializeMap(bitfieldsCacheFile); + + if (fs::exists(fieldsCacheFile)) + { + Log::GetLog()->info("Reading cached field types"); + fields_dump = Cache::deserializeMap(fieldsCacheFile); + } + + if (fs::exists(functionsCacheFile)) + { + Log::GetLog()->info("Reading cached function info"); + functions_dump = Cache::deserializeMap(functionsCacheFile); + } } } catch (const std::exception& error) @@ -144,7 +152,7 @@ namespace API return false; } - Offsets::Get().Init(move(offsets_dump), move(bitfields_dump)); + Offsets::Get().Init(move(offsets_dump), move(bitfields_dump), move(fields_dump), move(functions_dump)); Sleep(10); AsaApi::InitHooks(); Log::GetLog()->info("API was successfully loaded"); @@ -282,9 +290,11 @@ namespace API { GetCommands()->AddConsoleCommand("plugins.load", &LoadPluginCmd); GetCommands()->AddConsoleCommand("plugins.unload", &UnloadPluginCmd); + GetCommands()->AddConsoleCommand("dumpclass", &DumpClassCmd); GetCommands()->AddRconCommand("plugins.load", &LoadPluginRcon); GetCommands()->AddRconCommand("plugins.unload", &UnloadPluginRcon); GetCommands()->AddRconCommand("map.setserverid", &SetServerID); + GetCommands()->AddRconCommand("dumpclass", &DumpClassRcon); } FString ArkBaseApi::LoadPlugin(FString* cmd) @@ -411,4 +421,206 @@ namespace API rcon_connection->SendMessageW(rcon_packet->Id, 0, &reply); } + + FString ArkBaseApi::DumpClass(FString* cmd) { + TArray parsed; + cmd->ParseIntoArray(parsed, L" ", true); + + if (!parsed.IsValidIndex(1)) { + return L"Usage: dumpclass "; + } + + const std::string className = parsed[1].ToString(); + const bool isGlobal = (className == "Global"); + + try { + namespace fs = std::filesystem; + + TCHAR buffer[MAX_PATH]; + GetModuleFileName(NULL, buffer, sizeof(buffer)); + fs::path exe_path = fs::path(buffer).parent_path(); + + const fs::path dumpDir = exe_path / "ArkApi" / "ClassDumps"; + if 
(!fs::exists(dumpDir)) + fs::create_directories(dumpDir); + + const fs::path outputFile = dumpDir / (className + ".h"); + std::ofstream file(outputFile); + + if (!file.is_open()) { + return FString::Format("Failed to create output file: {}", outputFile.string().c_str()); + } + + auto fields = Offsets::Get().GetFieldsForClass(className); + auto bitfields = Offsets::Get().GetBitFieldsForClass(className); + auto functions = Offsets::Get().GetFunctionsForClass(className); + + if (fields.empty() && bitfields.empty() && functions.empty()) { + file.close(); + fs::remove(outputFile); + return FString::Format("No data found for class: {}", className.c_str()); + } + + std::sort(fields.begin(), fields.end(), [](const auto& a, const auto& b) { return a.second.offset < b.second.offset; }); + std::sort(bitfields.begin(), bitfields.end(), [](const auto& a, const auto& b) { return a.second.offset < b.second.offset; }); + std::sort(functions.begin(), functions.end(), [](const auto& a, const auto& b) { return a.second.signature < b.second.signature; }); + + if (isGlobal) { + file << "namespace " << className << "\n{\n"; + } + else { + file << "struct " << className << "\n{\n"; + } + + if (!fields.empty()) { + file << "\t// Fields\n\n"; + for (const auto& [key, info] : fields) { + size_t dotPos = key.rfind('.'); + std::string memberName = (dotPos != std::string::npos) ? key.substr(dotPos + 1) : key; + + if (isGlobal) { + file << "\tinline " << info.type << "& " << memberName << "Field() { return *GetNativeDataPointerField<" << info.type << "*>(nullptr, \"" << key << "\"); }\n"; + } + else { + file << "\t" << info.type << "& " << memberName << "Field() { return *GetNativePointerField<" << info.type << "*>(this, \"" << key << "\"); }\n"; + } + } + } + + if (!bitfields.empty()) { + file << "\n\t// Bitfields\n\n"; + for (const auto& [key, bf] : bitfields) { + size_t dotPos = key.rfind('.'); + std::string memberName = (dotPos != std::string::npos) ? 
key.substr(dotPos + 1) : key; + + if (isGlobal) { + file << "\tinline BitFieldValue " << memberName << "Field() { return { nullptr, \"" << key << "\" }; }\n"; + } + else { + file << "\tBitFieldValue " << memberName << "Field() { return { this, \"" << key << "\" }; }\n"; + } + } + } + + if (!functions.empty()) { + file << "\n\t// Functions\n\n"; + for (const auto& [key, info] : functions) { + if (info.signature.rfind("exec", 0) == 0) + continue; + + std::string paramTypes = info.params; + std::vector paramNamesList; + if (!info.paramNames.empty()) { + std::string name; + for (char c : info.paramNames) { + if (c == ',') { + if (!name.empty()) paramNamesList.push_back(name); + name.clear(); + } + else { + name += c; + } + } + if (!name.empty()) paramNamesList.push_back(name); + } + + std::string paramDecl; + std::string paramCall; + if (!info.params.empty()) { + std::vector paramList; + std::string param; + int depth = 0; + for (char c : info.params) { + if (c == '<') depth++; + else if (c == '>') depth--; + else if (c == ',' && depth == 0) { + paramList.push_back(param); + param.clear(); + continue; + } + param += c; + } + if (!param.empty()) paramList.push_back(param); + + for (size_t i = 0; i < paramList.size(); i++) { + if (i > 0) { + paramDecl += ", "; + paramCall += ", "; + } + + std::string paramName = (i < paramNamesList.size()) ? paramNamesList[i] : ("arg" + std::to_string(i)); + paramDecl += paramList[i] + " " + paramName; + paramCall += paramName; + } + } + + size_t parenPos = info.signature.find('('); + std::string funcName = (parenPos != std::string::npos) ? 
info.signature.substr(0, parenPos) : info.signature; + + if (isGlobal) { + if (info.returnType == "void" || info.returnType.empty()) { + if (paramTypes.empty()) + file << "\tinline void " << funcName << "() { NativeCall(nullptr, \"" << key << "\"); }\n"; + else + file << "\tinline void " << funcName << "(" << paramDecl << ") { NativeCall(nullptr, \"" << key << "\", " << paramCall << "); }\n"; + } + else { + if (paramTypes.empty()) + file << "\tinline " << info.returnType << " " << funcName << "() { return NativeCall<" << info.returnType << ">(nullptr, \"" << key << "\"); }\n"; + else + file << "\tinline " << info.returnType << " " << funcName << "(" << paramDecl << ") { return NativeCall<" << info.returnType << ", " << paramTypes << ">(nullptr, \"" << key << "\", " << paramCall << "); }\n"; + } + } + else if (info.isStatic) { + if (info.returnType == "void" || info.returnType.empty()) { + if (paramTypes.empty()) + file << "\tstatic void " << funcName << "() { NativeCall(nullptr, \"" << key << "\"); }\n"; + else + file << "\tstatic void " << funcName << "(" << paramDecl << ") { NativeCall(nullptr, \"" << key << "\", " << paramCall << "); }\n"; + } + else { + if (paramTypes.empty()) + file << "\tstatic " << info.returnType << " " << funcName << "() { return NativeCall<" << info.returnType << ">(nullptr, \"" << key << "\"); }\n"; + else + file << "\tstatic " << info.returnType << " " << funcName << "(" << paramDecl << ") { return NativeCall<" << info.returnType << ", " << paramTypes << ">(nullptr, \"" << key << "\", " << paramCall << "); }\n"; + } + } + else { + if (info.returnType == "void" || info.returnType.empty()) { + if (paramTypes.empty()) + file << "\tvoid " << funcName << "() { NativeCall(this, \"" << key << "\"); }\n"; + else + file << "\tvoid " << funcName << "(" << paramDecl << ") { NativeCall(this, \"" << key << "\", " << paramCall << "); }\n"; + } + else { + if (paramTypes.empty()) + file << "\t" << info.returnType << " " << funcName << "() { return 
NativeCall<" << info.returnType << ">(this, \"" << key << "\"); }\n"; + else + file << "\t" << info.returnType << " " << funcName << "(" << paramDecl << ") { return NativeCall<" << info.returnType << ", " << paramTypes << ">(this, \"" << key << "\", " << paramCall << "); }\n"; + } + } + } + } + + file << "};\n"; + file.close(); + + Log::GetLog()->info("Class dump saved to: {}", outputFile.string()); + return FString::Format("Class dump saved to: {}", outputFile.string().c_str()); + } + catch (const std::exception& error) { + Log::GetLog()->warn("({}) {}", __FUNCTION__, error.what()); + return FString::Format("Failed to dump class - {}", error.what()); + } + } + + void ArkBaseApi::DumpClassCmd(APlayerController* player_controller, FString* cmd, bool /*unused*/) { + auto* shooter_controller = static_cast(player_controller); + AsaApi::GetApiUtils().SendServerMessage(shooter_controller, FColorList::Green, *DumpClass(cmd)); + } + + void ArkBaseApi::DumpClassRcon(RCONClientConnection* rcon_connection, RCONPacket* rcon_packet, UWorld* /*unused*/) { + FString reply = DumpClass(&rcon_packet->Body); + rcon_connection->SendMessageW(rcon_packet->Id, 0, &reply); + } } // namespace API diff --git a/AsaApi/Core/Private/Ark/ArkBaseApi.h b/AsaApi/Core/Private/Ark/ArkBaseApi.h index af6f69e..47c91dc 100644 --- a/AsaApi/Core/Private/Ark/ArkBaseApi.h +++ b/AsaApi/Core/Private/Ark/ArkBaseApi.h @@ -42,6 +42,12 @@ namespace API static void SetServerID(RCONClientConnection* /*rcon_connection*/, RCONPacket* /*rcon_packet*/, UWorld* /*unused*/); + // Class dump command + static FString DumpClass(FString* cmd); + static void DumpClassCmd(APlayerController* /*player_controller*/, FString* /*cmd*/, bool /*unused*/); + static void DumpClassRcon(RCONClientConnection* /*rcon_connection*/, RCONPacket* /*rcon_packet*/, + UWorld* /*unused*/); + std::unique_ptr commands_; std::unique_ptr hooks_; std::unique_ptr api_utils_; diff --git a/AsaApi/Core/Private/Cache.h b/AsaApi/Core/Private/Cache.h index 
6035644..6525ee2 100644 --- a/AsaApi/Core/Private/Cache.h +++ b/AsaApi/Core/Private/Cache.h @@ -1,6 +1,7 @@ #pragma once #include #include "Logger/Logger.h" +#include "PDBReader/PDBReader.h" #include #include @@ -16,6 +17,24 @@ namespace Cache std::string readFromFile(const std::filesystem::path& filename); + // Helper to write a string to binary file + inline void writeString(std::ofstream& file, const std::string& str) + { + std::size_t len = str.size(); + file.write(reinterpret_cast(&len), sizeof(len)); + file.write(str.data(), len); + } + + // Helper to read a string from binary file + inline bool readString(std::ifstream& file, std::string& str) + { + std::size_t len; + if (!file.read(reinterpret_cast(&len), sizeof(len))) + return false; + str.resize(len); + return file.read(&str[0], len).good() || file.eof(); + } + template void serializeMap(const std::unordered_map& data, const std::filesystem::path& filename) { @@ -37,6 +56,53 @@ namespace Cache file.close(); } + // Specialized serialization for FieldInfo + template <> + inline void serializeMap(const std::unordered_map& data, const std::filesystem::path& filename) + { + std::ofstream file(filename, std::ios::binary | std::ios::trunc); + if (!file.is_open()) + { + Log::GetLog()->error("Error opening file for writing: " + filename.string()); + return; + } + + for (const auto& entry : data) + { + writeString(file, entry.first); + writeString(file, entry.second.type); + file.write(reinterpret_cast(&entry.second.offset), sizeof(entry.second.offset)); + file.write(reinterpret_cast(&entry.second.isPointer), sizeof(entry.second.isPointer)); + } + + file.close(); + } + + // Specialized serialization for FunctionInfo + template <> + inline void serializeMap(const std::unordered_map& data, const std::filesystem::path& filename) + { + std::ofstream file(filename, std::ios::binary | std::ios::trunc); + if (!file.is_open()) + { + Log::GetLog()->error("Error opening file for writing: " + filename.string()); + return; + } 
+ + for (const auto& entry : data) + { + writeString(file, entry.first); + writeString(file, entry.second.returnType); + writeString(file, entry.second.signature); + writeString(file, entry.second.params); + writeString(file, entry.second.paramNames); + file.write(reinterpret_cast(&entry.second.offset), sizeof(entry.second.offset)); + file.write(reinterpret_cast(&entry.second.isStatic), sizeof(entry.second.isStatic)); + } + + file.close(); + } + template std::unordered_map deserializeMap(const std::filesystem::path& filename) { @@ -79,6 +145,81 @@ namespace Cache return data; } + // Specialized deserialization for FieldInfo + template <> + inline std::unordered_map deserializeMap(const std::filesystem::path& filename) + { + std::unordered_map data; + + if (!std::filesystem::exists(filename)) + { + Log::GetLog()->error("File does not exist: " + filename.string()); + return data; + } + + std::ifstream file(filename, std::ios::binary); + if (!file.is_open()) + { + Log::GetLog()->error("Error opening file for reading: " + filename.string()); + return data; + } + + data.reserve(300000); + + std::string key; + while (readString(file, key)) + { + API::FieldInfo info; + if (!readString(file, info.type)) break; + if (!file.read(reinterpret_cast(&info.offset), sizeof(info.offset))) break; + if (!file.read(reinterpret_cast(&info.isPointer), sizeof(info.isPointer))) break; + data[key] = info; + key.clear(); + } + + file.close(); + return data; + } + + // Specialized deserialization for FunctionInfo + template <> + inline std::unordered_map deserializeMap(const std::filesystem::path& filename) + { + std::unordered_map data; + + if (!std::filesystem::exists(filename)) + { + Log::GetLog()->error("File does not exist: " + filename.string()); + return data; + } + + std::ifstream file(filename, std::ios::binary); + if (!file.is_open()) + { + Log::GetLog()->error("Error opening file for reading: " + filename.string()); + return data; + } + + data.reserve(250000); + + std::string key; 
+ while (readString(file, key)) + { + API::FunctionInfo info; + if (!readString(file, info.returnType)) break; + if (!readString(file, info.signature)) break; + if (!readString(file, info.params)) break; + if (!readString(file, info.paramNames)) break; + if (!file.read(reinterpret_cast(&info.offset), sizeof(info.offset))) break; + if (!file.read(reinterpret_cast(&info.isStatic), sizeof(info.isStatic))) break; + data[key] = info; + key.clear(); + } + + file.close(); + return data; + } + void saveToFilePlain(const std::filesystem::path& filename, const std::unordered_map& map); std::unordered_set readFileIntoSet(const std::filesystem::path& filename); diff --git a/AsaApi/Core/Private/Offsets.cpp b/AsaApi/Core/Private/Offsets.cpp index 90f856f..a6e8bcb 100644 --- a/AsaApi/Core/Private/Offsets.cpp +++ b/AsaApi/Core/Private/Offsets.cpp @@ -41,16 +41,18 @@ namespace API } void Offsets::Init(std::unordered_map&& offsets_dump, - std::unordered_map&& bitfields_dump) + std::unordered_map&& bitfields_dump, + std::unordered_map&& fields_dump, + std::unordered_map&& functions_dump) { offsets_dump_.swap(offsets_dump); bitfields_dump_.swap(bitfields_dump); + fields_dump_.swap(fields_dump); + functions_dump_.swap(functions_dump); } - DWORD64 Offsets::GetAddress(const void* base, const std::string& name) - { - if (!offsets_dump_.contains(name)) - { + DWORD64 Offsets::GetAddress(const void* base, const std::string& name) { + if (!offsets_dump_.contains(name)) { Log::GetLog()->critical("Failed to get the offset of {}.", name); Log::GetLog()->flush(); Sleep(10000); @@ -60,10 +62,8 @@ namespace API return reinterpret_cast(base) + static_cast(offsets_dump_[name]); } - LPVOID Offsets::GetAddress(const std::string& name) - { - if (!offsets_dump_.contains(name)) - { + LPVOID Offsets::GetAddress(const std::string& name) { + if (!offsets_dump_.contains(name)) { Log::GetLog()->critical("Failed to get the offset of {}.", name); Log::GetLog()->flush(); Sleep(10000); @@ -73,10 +73,8 @@ namespace 
API return reinterpret_cast(module_base_ + static_cast(offsets_dump_[name])); } - LPVOID Offsets::GetDataAddress(const std::string& name) - { - if (!offsets_dump_.contains(name)) - { + LPVOID Offsets::GetDataAddress(const std::string& name) { + if (!offsets_dump_.contains(name)) { Log::GetLog()->critical("Failed to get the offset of {}.", name); Log::GetLog()->flush(); Sleep(10000); @@ -86,18 +84,10 @@ namespace API return reinterpret_cast(data_base_ + static_cast(offsets_dump_[name])); } - BitField Offsets::GetBitField(const void* base, const std::string& name) - { - return GetBitFieldInternal(base, name); - } - - BitField Offsets::GetBitField(LPVOID base, const std::string& name) - { - return GetBitFieldInternal(base, name); - } + BitField Offsets::GetBitField(const void* base, const std::string& name) { return GetBitFieldInternal(base, name); } + BitField Offsets::GetBitField(LPVOID base, const std::string& name) { return GetBitFieldInternal(base, name); } - BitField Offsets::GetBitFieldInternal(const void* base, const std::string& name) - { + BitField Offsets::GetBitFieldInternal(const void* base, const std::string& name) { if (!bitfields_dump_.contains(name)) { Log::GetLog()->critical("Failed to get the bitfield address of {}.", name); @@ -115,4 +105,56 @@ namespace API return cf; } + + std::vector> Offsets::GetOffsetsForClass(const std::string& className) const { + std::vector> result; + const std::string prefix = className + "."; + + for (const auto& [key, value] : offsets_dump_) { + if (key.rfind(prefix, 0) == 0) { + result.emplace_back(key, value); + } + } + + return result; + } + + std::vector> Offsets::GetBitFieldsForClass(const std::string& className) const { + std::vector> result; + const std::string prefix = className + "."; + + for (const auto& [key, value] : bitfields_dump_) { + if (key.rfind(prefix, 0) == 0) { + result.emplace_back(key, value); + } + } + + return result; + } + + std::vector> Offsets::GetFieldsForClass(const std::string& className) 
const { + std::vector> result; + const std::string prefix = className + "."; + + for (const auto& [key, value] : fields_dump_) { + if (key.rfind(prefix, 0) == 0) { + result.emplace_back(key, value); + } + } + + return result; + } + + std::vector> Offsets::GetFunctionsForClass(const std::string& className) const { + std::vector> result; + const std::string prefix = className + "."; + + for (const auto& [key, value] : functions_dump_) { + if (key.rfind(prefix, 0) == 0) { + result.emplace_back(key, value); + } + } + + return result; + } } // namespace API diff --git a/AsaApi/Core/Private/Offsets.h b/AsaApi/Core/Private/Offsets.h index bd98e24..c9098e3 100644 --- a/AsaApi/Core/Private/Offsets.h +++ b/AsaApi/Core/Private/Offsets.h @@ -1,8 +1,10 @@ #pragma once #include +#include "PDBReader/PDBReader.h" #include +#include namespace API { @@ -17,7 +19,9 @@ namespace API Offsets& operator=(Offsets&&) = delete; void Init(std::unordered_map&& offsets_dump, - std::unordered_map&& bitfields_dump); + std::unordered_map&& bitfields_dump, + std::unordered_map&& fields_dump = {}, + std::unordered_map&& functions_dump = {}); DWORD64 GetAddress(const void* base, const std::string& name); LPVOID GetAddress(const std::string& name); @@ -27,6 +31,12 @@ namespace API BitField GetBitField(const void* base, const std::string& name); BitField GetBitField(LPVOID base, const std::string& name); + // Get all entries for a specific class + std::vector> GetOffsetsForClass(const std::string& className) const; + std::vector> GetBitFieldsForClass(const std::string& className) const; + std::vector> GetFieldsForClass(const std::string& className) const; + std::vector> GetFunctionsForClass(const std::string& className) const; + private: Offsets(); ~Offsets() = default; @@ -38,5 +48,7 @@ namespace API std::unordered_map offsets_dump_; std::unordered_map bitfields_dump_; + std::unordered_map fields_dump_; + std::unordered_map functions_dump_; }; } // namespace API diff --git 
a/AsaApi/Core/Private/PDBReader/PDBReader.cpp b/AsaApi/Core/Private/PDBReader/PDBReader.cpp index e064593..ce5eba6 100644 --- a/AsaApi/Core/Private/PDBReader/PDBReader.cpp +++ b/AsaApi/Core/Private/PDBReader/PDBReader.cpp @@ -1,6 +1,10 @@ #include "PDBReader.h" #include +#include +#include +#include +#include #include #include @@ -8,430 +12,1041 @@ #include "../Private/Helpers.h" #include "../Private/Offsets.h" +#include +#pragma comment(lib, "dbghelp.lib") + +// raw_pdb includes +#include "PDB.h" +#include "PDB_RawFile.h" +#include "PDB_InfoStream.h" +#include "PDB_DBIStream.h" +#include "PDB_TPIStream.h" +#include "PDB_ModuleInfoStream.h" +#include "PDB_ModuleSymbolStream.h" +#include "PDB_ImageSectionStream.h" +#include "PDB_GlobalSymbolStream.h" +#include "PDB_PublicSymbolStream.h" +#include "PDB_CoalescedMSFStream.h" +#include "PDB_TPITypes.h" +#include "PDB_DBITypes.h" + namespace API { - template - class ScopedDiaType + struct MemoryMappedFile { - public: - ScopedDiaType() : _sym(nullptr) + void* baseAddress = nullptr; + size_t length = 0; + HANDLE fileHandle = INVALID_HANDLE_VALUE; + HANDLE mappingHandle = nullptr; + + static MemoryMappedFile Open(const std::wstring& path) { + MemoryMappedFile file; + + file.fileHandle = CreateFileW(path.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, + OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr); + + if (file.fileHandle == INVALID_HANDLE_VALUE) + return file; + + LARGE_INTEGER fileSize; + if (!GetFileSizeEx(file.fileHandle, &fileSize)) { + CloseHandle(file.fileHandle); + file.fileHandle = INVALID_HANDLE_VALUE; + return file; + } + + file.length = static_cast(fileSize.QuadPart); + file.mappingHandle = CreateFileMappingW(file.fileHandle, nullptr, PAGE_READONLY, 0, 0, nullptr); + if (!file.mappingHandle) { + CloseHandle(file.fileHandle); + file.fileHandle = INVALID_HANDLE_VALUE; + return file; + } + + file.baseAddress = MapViewOfFile(file.mappingHandle, FILE_MAP_READ, 0, 0, 0); + if (!file.baseAddress) { + 
CloseHandle(file.mappingHandle); + CloseHandle(file.fileHandle); + file.fileHandle = INVALID_HANDLE_VALUE; + file.mappingHandle = nullptr; + } + + return file; } - ScopedDiaType(T* sym) : _sym(sym) - { + static void Close(MemoryMappedFile& file) { + if (file.baseAddress) { + UnmapViewOfFile(file.baseAddress); + file.baseAddress = nullptr; + } + + if (file.mappingHandle) { + CloseHandle(file.mappingHandle); + file.mappingHandle = nullptr; + } + + if (file.fileHandle != INVALID_HANDLE_VALUE) { + CloseHandle(file.fileHandle); + file.fileHandle = INVALID_HANDLE_VALUE; + } } + }; - ~ScopedDiaType() + class TypeTable { + public: + TypeTable(const PDB::TPIStream& tpiStream) + : m_firstTypeIndex(tpiStream.GetFirstTypeIndex()) + , m_lastTypeIndex(tpiStream.GetLastTypeIndex()) + , m_recordCount(tpiStream.GetTypeRecordCount()) { - if (_sym != nullptr) - _sym->Release(); + const PDB::DirectMSFStream& directStream = tpiStream.GetDirectMSFStream(); + m_stream = PDB::CoalescedMSFStream(directStream, directStream.GetSize(), 0); + + m_records.resize(m_recordCount); + + uint32_t typeIndex = 0; + tpiStream.ForEachTypeRecordHeaderAndOffset([this, &typeIndex](const PDB::CodeView::TPI::RecordHeader& header, size_t offset) { + const PDB::CodeView::TPI::Record* record = m_stream.GetDataAtOffset(offset); + m_records[typeIndex] = record; + ++typeIndex; + }); + } + + const PDB::CodeView::TPI::Record* GetTypeRecord(uint32_t typeIndex) const { + if (typeIndex < m_firstTypeIndex || typeIndex > m_lastTypeIndex) + return nullptr; + + const size_t index = typeIndex - m_firstTypeIndex; + if (index >= m_records.size()) + return nullptr; + + return m_records[index]; } - T** ref() { return &_sym; } - T** operator&() { return ref(); } - T* operator->() { return _sym; } - operator T*() { return _sym; } - void Attach(T* sym) { _sym = sym; } + uint32_t GetFirstTypeIndex() const { return m_firstTypeIndex; } + uint32_t GetLastTypeIndex() const { return m_lastTypeIndex; } + const std::vector& 
GetTypeRecords() const { return m_records; } private: - T* _sym; + uint32_t m_firstTypeIndex; + uint32_t m_lastTypeIndex; + size_t m_recordCount; + std::vector m_records; + PDB::CoalescedMSFStream m_stream; }; - template - using CComPtr = ScopedDiaType; + void PdbReader::AddOffset(const std::string& key, intptr_t value) { + std::lock_guard lock(offsets_mutex_); + (*offsets_dump_)[key] = value; + } - void PdbReader::Read(const std::wstring& path, std::unordered_map* offsets_dump, - std::unordered_map* bitfields_dump, const std::unordered_set filter_set) - { - offsets_dump_ = offsets_dump; - bitfields_dump_ = bitfields_dump; - filter_set_ = filter_set; + void PdbReader::AddBitField(const std::string& key, const BitField& value) { + std::lock_guard lock(bitfields_mutex_); + (*bitfields_dump_)[key] = value; + } - offsets_dump_->reserve(550000); - bitfields_dump_->reserve(11000); + void PdbReader::AddFieldInfo(const std::string& key, const std::string& typeName, intptr_t offset, bool isPointer) { + if (!fields_dump_) return; + std::lock_guard lock(fields_mutex_); + FieldInfo info; + info.type = typeName; + info.offset = offset; + info.isPointer = isPointer; + (*fields_dump_)[key] = info; + } - std::ifstream f{path}; - if (!f.good()) - throw std::runtime_error("Failed to open pdb file"); - - IDiaDataSource* data_source; - IDiaSession* dia_session; - IDiaSymbol* symbol; + void PdbReader::AddFunctionInfo(const std::string& key, const std::string& returnType, const std::string& signature, const std::string& params, const std::string& paramNames, intptr_t offset, bool isStatic) { + if (!functions_dump_) return; + std::lock_guard lock(functions_mutex_); + FunctionInfo info; + info.returnType = returnType; + info.signature = signature; + info.params = params; + info.paramNames = paramNames; + info.offset = offset; + info.isStatic = isStatic; + (*functions_dump_)[key] = info; + } - try - { - LoadDataFromPdb(path, &data_source, &dia_session, &symbol); + bool 
PdbReader::MarkVisited(uint32_t id) { + std::lock_guard lock(visited_mutex_); + if (visited_.find(id) != visited_.end()) + return false; + visited_.insert(id); + return true; + } + + bool PdbReader::FilterSymbols(const std::string& name) const { + if (name.empty()) + return true; + + for (const auto& filter : filter_set_) { + if (name.rfind(filter, 0) == 0 && name.rfind("UE::GC", 0) != 0) + return true; } - catch (const std::runtime_error&) - { - Log::GetLog()->error("Failed to load data from pdb file "); - throw; + + if (name.find('`') != std::string::npos) + return true; + + return false; + } + + static std::string ReplaceAll(std::string str, const std::string& from, const std::string& to) { + size_t pos = 0; + while ((pos = str.find(from, pos)) != std::string::npos) { + str.replace(pos, from.length(), to); + pos += to.length(); } + return str; + } - Log::GetLog()->info("Dumping structures.."); - DumpStructs(symbol); + std::string UndecorateName(const char* decoratedName, DWORD flags) { + if (!decoratedName || decoratedName[0] != '?') + return decoratedName ? 
decoratedName : ""; - Log::GetLog()->info("Dumping functions.."); - DumpFunctions(symbol); + char undecoratedName[4096]; + if (UnDecorateSymbolName(decoratedName, undecoratedName, sizeof(undecoratedName), flags) == 0) + return decoratedName; - Log::GetLog()->info("Dumping globals.."); - DumpGlobalVariables(symbol); + return std::string(undecoratedName); + } - Cleanup(symbol, dia_session, data_source); + std::string ExtractFunctionParams(const char* name) { + if (!name) + return ""; - Log::GetLog()->info("Successfully read information from PDB\n"); + std::string result; + + if (name[0] == '?') + result = UndecorateName(name, 0x20000); + else + result = name; + + size_t start = result.find('('); + size_t end = result.rfind(')'); + + if (start == std::string::npos || end == std::string::npos || end <= start) + return ""; + + std::string params = result.substr(start + 1, end - start - 1); + + params = ReplaceAll(params, "struct ", ""); + params = ReplaceAll(params, "class ", ""); + params = ReplaceAll(params, "enum ", ""); + params = ReplaceAll(params, "const ", ""); + params = ReplaceAll(params, " ", ""); + params = ReplaceAll(params, "__ptr64", ""); + + if (params == "void") + params.clear(); + + return params; } - void PdbReader::LoadDataFromPdb(const std::wstring& path, IDiaDataSource** dia_source, IDiaSession** session, - IDiaSymbol** symbol) - { - const std::string current_dir = Tools::GetCurrentDir(); + std::string ExtractReturnType(const char* name) { + if (!name) + return "void"; - const std::string lib_path = current_dir + "\\msdia140.dll"; - const HMODULE h_module = LoadLibraryA(lib_path.c_str()); - if (h_module == nullptr) - { - throw std::runtime_error("Failed to load msdia140.dll. 
Error code - " + std::to_string(GetLastError())); - } + std::string result; + + if (name[0] == '?') + result = UndecorateName(name, 0x0); + else + result = name; + + size_t parenPos = result.find('('); + if (parenPos == std::string::npos) + return "void"; + + size_t funcStart = result.rfind(' ', parenPos); + if (funcStart == std::string::npos) + return "void"; + + std::string beforeFunc = result.substr(0, funcStart); + beforeFunc = ReplaceAll(beforeFunc, "__cdecl", ""); + beforeFunc = ReplaceAll(beforeFunc, "__stdcall", ""); + beforeFunc = ReplaceAll(beforeFunc, "__fastcall", ""); + beforeFunc = ReplaceAll(beforeFunc, "__thiscall", ""); + beforeFunc = ReplaceAll(beforeFunc, "__vectorcall", ""); + beforeFunc = ReplaceAll(beforeFunc, "public:", ""); + beforeFunc = ReplaceAll(beforeFunc, "private:", ""); + beforeFunc = ReplaceAll(beforeFunc, "protected:", ""); + beforeFunc = ReplaceAll(beforeFunc, "virtual ", ""); + beforeFunc = ReplaceAll(beforeFunc, "static ", ""); + beforeFunc = ReplaceAll(beforeFunc, "struct ", ""); + beforeFunc = ReplaceAll(beforeFunc, "class ", ""); + beforeFunc = ReplaceAll(beforeFunc, "enum ", ""); + beforeFunc = ReplaceAll(beforeFunc, "__ptr64", ""); + + size_t start = beforeFunc.find_first_not_of(" \t"); + size_t end = beforeFunc.find_last_not_of(" \t"); + if (start == std::string::npos) + return "void"; + + std::string returnType = beforeFunc.substr(start, end - start + 1); + + if (returnType.empty() || returnType.find_first_not_of(" \t") == std::string::npos) + return "void"; + + returnType = ReplaceAll(returnType, " ", ""); + + return returnType; + } - const auto dll_get_class_object = reinterpret_cast(GetProcAddress( - h_module, "DllGetClassObject")); - if (dll_get_class_object == nullptr) - { - throw std::runtime_error("Can't find DllGetClassObject. 
Error code - " + std::to_string(GetLastError())); + std::string ExtractFunctionName(const char* name) { + if (!name) + return ""; + + std::string result; + + if (name[0] == '?') + result = UndecorateName(name, 0x1000); + else { + result = name; + size_t parenPos = result.find('('); + if (parenPos != std::string::npos) { + result = result.substr(0, parenPos); + } } - IClassFactory* class_factory; - HRESULT hr = dll_get_class_object(__uuidof(DiaSource), IID_IClassFactory, &class_factory); - if (FAILED(hr)) - { - throw std::runtime_error("DllGetClassObject has failed. Error code - " + std::to_string(GetLastError())); - } + return result; + } - hr = class_factory->CreateInstance(nullptr, __uuidof(IDiaDataSource), reinterpret_cast(dia_source)); - if (FAILED(hr)) - { - class_factory->Release(); - throw std::runtime_error("CreateInstance has failed. Error code - " + std::to_string(GetLastError())); - } + void PdbReader::CollectFunctionParamNames(const PDB::RawFile& rawFile, const PDB::DBIStream& dbiStream) { + const PDB::ModuleInfoStream moduleInfoStream = dbiStream.CreateModuleInfoStream(rawFile); + const PDB::ArrayView modules = moduleInfoStream.GetModules(); - hr = (*dia_source)->loadDataFromPdb(path.c_str()); - if (FAILED(hr)) - { - class_factory->Release(); - throw std::runtime_error("loadDataFromPdb has failed. 
HRESULT - " + std::to_string(hr)); - } + for (const PDB::ModuleInfoStream::Module& module : modules) { + if (!module.HasSymbolStream()) + continue; - // Open a session for querying symbols + const PDB::ModuleSymbolStream moduleSymbolStream = module.CreateSymbolStream(rawFile); + + uint32_t currentFuncOffset = 0; + std::string currentFuncKey; + std::vector currentParams; + bool inFunction = false; + bool hasThisPointer = false; + + moduleSymbolStream.ForEachSymbol([&](const PDB::CodeView::DBI::Record* record) { + switch (record->header.kind) { + case PDB::CodeView::DBI::SymbolRecordKind::S_GPROC32: + case PDB::CodeView::DBI::SymbolRecordKind::S_LPROC32: + case PDB::CodeView::DBI::SymbolRecordKind::S_GPROC32_ID: + case PDB::CodeView::DBI::SymbolRecordKind::S_LPROC32_ID: { + if (inFunction && currentFuncOffset != 0) { + std::lock_guard lock(param_names_mutex_); + if (!currentParams.empty()) { + std::string paramNamesStr; + for (size_t i = 0; i < currentParams.size(); i++) + { + if (i > 0) paramNamesStr += ","; + paramNamesStr += currentParams[i]; + } + param_names_map_[currentFuncOffset] = paramNamesStr; + } + } + if (inFunction && !currentFuncKey.empty()) { + std::lock_guard lock(func_has_this_mutex_); + func_has_this_map_[currentFuncKey] = hasThisPointer; + } + + currentFuncOffset = record->data.S_GPROC32.offset; + currentParams.clear(); + inFunction = true; + hasThisPointer = false; + + const char* name = record->data.S_GPROC32.name; + if (name) { + std::string funcName = ExtractFunctionName(name); + if (funcName.find("::") != std::string::npos) + currentFuncKey = ReplaceAll(funcName, "::", "."); + else + currentFuncKey = "Global." 
+ funcName; + } + else + currentFuncKey.clear(); + + break; + } + + case PDB::CodeView::DBI::SymbolRecordKind::S_REGREL32: { + if (inFunction) { + const char* paramName = record->data.S_REGREL32.name; + if (paramName && paramName[0] != '\0') { + std::string nameStr = paramName; + + if (nameStr == "this" || nameStr == "_this") + hasThisPointer = true; + else + currentParams.push_back(paramName); + } + } + + break; + } + + case PDB::CodeView::DBI::SymbolRecordKind::S_BPREL32: { + if (inFunction) { + const char* paramName = record->data.S_BPRELSYM32.name; + if (paramName && paramName[0] != '\0') { + std::string nameStr = paramName; + + if (nameStr == "this" || nameStr == "_this") + { + hasThisPointer = true; + } + else + { + currentParams.push_back(paramName); + } + } + } + break; + } + + case PDB::CodeView::DBI::SymbolRecordKind::S_END: + case PDB::CodeView::DBI::SymbolRecordKind::S_PROC_ID_END: { + if (inFunction && currentFuncOffset != 0) { + std::lock_guard lock(param_names_mutex_); + if (!currentParams.empty()) { + std::string paramNamesStr; + for (size_t i = 0; i < currentParams.size(); i++) { + if (i > 0) paramNamesStr += ","; + paramNamesStr += currentParams[i]; + } + param_names_map_[currentFuncOffset] = paramNamesStr; + } + } + if (inFunction && !currentFuncKey.empty()) { + std::lock_guard lock(func_has_this_mutex_); + func_has_this_map_[currentFuncKey] = hasThisPointer; + } + + inFunction = false; + currentFuncOffset = 0; + currentFuncKey.clear(); + currentParams.clear(); + hasThisPointer = false; + break; + } + + default: + break; + } + }); + + if (inFunction && currentFuncOffset != 0) { + std::lock_guard lock(param_names_mutex_); + if (!currentParams.empty()) { + std::string paramNamesStr; + for (size_t i = 0; i < currentParams.size(); i++) { + if (i > 0) paramNamesStr += ","; + paramNamesStr += currentParams[i]; + } + param_names_map_[currentFuncOffset] = paramNamesStr; + } + } - hr = (*dia_source)->openSession(session); - if (FAILED(hr)) - { - 
class_factory->Release(); - throw std::runtime_error("openSession has failed. HRESULT - " + std::to_string(hr)); + if (inFunction && !currentFuncKey.empty()) { + std::lock_guard lock(func_has_this_mutex_); + func_has_this_map_[currentFuncKey] = hasThisPointer; + } } + } - // Retrieve a reference to the global scope - - hr = (*session)->get_globalScope(symbol); - if (hr != S_OK) - { - class_factory->Release(); - throw std::runtime_error("get_globalScope has failed. HRESULT - " + std::to_string(hr)); + bool PdbReader::FunctionHasThisPointer(const std::string& funcName) const { + std::string baseName = funcName; + size_t parenPos = funcName.find('('); + if (parenPos != std::string::npos) { + baseName = funcName.substr(0, parenPos); + } + + auto it = func_has_this_map_.find(baseName); + if (it != func_has_this_map_.end()) { + return it->second; } - class_factory->Release(); + return true; } - void PdbReader::DumpStructs(IDiaSymbol* g_symbol) - { - IDiaSymbol* symbol = nullptr; + void PdbReader::ProcessModuleFunctions(const PDB::RawFile& rawFile, const PDB::DBIStream& dbiStream) { + const PDB::ModuleInfoStream moduleInfoStream = dbiStream.CreateModuleInfoStream(rawFile); + const PDB::ArrayView modules = moduleInfoStream.GetModules(); - CComPtr enum_symbols; - if (FAILED(g_symbol->findChildren(SymTagUDT, nullptr, nsNone, &enum_symbols))) - throw std::runtime_error("Failed to find symbols"); + for (const PDB::ModuleInfoStream::Module& module : modules) { + if (!module.HasSymbolStream()) + continue; - ULONG celt = 0; - while (SUCCEEDED(enum_symbols->Next(1, &symbol, &celt)) && celt == 1) - { - CComPtr sym(symbol); + const PDB::ModuleSymbolStream moduleSymbolStream = module.CreateSymbolStream(rawFile); - const uint32_t sym_id = GetSymbolId(symbol); - if (visited_.find(sym_id) != visited_.end()) - return; + moduleSymbolStream.ForEachSymbol([&](const PDB::CodeView::DBI::Record* record) { + if (record->header.kind != PDB::CodeView::DBI::SymbolRecordKind::S_GPROC32 && 
record->header.kind != PDB::CodeView::DBI::SymbolRecordKind::S_LPROC32 && record->header.kind != PDB::CodeView::DBI::SymbolRecordKind::S_GPROC32_ID && record->header.kind != PDB::CodeView::DBI::SymbolRecordKind::S_LPROC32_ID) return; - visited_.insert(sym_id); + const char* name = record->data.S_GPROC32.name; + uint32_t offset = record->data.S_GPROC32.offset; - std::string str_name = GetSymbolNameString(sym); + if (!name || offset == 0) + return; - if (FilterSymbols(str_name)) - continue; + std::string funcName = ExtractFunctionName(name); + + if (FilterSymbols(funcName)) + return; + + std::string params = ExtractFunctionParams(name); + + std::string fullName; + if (funcName.find("::") != std::string::npos) + fullName = ReplaceAll(funcName, "::", ".") + "(" + params + ")"; + else + fullName = "Global." + funcName + "(" + params + ")"; - DumpType(sym, str_name, 0); + { + std::lock_guard lock(offsets_mutex_); + if (offsets_dump_->find(fullName) != offsets_dump_->end()) + return; + } + + AddOffset(fullName, static_cast(offset)); + + std::string returnType = ExtractReturnType(name); + std::string signature = funcName.substr(funcName.rfind("::") != std::string::npos ? funcName.rfind("::") + 2 : funcName.rfind('.') != std::string::npos ? 
funcName.rfind('.') + 1 : 0); + signature += "(" + params + ")"; + + std::string paramNames = GetParamNamesForOffset(offset); + bool isMemberFunction = (funcName.find("::") != std::string::npos); + bool isStatic = isMemberFunction && !FunctionHasThisPointer(fullName); + + AddFunctionInfo(fullName, returnType, signature, params, paramNames, static_cast(offset), isStatic); + }); } } - void PdbReader::DumpFunctions(IDiaSymbol* g_symbol) + std::string PdbReader::GetParamNamesForOffset(uint32_t offset) const { + auto it = param_names_map_.find(offset); + if (it != param_names_map_.end()) + { + return it->second; + } + return ""; + } + + static size_t GetLeafSize(PDB::CodeView::TPI::TypeRecordKind kind) { + if (kind < PDB::CodeView::TPI::TypeRecordKind::LF_NUMERIC) + return sizeof(uint16_t); + + switch (kind) { + case PDB::CodeView::TPI::TypeRecordKind::LF_CHAR: + return sizeof(PDB::CodeView::TPI::TypeRecordKind) + sizeof(uint8_t); + case PDB::CodeView::TPI::TypeRecordKind::LF_SHORT: + case PDB::CodeView::TPI::TypeRecordKind::LF_USHORT: + return sizeof(PDB::CodeView::TPI::TypeRecordKind) + sizeof(uint16_t); + case PDB::CodeView::TPI::TypeRecordKind::LF_LONG: + case PDB::CodeView::TPI::TypeRecordKind::LF_ULONG: + return sizeof(PDB::CodeView::TPI::TypeRecordKind) + sizeof(uint32_t); + case PDB::CodeView::TPI::TypeRecordKind::LF_QUADWORD: + case PDB::CodeView::TPI::TypeRecordKind::LF_UQUADWORD: + return sizeof(PDB::CodeView::TPI::TypeRecordKind) + sizeof(uint64_t); + default: + return 0; + } + } + + static uint64_t GetLeafValue(const char* data, PDB::CodeView::TPI::TypeRecordKind kind) { - IDiaSymbol* symbol; + if (kind < PDB::CodeView::TPI::TypeRecordKind::LF_NUMERIC) + return *reinterpret_cast(data); + + const char* valueData = data + sizeof(PDB::CodeView::TPI::TypeRecordKind); + + switch (kind) { + case PDB::CodeView::TPI::TypeRecordKind::LF_CHAR: + return static_cast(*reinterpret_cast(valueData)); + case PDB::CodeView::TPI::TypeRecordKind::LF_SHORT: + return 
static_cast(*reinterpret_cast(valueData)); + case PDB::CodeView::TPI::TypeRecordKind::LF_USHORT: + return static_cast(*reinterpret_cast(valueData)); + case PDB::CodeView::TPI::TypeRecordKind::LF_LONG: + return static_cast(*reinterpret_cast(valueData)); + case PDB::CodeView::TPI::TypeRecordKind::LF_ULONG: + return static_cast(*reinterpret_cast(valueData)); + case PDB::CodeView::TPI::TypeRecordKind::LF_QUADWORD: + return static_cast(*reinterpret_cast(valueData)); + case PDB::CodeView::TPI::TypeRecordKind::LF_UQUADWORD: + return *reinterpret_cast(valueData); + default: + return 0; + } + } - CComPtr enum_symbols; - if (FAILED(g_symbol->findChildren(SymTagFunction, nullptr, nsNone, &enum_symbols))) - throw std::runtime_error("Failed to find symbols"); + static const char* GetLeafName(const char* data, PDB::CodeView::TPI::TypeRecordKind kind) { return &data[GetLeafSize(kind)]; } + + std::string PdbReader::GetTypeName(const TypeTable& typeTable, uint32_t typeIndex) const { return GetTypeNameInternal(typeTable, typeIndex, 0); } + + std::string PdbReader::GetTypeNameInternal(const TypeTable& typeTable, uint32_t typeIndex, int depth) const { + if (depth > 50) + return ""; + + if (typeIndex < typeTable.GetFirstTypeIndex()) { + PDB::CodeView::TPI::TypeIndexKind type = (PDB::CodeView::TPI::TypeIndexKind)(typeIndex); + switch (type) { + case PDB::CodeView::TPI::TypeIndexKind::T_VOID: return "void"; + case PDB::CodeView::TPI::TypeIndexKind::T_CHAR: return "char"; + case PDB::CodeView::TPI::TypeIndexKind::T_UCHAR: return "unsigned char"; + case PDB::CodeView::TPI::TypeIndexKind::T_SHORT: return "short"; + case PDB::CodeView::TPI::TypeIndexKind::T_USHORT: return "unsigned short"; + case PDB::CodeView::TPI::TypeIndexKind::T_LONG: return "long"; + case PDB::CodeView::TPI::TypeIndexKind::T_ULONG: return "unsigned long"; + case PDB::CodeView::TPI::TypeIndexKind::T_INT4: return "int"; + case PDB::CodeView::TPI::TypeIndexKind::T_UINT4: return "unsigned int"; + case 
PDB::CodeView::TPI::TypeIndexKind::T_QUAD: return "__int64"; + case PDB::CodeView::TPI::TypeIndexKind::T_UQUAD: return "unsigned __int64"; + case PDB::CodeView::TPI::TypeIndexKind::T_REAL32: return "float"; + case PDB::CodeView::TPI::TypeIndexKind::T_REAL64: return "double"; + case PDB::CodeView::TPI::TypeIndexKind::T_BOOL08: return "bool"; + case PDB::CodeView::TPI::TypeIndexKind::T_WCHAR: return "wchar_t"; + case PDB::CodeView::TPI::TypeIndexKind::T_32PVOID: + case PDB::CodeView::TPI::TypeIndexKind::T_64PVOID: return "void*"; + default: return ""; + } + } - ULONG celt = 0; - std::stringstream ss; - while (SUCCEEDED(enum_symbols->Next(1, &symbol, &celt)) && celt == 1) - { - CComPtr sym(symbol); + const PDB::CodeView::TPI::Record* record = typeTable.GetTypeRecord(typeIndex); + if (!record) + return ""; + + switch (record->header.kind) { + case PDB::CodeView::TPI::TypeRecordKind::LF_CLASS: + case PDB::CodeView::TPI::TypeRecordKind::LF_STRUCTURE: + return GetLeafName(record->data.LF_CLASS.data, record->data.LF_CLASS.lfEasy.kind); + case PDB::CodeView::TPI::TypeRecordKind::LF_UNION: + return GetLeafName(record->data.LF_UNION.data, static_cast(0)); + case PDB::CodeView::TPI::TypeRecordKind::LF_ENUM: + return record->data.LF_ENUM.name; + case PDB::CodeView::TPI::TypeRecordKind::LF_POINTER: + return GetTypeNameInternal(typeTable, record->data.LF_POINTER.utype, depth + 1) + "*"; + case PDB::CodeView::TPI::TypeRecordKind::LF_MODIFIER: + return GetTypeNameInternal(typeTable, record->data.LF_MODIFIER.type, depth + 1); + case PDB::CodeView::TPI::TypeRecordKind::LF_ARRAY: + return GetTypeNameInternal(typeTable, record->data.LF_ARRAY.elemtype, depth + 1) + "[]"; + default: + return ""; + } + } - DWORD sym_tag_type; - if (sym->get_symTag(&sym_tag_type) != S_OK) - continue; + void PdbReader::ProcessFieldList(const PDB::CodeView::TPI::Record* record, const std::string& structName, const TypeTable& typeTable) { + if (!record || record->header.kind != 
PDB::CodeView::TPI::TypeRecordKind::LF_FIELDLIST) + return; - std::string str_name = GetSymbolNameString(sym); + const uint32_t maxSize = record->header.size - sizeof(uint16_t); + if (maxSize == 0 || maxSize > 1000000) + return; - if (FilterSymbols(str_name)) - continue; + for (size_t i = 0; i < maxSize;) { + const uint8_t* rawData = reinterpret_cast(&record->data.LF_FIELD.list) + i; + if (*rawData >= 0xF0) { + size_t padBytes = (*rawData & 0x0F); i += (padBytes > 0) ? padBytes : 1; + continue; + } - const uint32_t sym_id = GetSymbolId(sym); + if (i + sizeof(PDB::CodeView::TPI::TypeRecordKind) > maxSize) + break; - if (visited_.find(sym_id) != visited_.end()) - continue; + const PDB::CodeView::TPI::FieldList* fieldRecord = reinterpret_cast(reinterpret_cast(&record->data.LF_FIELD.list) + i); - visited_.insert(sym_id); + if (fieldRecord->kind == PDB::CodeView::TPI::TypeRecordKind::LF_MEMBER) { + uint64_t offset = GetLeafValue(fieldRecord->data.LF_MEMBER.offset, fieldRecord->data.LF_MEMBER.lfEasy.kind); - if (str_name.empty()) - continue; + const char* memberName = GetLeafName(fieldRecord->data.LF_MEMBER.offset, fieldRecord->data.LF_MEMBER.lfEasy.kind); + + if (!memberName || memberName < reinterpret_cast(record) || memberName >= reinterpret_cast(record) + record->header.size + sizeof(uint16_t)) { + i += 8; + i = (i + 3) & ~3; + continue; + } - DWORD offset; - if (sym->get_addressOffset(&offset) != S_OK) - continue; + size_t nameLen = strnlen(memberName, maxSize - i); + if (nameLen > 0 && nameLen < 1000) { + const std::string fullName = structName + "." 
+ memberName; + + const PDB::CodeView::TPI::Record* memberType = typeTable.GetTypeRecord(fieldRecord->data.LF_MEMBER.index); + if (memberType && memberType->header.kind == PDB::CodeView::TPI::TypeRecordKind::LF_BITFIELD) { + BitField bitField; + bitField.offset = static_cast(offset); + bitField.bit_position = memberType->data.LF_BITFIELD.position; + bitField.num_bits = memberType->data.LF_BITFIELD.length; + + uint32_t underlyingType = memberType->data.LF_BITFIELD.type; + if (underlyingType < typeTable.GetFirstTypeIndex()) { + uint32_t sizeIndicator = (underlyingType >> 4) & 0x7; + switch (sizeIndicator) { + case 0: bitField.length = 1; break; + case 1: bitField.length = 2; break; + case 2: bitField.length = 4; break; + case 3: bitField.length = 8; break; + default: bitField.length = 4; break; + } + } + else { + bitField.length = 4; + } + + AddBitField(fullName, bitField); + } + else { + std::string typeName = GetTypeName(typeTable, fieldRecord->data.LF_MEMBER.index); + bool isPointer = typeName.back() == '*'; + + AddOffset(fullName, static_cast(offset)); + AddFieldInfo(fullName, typeName, static_cast(offset), isPointer); + } + } - ss.clear(); - ss.str(std::string()); + i += static_cast(memberName - reinterpret_cast(fieldRecord)); + i += nameLen + 1; + i = (i + 3) & ~3; + } + else if (fieldRecord->kind == PDB::CodeView::TPI::TypeRecordKind::LF_STMEMBER) { + const char* memberName = fieldRecord->data.LF_STMEMBER.name; + if (!memberName) { + i += 8; + i = (i + 3) & ~3; + continue; + } - // Check if it's a member function - if (str_name.find(':') != std::string::npos) - { - ss << ReplaceString(str_name, "::", ".") << "(" << GetFunctionSymbolParams(sym) << ")"; - (*offsets_dump_)[ss.str()] = offset; - //const std::string new_str = ReplaceString(str_name, "::", ".") + "(" + GetFunctionSymbolParams(sym) + ")"; - //(*offsets_dump_)[new_str] = offset; + size_t nameLen = strnlen(memberName, maxSize - i); + i += static_cast(memberName - reinterpret_cast(fieldRecord)); + i += 
nameLen + 1; + i = (i + 3) & ~3; } - else - { - ss << "Global." << str_name << "(" << GetFunctionSymbolParams(sym) << ")"; - (*offsets_dump_)[ss.str()] = offset; - //(*offsets_dump_)["Global." + str_name + "(" + GetFunctionSymbolParams(sym) + ")"] = offset; + else if (fieldRecord->kind == PDB::CodeView::TPI::TypeRecordKind::LF_BCLASS) { + size_t leafSize = GetLeafSize(fieldRecord->data.LF_BCLASS.lfEasy.kind); + i += sizeof(PDB::CodeView::TPI::TypeRecordKind) + sizeof(PDB::CodeView::TPI::MemberAttributes) + sizeof(uint32_t) + leafSize; + i = (i + 3) & ~3; } - } - } - - void PdbReader::DumpGlobalVariables(IDiaSymbol* g_symbol) - { - IDiaSymbol* symbol; - - CComPtr enum_symbols; - if (FAILED(g_symbol->findChildren(SymTagData, nullptr, nsNone, &enum_symbols))) - throw std::runtime_error("Failed to find symbols"); + else if (fieldRecord->kind == PDB::CodeView::TPI::TypeRecordKind::LF_INDEX) { + const PDB::CodeView::TPI::Record* nextRecord = typeTable.GetTypeRecord(fieldRecord->data.LF_INDEX.type); + if (nextRecord) + ProcessFieldList(nextRecord, structName, typeTable); + + i += sizeof(PDB::CodeView::TPI::FieldList::Data::LF_INDEX); + i = (i + 3) & ~3; + } + else if (fieldRecord->kind == PDB::CodeView::TPI::TypeRecordKind::LF_VFUNCTAB) { + i += sizeof(PDB::CodeView::TPI::FieldList::Data::LF_VFUNCTAB); + i = (i + 3) & ~3; + } + else if (fieldRecord->kind == PDB::CodeView::TPI::TypeRecordKind::LF_ONEMETHOD) { + auto methodProp = static_cast(fieldRecord->data.LF_ONEMETHOD.attributes.mprop); + const char* methodName = nullptr; + + if (methodProp == PDB::CodeView::TPI::MethodProperty::Intro || methodProp == PDB::CodeView::TPI::MethodProperty::PureIntro) + methodName = &reinterpret_cast(fieldRecord->data.LF_ONEMETHOD.vbaseoff)[sizeof(uint32_t)]; + else + methodName = &reinterpret_cast(fieldRecord->data.LF_ONEMETHOD.vbaseoff)[0]; + + if (!methodName || methodName < reinterpret_cast(record)) { + i += 8; + i = (i + 3) & ~3; + continue; + } - ULONG celt = 0; - while 
(SUCCEEDED(enum_symbols->Next(1, &symbol, &celt)) && celt == 1) - { - CComPtr sym(symbol); + size_t nameLen = strnlen(methodName, maxSize - i); + i += static_cast(methodName - reinterpret_cast(fieldRecord)); + i += nameLen + 1; + i = (i + 3) & ~3; + } + else if (fieldRecord->kind == PDB::CodeView::TPI::TypeRecordKind::LF_METHOD) { + const char* methodName = fieldRecord->data.LF_METHOD.name; + if (!methodName) { + i += 8; + i = (i + 3) & ~3; + continue; + } - const uint32_t sym_id = GetSymbolId(symbol); - if (visited_.find(sym_id) != visited_.end()) - return; + size_t nameLen = strnlen(methodName, maxSize - i); + i += static_cast(methodName - reinterpret_cast(fieldRecord)); + i += nameLen + 1; + i = (i + 3) & ~3; + } + else if (fieldRecord->kind == PDB::CodeView::TPI::TypeRecordKind::LF_NESTTYPE) { + const char* nestName = fieldRecord->data.LF_NESTTYPE.name; + if (!nestName) { + i += 8; + i = (i + 3) & ~3; + continue; + } - visited_.insert(sym_id); + size_t nameLen = strnlen(nestName, maxSize - i); + i += static_cast(nestName - reinterpret_cast(fieldRecord)); + i += nameLen + 1; + i = (i + 3) & ~3; + } + else if (fieldRecord->kind == PDB::CodeView::TPI::TypeRecordKind::LF_ENUMERATE) { + const char* enumName = GetLeafName(fieldRecord->data.LF_ENUMERATE.value, fieldRecord->data.LF_ENUMERATE.lfEasy.kind); + if (!enumName || enumName < reinterpret_cast(record)) { + i += 8; + i = (i + 3) & ~3; + continue; + } - std::string str_name = GetSymbolNameString(sym); - if (FilterSymbols(str_name)) - continue; + size_t nameLen = strnlen(enumName, maxSize - i); + i += static_cast(enumName - reinterpret_cast(fieldRecord)); + i += nameLen + 1; + i = (i + 3) & ~3; + } + else if (fieldRecord->kind == PDB::CodeView::TPI::TypeRecordKind::LF_VBCLASS || fieldRecord->kind == PDB::CodeView::TPI::TypeRecordKind::LF_IVBCLASS) { + const uint8_t* basePtr = reinterpret_cast(fieldRecord); + const uint8_t* leafPtr = reinterpret_cast(&fieldRecord->data.LF_VBCLASS.vbpOffset); - DWORD sym_tag; - if 
(sym->get_symTag(&sym_tag) != S_OK) - continue; + auto leaf1Kind = *reinterpret_cast(leafPtr); + size_t leaf1Size = GetLeafSize(leaf1Kind); - DWORD offset; - if (sym->get_addressOffset(&offset) != S_OK) - continue; + const uint8_t* leaf2Ptr = leafPtr + leaf1Size; + auto leaf2Kind = *reinterpret_cast(leaf2Ptr); + size_t leaf2Size = GetLeafSize(leaf2Kind); - (*offsets_dump_)["Global." + str_name] = offset; + i += static_cast(leaf2Ptr + leaf2Size - basePtr); + i = (i + 3) & ~3; + } + else if (fieldRecord->kind == PDB::CodeView::TPI::TypeRecordKind::LF_FRIENDCLS) { + i += sizeof(uint16_t) + sizeof(uint32_t); + i = (i + 3) & ~3; + } + else if (fieldRecord->kind == PDB::CodeView::TPI::TypeRecordKind::LF_VFUNCOFF) { + i += sizeof(uint16_t) + sizeof(uint32_t) + sizeof(uint32_t); + i = (i + 3) & ~3; + } + else { + // Unknown field type - skip the minimum amount and hope for the best + size_t oldI = i; + i += 4; + i = (i + 3) & ~3; + + if (i <= oldI || i >= maxSize) + break; + } } } - void PdbReader::DumpType(IDiaSymbol* symbol, const std::string& structure, int indent) const - { - CComPtr enum_children; - IDiaSymbol* symbol_child; - DWORD sym_tag; - ULONG celt = 0; + void PdbReader::ProcessStructOrClass(const PDB::CodeView::TPI::Record* record, const TypeTable& typeTable) { + if (!record) + return; - if (indent > 5) + if (record->header.kind != PDB::CodeView::TPI::TypeRecordKind::LF_CLASS && record->header.kind != PDB::CodeView::TPI::TypeRecordKind::LF_STRUCTURE) return; - if (symbol->get_symTag(&sym_tag) != S_OK) + if (record->data.LF_CLASS.property.fwdref) return; - switch (sym_tag) - { - case SymTagData: - DumpData(symbol, structure); - break; - case SymTagEnum: - case SymTagUDT: - if (SUCCEEDED(symbol->findChildren(SymTagNull, nullptr, nsNone, &enum_children))) - { - while (SUCCEEDED(enum_children->Next(1, &symbol_child, &celt)) && celt == 1) - { - CComPtr sym_child(symbol_child); + const char* structName = GetLeafName(record->data.LF_CLASS.data, 
record->data.LF_CLASS.lfEasy.kind); + if (!structName || FilterSymbols(structName)) + return; - DumpType(sym_child, structure, indent + 2); - } + const PDB::CodeView::TPI::Record* fieldRecord = typeTable.GetTypeRecord(record->data.LF_CLASS.field); + if (fieldRecord) + ProcessFieldList(fieldRecord, structName, typeTable); + } + + void PdbReader::ProcessTypes(const PDB::TPIStream& tpiStream, const TypeTable& typeTable) { + for (const auto* record : typeTable.GetTypeRecords()) { + if (record->header.kind == PDB::CodeView::TPI::TypeRecordKind::LF_CLASS || record->header.kind == PDB::CodeView::TPI::TypeRecordKind::LF_STRUCTURE) { + ProcessStructOrClass(record, typeTable); } - break; - default: - break; } } - void PdbReader::DumpData(IDiaSymbol* symbol, const std::string& structure) const + std::string PdbReader::GetFunctionParams(uint32_t typeIndex, const TypeTable& typeTable) { - DWORD loc_type; - if (symbol->get_locationType(&loc_type) != S_OK) - return; - - if (loc_type != LocIsThisRel && loc_type != LocIsBitField) - return; + const PDB::CodeView::TPI::Record* record = typeTable.GetTypeRecord(typeIndex); + if (!record) + return ""; - CComPtr type; - if (symbol->get_type(&type) != S_OK) - return; + return ""; + } - if (type == nullptr) - return; + void PdbReader::ProcessFunctions(const PDB::RawFile& rawFile, const PDB::DBIStream& dbiStream, const PDB::ImageSectionStream& imageSectionStream) { + // Use public symbol stream which contains mangled names with full signatures + const PDB::PublicSymbolStream publicSymbolStream = dbiStream.CreatePublicSymbolStream(rawFile); + const PDB::CoalescedMSFStream symbolRecordStream = dbiStream.CreateSymbolRecordStream(rawFile); + const PDB::ArrayView hashRecords = publicSymbolStream.GetRecords(); - LONG offset; - if (symbol->get_offset(&offset) != S_OK) - return; + for (const PDB::HashRecord& hashRecord : hashRecords) { + const PDB::CodeView::DBI::Record* record = publicSymbolStream.GetRecord(symbolRecordStream, hashRecord); + + if 
(record->header.kind != PDB::CodeView::DBI::SymbolRecordKind::S_PUB32) + continue; - std::string str_name = GetSymbolNameString(symbol); - if (str_name.empty()) - return; + if (record->data.S_PUB32.flags != PDB::CodeView::DBI::PublicSymbolFlags::Function) + continue; - if (loc_type == LocIsBitField) - { - DWORD bit_position; - if (symbol->get_bitPosition(&bit_position) != S_OK) - return; + const char* name = record->data.S_PUB32.name; + // Use raw offset within section, NOT RVA + // This matches DIA SDK's get_addressOffset behavior + uint32_t offset = record->data.S_PUB32.offset; - ULONGLONG num_bits; - if (symbol->get_length(&num_bits) != S_OK) - return; + if (!name || offset == 0) + continue; - ULONGLONG length; - if (type->get_length(&length) != S_OK) - return; + std::string funcName = ExtractFunctionName(name); + + if (FilterSymbols(funcName)) + continue; - const BitField bit_field{static_cast(offset), bit_position, num_bits, length}; + std::string params = ExtractFunctionParams(name); - (*bitfields_dump_)[structure + "." + str_name] = bit_field; - } - else if (loc_type == LocIsThisRel) - { - (*offsets_dump_)[structure + "." + str_name] = offset; + std::string fullName; + if (funcName.find("::") != std::string::npos) + fullName = ReplaceAll(funcName, "::", ".") + "(" + params + ")"; + else + fullName = "Global." + funcName + "(" + params + ")"; + + AddOffset(fullName, static_cast(offset)); + + std::string returnType = ExtractReturnType(name); + std::string signature = funcName.substr(funcName.rfind("::") != std::string::npos ? funcName.rfind("::") + 2 : funcName.rfind('.') != std::string::npos ? 
funcName.rfind('.') + 1 : 0); + signature += "(" + params + ")"; + + bool isMemberFunction = (funcName.find("::") != std::string::npos); + bool isStatic = isMemberFunction && !FunctionHasThisPointer(fullName); + + std::string paramNames = GetParamNamesForOffset(offset); + + AddFunctionInfo(fullName, returnType, signature, params, paramNames, static_cast(offset), isStatic); } } - bool PdbReader::FilterSymbols(const std::string input) - { - if (input.empty()) - return true; + void PdbReader::ProcessGlobalVariables(const PDB::RawFile& rawFile, const PDB::DBIStream& dbiStream, const PDB::ImageSectionStream& imageSectionStream, const TypeTable& typeTable) { + const PDB::GlobalSymbolStream globalSymbolStream = dbiStream.CreateGlobalSymbolStream(rawFile); + const PDB::CoalescedMSFStream symbolRecordStream = dbiStream.CreateSymbolRecordStream(rawFile); - for (const auto& filter : filter_set_) - { - if (input.starts_with(filter) - && !input.starts_with("UE::GC")) - return true; - } + const PDB::ArrayView hashRecords = globalSymbolStream.GetRecords(); - if (input.find('`') != std::string::npos) - return true; + for (const PDB::HashRecord& hashRecord : hashRecords) { + const PDB::CodeView::DBI::Record* record = globalSymbolStream.GetRecord(symbolRecordStream, hashRecord); + + const char* name = nullptr; + uint32_t offset = 0; + uint32_t typeIndex = 0; - return false; + if (record->header.kind == PDB::CodeView::DBI::SymbolRecordKind::S_GDATA32) { + name = record->data.S_GDATA32.name; + // Use raw section offset, not RVA like raw_pdb + offset = record->data.S_GDATA32.offset; + typeIndex = record->data.S_GDATA32.typeIndex; + } + else if (record->header.kind == PDB::CodeView::DBI::SymbolRecordKind::S_LDATA32) { + name = record->data.S_LDATA32.name; + // Use raw section offset, not RVA like raw_pdb + offset = record->data.S_LDATA32.offset; + typeIndex = record->data.S_LDATA32.typeIndex; + } + + if (name && offset != 0 && !FilterSymbols(name)) { + std::string globalKey = "Global." 
+ std::string(name); + AddOffset(globalKey, static_cast(offset)); + + // Get type name and add field info + if (typeIndex != 0) { + std::string typeName = GetTypeName(typeTable, typeIndex); + if (!typeName.empty()) { + bool isPointer = (typeName.back() == '*'); + AddFieldInfo(globalKey, typeName, static_cast(offset), isPointer); + } + } + } + } } - std::string PdbReader::GetSymbolNameString(IDiaSymbol* symbol) - { - BSTR str = nullptr; + void PdbReader::Read(const std::wstring& path, std::unordered_map* offsets_dump, std::unordered_map* bitfields_dump, const std::unordered_set filter_set, std::unordered_map* fields_dump, std::unordered_map* functions_dump) { + offsets_dump_ = offsets_dump; + bitfields_dump_ = bitfields_dump; + fields_dump_ = fields_dump; + functions_dump_ = functions_dump; + filter_set_ = filter_set; - std::string name; + offsets_dump_->reserve(550000); + bitfields_dump_->reserve(11000); + if (fields_dump_) fields_dump_->reserve(300000); + if (functions_dump_) functions_dump_->reserve(250000); - HRESULT hr = symbol->get_name(&str); - if (hr != S_OK) - return name; + std::ifstream f{path}; + if (!f.good()) + throw std::runtime_error("Failed to open pdb file"); + f.close(); - if (str != nullptr) - { - name = Tools::Utf8Encode(str); + MemoryMappedFile pdbFile = MemoryMappedFile::Open(path); + if (!pdbFile.baseAddress) { + Log::GetLog()->error("Cannot memory-map PDB file"); + throw std::runtime_error("Cannot memory-map PDB file"); } - SysFreeString(str); - - return name; - } + PDB::ErrorCode errorCode = PDB::ValidateFile(pdbFile.baseAddress, pdbFile.length); + if (errorCode != PDB::ErrorCode::Success) { + MemoryMappedFile::Close(pdbFile); + Log::GetLog()->error("Invalid PDB file"); + throw std::runtime_error("Invalid PDB file"); + } - uint32_t PdbReader::GetSymbolId(IDiaSymbol* symbol) - { - DWORD id; - symbol->get_symIndexId(&id); + const PDB::RawFile rawPdbFile = PDB::CreateRawFile(pdbFile.baseAddress); - return id; - } + errorCode = 
PDB::HasValidDBIStream(rawPdbFile); + if (errorCode != PDB::ErrorCode::Success) { + MemoryMappedFile::Close(pdbFile); + Log::GetLog()->error("Invalid DBI stream"); + throw std::runtime_error("Invalid DBI stream"); + } - void PdbReader::Cleanup(IDiaSymbol* symbol, IDiaSession* session, IDiaDataSource* source) - { - if (symbol != nullptr) - symbol->Release(); - if (session != nullptr) - session->Release(); - if (source != nullptr) - source->Release(); - - CoUninitialize(); - } + const PDB::InfoStream infoStream(rawPdbFile); + if (infoStream.UsesDebugFastLink()) { + MemoryMappedFile::Close(pdbFile); + Log::GetLog()->error("PDB was linked using unsupported option /DEBUG:FASTLINK"); + throw std::runtime_error("PDB uses /DEBUG:FASTLINK"); + } - std::string PdbReader::GetFunctionSymbolParams(IDiaSymbol* pFunction) - { - std::string parameterTypes; - BSTR undecorated = nullptr; - if (SUCCEEDED(pFunction->get_undecoratedNameEx(0x20000, &undecorated)) // 0x20000 - Don't include __ptr64 in output (just on the func sig, but the params can still output it) - && undecorated != nullptr) - { - parameterTypes = Tools::Utf8Encode(undecorated); - const size_t start = parameterTypes.find('('); - const size_t end = parameterTypes.find(')'); - if (start != std::string::npos && end != std::string::npos) - { - parameterTypes = parameterTypes.substr(start + 1, end - start - 1); - parameterTypes = ReplaceString(parameterTypes, "struct ", ""); - parameterTypes = ReplaceString(parameterTypes, "class ", ""); - parameterTypes = ReplaceString(parameterTypes, "enum ", ""); - //parameterTypes = ReplaceString(parameterTypes, "& __ptr64", "*"); // pointers - parameterTypes = ReplaceString(parameterTypes, "const ", ""); - parameterTypes = ReplaceString(parameterTypes, " ", ""); - parameterTypes = ReplaceString(parameterTypes, "__ptr64", ""); - if (parameterTypes == "void") - parameterTypes.clear(); - } + const PDB::DBIStream dbiStream = PDB::CreateDBIStream(rawPdbFile); + + errorCode = 
PDB::HasValidTPIStream(rawPdbFile); + if (errorCode != PDB::ErrorCode::Success) { + MemoryMappedFile::Close(pdbFile); + Log::GetLog()->error("Invalid TPI stream"); + throw std::runtime_error("Invalid TPI stream"); } - return parameterTypes; - } + const PDB::TPIStream tpiStream = PDB::CreateTPIStream(rawPdbFile); + const PDB::ImageSectionStream imageSectionStream = dbiStream.CreateImageSectionStream(rawPdbFile); + + Log::GetLog()->info("Creating type table..."); + const TypeTable typeTable(tpiStream); + + Log::GetLog()->info("Collecting function parameter names..."); + CollectFunctionParamNames(rawPdbFile, dbiStream); // Do not put in task, must execute first + Log::GetLog()->info("Processing structures..."); + auto typesTask = std::async(std::launch::async, [this, &tpiStream, &typeTable]() { + ProcessTypes(tpiStream, typeTable); + }); + Log::GetLog()->info("Processing functions..."); + auto functionsTask = std::async(std::launch::async, [this, &rawPdbFile, &dbiStream, &imageSectionStream]() { + ProcessFunctions(rawPdbFile, dbiStream, imageSectionStream); + }); + + Log::GetLog()->info("Processing global variables..."); + auto globalsTask = std::async(std::launch::async, [this, &rawPdbFile, &dbiStream, &imageSectionStream, &typeTable]() { + ProcessGlobalVariables(rawPdbFile, dbiStream, imageSectionStream, typeTable); + }); + + // Wait for all tasks to complete + typesTask.wait(); + functionsTask.wait(); + globalsTask.wait(); + + // Cleanup + MemoryMappedFile::Close(pdbFile); + + Log::GetLog()->info("Successfully read information from PDB\n"); + } } // namespace API diff --git a/AsaApi/Core/Private/PDBReader/PDBReader.h b/AsaApi/Core/Private/PDBReader/PDBReader.h index 8e4f383..51e2c43 100644 --- a/AsaApi/Core/Private/PDBReader/PDBReader.h +++ b/AsaApi/Core/Private/PDBReader/PDBReader.h @@ -1,14 +1,61 @@ #pragma once -#include #include +#include +#include +#include +#include +#include #include "json.hpp" - #include +// Forward declarations for raw_pdb +namespace PDB 
+{ + class RawFile; + class TPIStream; + class DBIStream; + class ModuleInfoStream; + class ImageSectionStream; + + namespace CodeView + { + namespace TPI + { + struct Record; + enum class TypeRecordKind : uint16_t; + } + namespace DBI + { + struct Record; + } + } +} + namespace API { + // Structure to hold field type information + struct FieldInfo + { + std::string type; // The type name (e.g., "FString", "TArray") + intptr_t offset; // Offset within the class + bool isPointer; // Whether the type is a pointer + }; + + // Structure to hold function signature information + struct FunctionInfo + { + std::string returnType; // Return type + std::string signature; // Full signature with params (e.g., "FuncName(int,float)") + std::string params; // Just the parameter types (comma-separated) + std::string paramNames; // Just the parameter names (comma-separated, e.g., "_this,ForPC,bForced") + intptr_t offset; // Function offset + bool isStatic; // Whether function is static + }; + + class TypeTable; + class PdbReader { public: @@ -16,28 +63,61 @@ namespace API ~PdbReader() = default; void Read(const std::wstring& path, std::unordered_map* offsets_dump, - std::unordered_map* bitfields_dump, const std::unordered_set filter_set); + std::unordered_map* bitfields_dump, + const std::unordered_set filter_set, + std::unordered_map* fields_dump = nullptr, + std::unordered_map* functions_dump = nullptr); private: - static void LoadDataFromPdb(const std::wstring& /*path*/, IDiaDataSource** /*dia_source*/, IDiaSession** - /*session*/, IDiaSymbol** /*symbol*/); + // Main processing methods + void ProcessTypes(const PDB::TPIStream& tpiStream, const TypeTable& typeTable); + void ProcessFunctions(const PDB::RawFile& rawFile, const PDB::DBIStream& dbiStream, const PDB::ImageSectionStream& imageSectionStream); + void ProcessGlobalVariables(const PDB::RawFile& rawFile, const PDB::DBIStream& dbiStream, const PDB::ImageSectionStream& imageSectionStream, const TypeTable& typeTable); - void 
DumpStructs(IDiaSymbol* /*g_symbol*/); - void DumpFunctions(IDiaSymbol* /*g_symbol*/); - void DumpGlobalVariables(IDiaSymbol* /*g_symbol*/); - void DumpType(IDiaSymbol* /*symbol*/, const std::string& /*structure*/, int /*indent*/) const; - void DumpData(IDiaSymbol* /*symbol*/, const std::string& /*structure*/) const; + // Type processing helpers + void ProcessStructOrClass(const PDB::CodeView::TPI::Record* record, const TypeTable& typeTable); + void ProcessFieldList(const PDB::CodeView::TPI::Record* record, const std::string& structName, const TypeTable& typeTable); - bool FilterSymbols(const std::string input); - static std::string GetSymbolNameString(IDiaSymbol* /*symbol*/); - static uint32_t GetSymbolId(IDiaSymbol* /*symbol*/); - static void Cleanup(IDiaSymbol* /*symbol*/, IDiaSession* /*session*/, IDiaDataSource* /*source*/); - static std::string GetFunctionSymbolParams(IDiaSymbol* /*symbol*/); + std::string GetFunctionParams(uint32_t typeIndex, const TypeTable& typeTable); + // Utility methods + bool FilterSymbols(const std::string& name) const; + std::string GetTypeName(const TypeTable& typeTable, uint32_t typeIndex) const; + std::string GetTypeNameInternal(const TypeTable& typeTable, uint32_t typeIndex, int depth) const; + + // Thread-safe data access + void AddOffset(const std::string& key, intptr_t value); + void AddBitField(const std::string& key, const BitField& value); + void AddFieldInfo(const std::string& key, const std::string& typeName, intptr_t offset, bool isPointer); + void AddFunctionInfo(const std::string& key, const std::string& returnType, const std::string& signature, const std::string& params, const std::string& paramNames, intptr_t offset, bool isStatic); + bool MarkVisited(uint32_t id); + + // Parameter names extraction and module function processing + void CollectFunctionParamNames(const PDB::RawFile& rawFile, const PDB::DBIStream& dbiStream); + void ProcessModuleFunctions(const PDB::RawFile& rawFile, const PDB::DBIStream& dbiStream); + 
std::string GetParamNamesForOffset(uint32_t offset) const; + bool FunctionHasThisPointer(const std::string& funcName) const; + + // Data members std::unordered_map* offsets_dump_{nullptr}; std::unordered_map* bitfields_dump_{nullptr}; - + std::unordered_map* fields_dump_{nullptr}; + std::unordered_map* functions_dump_{nullptr}; std::unordered_set filter_set_; + + // Map from function offset to comma-separated parameter names + std::unordered_map param_names_map_; + // Map from function name key to whether it has 'this' pointer (non-static member function) + std::unordered_map func_has_this_map_; + std::mutex param_names_mutex_; + std::mutex func_has_this_mutex_; + + // Thread synchronization + std::mutex offsets_mutex_; + std::mutex bitfields_mutex_; + std::mutex fields_mutex_; + std::mutex functions_mutex_; + std::mutex visited_mutex_; std::unordered_set visited_; }; } // namespace API diff --git a/Includes/raw_pdb/LICENSE b/Includes/raw_pdb/LICENSE new file mode 100644 index 0000000..d3fe23f --- /dev/null +++ b/Includes/raw_pdb/LICENSE @@ -0,0 +1,25 @@ +BSD 2-Clause License + +Copyright 2011-2022, Molecular Matters GmbH +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/Includes/raw_pdb/src/Foundation/PDB_ArrayView.h b/Includes/raw_pdb/src/Foundation/PDB_ArrayView.h new file mode 100644 index 0000000..3c462ee --- /dev/null +++ b/Includes/raw_pdb/src/Foundation/PDB_ArrayView.h @@ -0,0 +1,68 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + +#include "PDB_Macros.h" +#include "PDB_Assert.h" + + +namespace PDB +{ + // A read-only view into arrays of any type and length. + template + class PDB_NO_DISCARD ArrayView + { + public: + // Constructs an array view from a C array with explicit length. + inline constexpr explicit ArrayView(const T* const array, size_t length) PDB_NO_EXCEPT + : m_data(array) + , m_length(length) + { + } + + PDB_DEFAULT_COPY_CONSTRUCTOR(ArrayView); + PDB_DEFAULT_MOVE_CONSTRUCTOR(ArrayView); + + // Provides read-only access to the underlying array. + PDB_NO_DISCARD inline constexpr const T* Decay(void) const PDB_NO_EXCEPT + { + return m_data; + } + + // Returns the length of the view. + PDB_NO_DISCARD inline constexpr size_t GetLength(void) const PDB_NO_EXCEPT + { + return m_length; + } + + // Returns the i-th element. 
+ PDB_NO_DISCARD inline const T& operator[](size_t i) const PDB_NO_EXCEPT + { + PDB_ASSERT(i < GetLength(), "Index %zu out of bounds [0, %zu).", i, GetLength()); + return m_data[i]; + } + + + // ------------------------------------------------------------------------------------------------ + // Range-based for-loop support + // ------------------------------------------------------------------------------------------------ + + PDB_NO_DISCARD inline const T* begin(void) const PDB_NO_EXCEPT + { + return m_data; + } + + PDB_NO_DISCARD inline const T* end(void) const PDB_NO_EXCEPT + { + return m_data + m_length; + } + + private: + const T* const m_data; + const size_t m_length; + + PDB_DISABLE_MOVE_ASSIGNMENT(ArrayView); + PDB_DISABLE_COPY_ASSIGNMENT(ArrayView); + }; +} diff --git a/Includes/raw_pdb/src/Foundation/PDB_Assert.h b/Includes/raw_pdb/src/Foundation/PDB_Assert.h new file mode 100644 index 0000000..6991e06 --- /dev/null +++ b/Includes/raw_pdb/src/Foundation/PDB_Assert.h @@ -0,0 +1,27 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + +#include "PDB_Macros.h" +#include "PDB_Log.h" + + +PDB_PUSH_WARNING_CLANG +PDB_DISABLE_WARNING_CLANG("-Wgnu-zero-variadic-macro-arguments") +PDB_DISABLE_WARNING_CLANG("-Wreserved-identifier") + +extern "C" void __cdecl __debugbreak(void); + +#if PDB_COMPILER_MSVC +# pragma intrinsic(__debugbreak) +#endif + + +#ifdef _DEBUG +# define PDB_ASSERT(_condition, _msg, ...) (_condition) ? (void)true : (PDB_LOG_ERROR(_msg, ##__VA_ARGS__), __debugbreak()) +#else +# define PDB_ASSERT(_condition, _msg, ...) 
PDB_NOOP(_condition, _msg, ##__VA_ARGS__) +#endif + +PDB_POP_WARNING_CLANG diff --git a/Includes/raw_pdb/src/Foundation/PDB_BitOperators.h b/Includes/raw_pdb/src/Foundation/PDB_BitOperators.h new file mode 100644 index 0000000..04f17a4 --- /dev/null +++ b/Includes/raw_pdb/src/Foundation/PDB_BitOperators.h @@ -0,0 +1,23 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + +#include "PDB_Macros.h" + + +#define PDB_DEFINE_BIT_OPERATORS(_type) \ + PDB_NO_DISCARD inline constexpr _type operator|(_type lhs, _type rhs) PDB_NO_EXCEPT \ + { \ + return static_cast<_type>(PDB_AS_UNDERLYING(lhs) | PDB_AS_UNDERLYING(rhs)); \ + } \ + \ + PDB_NO_DISCARD inline constexpr _type operator&(_type lhs, _type rhs) PDB_NO_EXCEPT \ + { \ + return static_cast<_type>(PDB_AS_UNDERLYING(lhs) & PDB_AS_UNDERLYING(rhs)); \ + } \ + \ + PDB_NO_DISCARD inline constexpr _type operator~(_type value) PDB_NO_EXCEPT \ + { \ + return static_cast<_type>(~PDB_AS_UNDERLYING(value)); \ + } diff --git a/Includes/raw_pdb/src/Foundation/PDB_BitUtil.h b/Includes/raw_pdb/src/Foundation/PDB_BitUtil.h new file mode 100644 index 0000000..7dc5ee3 --- /dev/null +++ b/Includes/raw_pdb/src/Foundation/PDB_BitUtil.h @@ -0,0 +1,73 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + +#include "PDB_Assert.h" + +#ifdef _WIN32 + PDB_PUSH_WARNING_CLANG + PDB_DISABLE_WARNING_CLANG("-Wreserved-identifier") + + extern "C" unsigned char _BitScanForward(unsigned long* _Index, unsigned long _Mask); + + PDB_POP_WARNING_CLANG + +# if PDB_COMPILER_MSVC +# pragma intrinsic(_BitScanForward) +# endif +#endif + + +namespace PDB +{ + namespace BitUtil + { + // Returns whether the given unsigned value is a power of two. 
+ template + PDB_NO_DISCARD inline constexpr bool IsPowerOfTwo(T value) PDB_NO_EXCEPT + { + PDB_ASSERT(value != 0u, "Invalid value."); + + return (value & (value - 1u)) == 0u; + } + + + // Rounds the given unsigned value up to the next multiple. + template + PDB_NO_DISCARD inline constexpr T RoundUpToMultiple(T numToRound, T multipleOf) PDB_NO_EXCEPT + { + PDB_ASSERT(IsPowerOfTwo(multipleOf), "Multiple must be a power-of-two."); + + return (numToRound + (multipleOf - 1u)) & ~(multipleOf - 1u); + } + + + // Finds the position of the first set bit in the given value starting from the LSB, e.g. FindFirstSetBit(0b00000010) == 1. + // This operation is also known as CTZ (Count Trailing Zeros). + template + PDB_NO_DISCARD inline uint32_t FindFirstSetBit(T value) PDB_NO_EXCEPT; + + template <> + PDB_NO_DISCARD inline uint32_t FindFirstSetBit(uint32_t value) PDB_NO_EXCEPT + { + PDB_ASSERT(value != 0u, "Invalid value."); + +#ifdef _WIN32 + unsigned long result = 0ul; + + _BitScanForward(&result, value); +#else + unsigned int result = 0u; + + result = static_cast(__builtin_ffs(static_cast(value))); + if (result) + { + --result; + } +#endif + + return result; + } + } +} diff --git a/Includes/raw_pdb/src/Foundation/PDB_CRT.h b/Includes/raw_pdb/src/Foundation/PDB_CRT.h new file mode 100644 index 0000000..539dab3 --- /dev/null +++ b/Includes/raw_pdb/src/Foundation/PDB_CRT.h @@ -0,0 +1,14 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + + +// avoid pulling in different headers just for a few declarations +extern "C" int __cdecl printf(char const* const _Format, ...); + +extern "C" int __cdecl memcmp(void const* _Buf1, void const* _Buf2, size_t _Size); +extern "C" void* __cdecl memcpy(void* _Dst, void const* _Src, size_t _Size); + +extern "C" size_t __cdecl strlen(char const* _Str); +extern "C" int __cdecl strcmp(char const* _Str1, char const* _Str2); 
diff --git a/Includes/raw_pdb/src/Foundation/PDB_Forward.h b/Includes/raw_pdb/src/Foundation/PDB_Forward.h new file mode 100644 index 0000000..ba82dfe --- /dev/null +++ b/Includes/raw_pdb/src/Foundation/PDB_Forward.h @@ -0,0 +1,9 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + + +// See Jonathan Mueller's blog for replacing std::move and std::forward: +// https://foonathan.net/2020/09/move-forward/ +#define PDB_FORWARD(...) static_cast(__VA_ARGS__) diff --git a/Includes/raw_pdb/src/Foundation/PDB_Log.h b/Includes/raw_pdb/src/Foundation/PDB_Log.h new file mode 100644 index 0000000..83a8518 --- /dev/null +++ b/Includes/raw_pdb/src/Foundation/PDB_Log.h @@ -0,0 +1,15 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + +#include "PDB_Macros.h" +#include "PDB_CRT.h" + + +PDB_PUSH_WARNING_CLANG +PDB_DISABLE_WARNING_CLANG("-Wgnu-zero-variadic-macro-arguments") + +#define PDB_LOG_ERROR(_format, ...)
printf(_format, ##__VA_ARGS__) + +PDB_POP_WARNING_CLANG diff --git a/Includes/raw_pdb/src/Foundation/PDB_Macros.h b/Includes/raw_pdb/src/Foundation/PDB_Macros.h new file mode 100644 index 0000000..fddcccf --- /dev/null +++ b/Includes/raw_pdb/src/Foundation/PDB_Macros.h @@ -0,0 +1,126 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + +#include "PDB_Platform.h" +#include "PDB_TypeTraits.h" + + +// ------------------------------------------------------------------------------------------------ +// ATTRIBUTES +// ------------------------------------------------------------------------------------------------ + +// Indicates to the compiler that the return value of a function or class should not be ignored. +#if PDB_CPP_17 +# define PDB_NO_DISCARD [[nodiscard]] +#else +# define PDB_NO_DISCARD +#endif + +// Indicates to the compiler that a function does not throw an exception. +#define PDB_NO_EXCEPT noexcept + + +// ------------------------------------------------------------------------------------------------ +// SPECIAL MEMBER FUNCTIONS +// ------------------------------------------------------------------------------------------------ + +// Default special member functions. +#define PDB_DEFAULT_COPY_CONSTRUCTOR(_name) _name(const _name&) PDB_NO_EXCEPT = default +#define PDB_DEFAULT_COPY_ASSIGNMENT(_name) _name& operator=(const _name&) PDB_NO_EXCEPT = default +#define PDB_DEFAULT_MOVE_CONSTRUCTOR(_name) _name(_name&&) PDB_NO_EXCEPT = default +#define PDB_DEFAULT_MOVE_ASSIGNMENT(_name) _name& operator=(_name&&) PDB_NO_EXCEPT = default + +// Default copy member functions. +#define PDB_DEFAULT_COPY(_name) PDB_DEFAULT_COPY_CONSTRUCTOR(_name); PDB_DEFAULT_COPY_ASSIGNMENT(_name) + +// Default move member functions. 
+#define PDB_DEFAULT_MOVE(_name) PDB_DEFAULT_MOVE_CONSTRUCTOR(_name); PDB_DEFAULT_MOVE_ASSIGNMENT(_name) + +// Single macro to default all copy and move member functions. +#define PDB_DEFAULT_COPY_MOVE(_name) PDB_DEFAULT_COPY(_name); PDB_DEFAULT_MOVE(_name) + +// Disable special member functions. +#define PDB_DISABLE_COPY_CONSTRUCTOR(_name) _name(const _name&) PDB_NO_EXCEPT = delete +#define PDB_DISABLE_COPY_ASSIGNMENT(_name) _name& operator=(const _name&) PDB_NO_EXCEPT = delete +#define PDB_DISABLE_MOVE_CONSTRUCTOR(_name) _name(_name&&) PDB_NO_EXCEPT = delete +#define PDB_DISABLE_MOVE_ASSIGNMENT(_name) _name& operator=(_name&&) PDB_NO_EXCEPT = delete + +// Disable copy member functions. +#define PDB_DISABLE_COPY(_name) PDB_DISABLE_COPY_CONSTRUCTOR(_name); PDB_DISABLE_COPY_ASSIGNMENT(_name) + +// Disable move member functions. +#define PDB_DISABLE_MOVE(_name) PDB_DISABLE_MOVE_CONSTRUCTOR(_name); PDB_DISABLE_MOVE_ASSIGNMENT(_name) + +// Single macro to disable all copy and move member functions. 
+#define PDB_DISABLE_COPY_MOVE(_name) PDB_DISABLE_COPY(_name); PDB_DISABLE_MOVE(_name) + + +// ------------------------------------------------------------------------------------------------ +// COMPILER WARNINGS +// ------------------------------------------------------------------------------------------------ + +#if PDB_COMPILER_MSVC +# define PDB_PRAGMA(_x) __pragma(_x) + +# define PDB_PUSH_WARNING_MSVC PDB_PRAGMA(warning(push)) +# define PDB_SUPPRESS_WARNING_MSVC(_number) PDB_PRAGMA(warning(suppress : _number)) +# define PDB_DISABLE_WARNING_MSVC(_number) PDB_PRAGMA(warning(disable : _number)) +# define PDB_POP_WARNING_MSVC PDB_PRAGMA(warning(pop)) + +# define PDB_PUSH_WARNING_CLANG +# define PDB_DISABLE_WARNING_CLANG(_diagnostic) +# define PDB_POP_WARNING_CLANG +#elif PDB_COMPILER_CLANG +# define PDB_PRAGMA(_x) _Pragma(#_x) + +# define PDB_PUSH_WARNING_MSVC +# define PDB_SUPPRESS_WARNING_MSVC(_number) +# define PDB_DISABLE_WARNING_MSVC(_number) +# define PDB_POP_WARNING_MSVC + +# define PDB_PUSH_WARNING_CLANG PDB_PRAGMA(clang diagnostic push) +# define PDB_DISABLE_WARNING_CLANG(_diagnostic) PDB_PRAGMA(clang diagnostic ignored _diagnostic) +# define PDB_POP_WARNING_CLANG PDB_PRAGMA(clang diagnostic pop) +#elif PDB_COMPILER_GCC +# define PDB_PRAGMA(_x) _Pragma(#_x) + +# define PDB_PUSH_WARNING_MSVC +# define PDB_SUPPRESS_WARNING_MSVC(_number) +# define PDB_DISABLE_WARNING_MSVC(_number) +# define PDB_POP_WARNING_MSVC + +# define PDB_PUSH_WARNING_CLANG +# define PDB_DISABLE_WARNING_CLANG(_diagnostic) +# define PDB_POP_WARNING_CLANG +#endif + + +// ------------------------------------------------------------------------------------------------ +// MISCELLANEOUS +// ------------------------------------------------------------------------------------------------ + +// Trick to make other macros require a semicolon at the end. +#define PDB_REQUIRE_SEMICOLON static_assert(true, "") + +// Defines a C-like flexible array member. 
+#define PDB_FLEXIBLE_ARRAY_MEMBER(_type, _name) \ + PDB_PUSH_WARNING_MSVC \ + PDB_PUSH_WARNING_CLANG \ + PDB_DISABLE_WARNING_MSVC(4200) \ + PDB_DISABLE_WARNING_CLANG("-Wzero-length-array") \ + _type _name[0]; \ + PDB_POP_WARNING_MSVC \ + PDB_POP_WARNING_CLANG \ + PDB_REQUIRE_SEMICOLON + +// Casts any value to the value of the underlying type. +#define PDB_AS_UNDERLYING(_value) static_cast::type>(_value) + +// Signals to the compiler that a function should be ignored, but have its argument list parsed (and "used", so as to not generate "unused variable" warnings). +#if PDB_COMPILER_MSVC +# define PDB_NOOP __noop +#else +# define PDB_NOOP(...) (void)sizeof(__VA_ARGS__) +#endif diff --git a/Includes/raw_pdb/src/Foundation/PDB_Memory.h b/Includes/raw_pdb/src/Foundation/PDB_Memory.h new file mode 100644 index 0000000..ccb7e86 --- /dev/null +++ b/Includes/raw_pdb/src/Foundation/PDB_Memory.h @@ -0,0 +1,11 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + + +#define PDB_NEW(_type) new _type +#define PDB_NEW_ARRAY(_type, _length) new _type[_length] + +#define PDB_DELETE(_ptr) delete _ptr +#define PDB_DELETE_ARRAY(_ptr) delete[] _ptr diff --git a/Includes/raw_pdb/src/Foundation/PDB_Move.h b/Includes/raw_pdb/src/Foundation/PDB_Move.h new file mode 100644 index 0000000..04bf78b --- /dev/null +++ b/Includes/raw_pdb/src/Foundation/PDB_Move.h @@ -0,0 +1,11 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + +#include "PDB_TypeTraits.h" + + +// See Jonathan Mueller's blog for replacing std::move and std::forward: +// https://foonathan.net/2020/09/move-forward/ +#define PDB_MOVE(...) 
static_cast::type&&>(__VA_ARGS__) diff --git a/Includes/raw_pdb/src/Foundation/PDB_Platform.h b/Includes/raw_pdb/src/Foundation/PDB_Platform.h new file mode 100644 index 0000000..8775a54 --- /dev/null +++ b/Includes/raw_pdb/src/Foundation/PDB_Platform.h @@ -0,0 +1,45 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + + +// determine the compiler/toolchain used +#if defined(__clang__) +# define PDB_COMPILER_MSVC 0 +# define PDB_COMPILER_CLANG 1 +# define PDB_COMPILER_GCC 0 +#elif defined(_MSC_VER) +# define PDB_COMPILER_MSVC 1 +# define PDB_COMPILER_CLANG 0 +# define PDB_COMPILER_GCC 0 +#elif defined(__GNUC__) +# define PDB_COMPILER_MSVC 0 +# define PDB_COMPILER_CLANG 0 +# define PDB_COMPILER_GCC 1 +#else +# error("Unknown compiler."); +#endif + +// check whether C++17 is available +#if __cplusplus >= 201703L +# define PDB_CPP_17 1 +#else +# define PDB_CPP_17 0 +#endif + +// define used standard types +typedef decltype(sizeof(0)) size_t; +static_assert(sizeof(sizeof(0)) == sizeof(size_t), "Wrong size."); + +typedef int int32_t; +static_assert(sizeof(int32_t) == 4u, "Wrong size."); + +typedef unsigned char uint8_t; +static_assert(sizeof(uint8_t) == 1u, "Wrong size."); + +typedef unsigned short uint16_t; +static_assert(sizeof(uint16_t) == 2u, "Wrong size."); + +typedef unsigned int uint32_t; +static_assert(sizeof(uint32_t) == 4u, "Wrong size."); diff --git a/Includes/raw_pdb/src/Foundation/PDB_PointerUtil.h b/Includes/raw_pdb/src/Foundation/PDB_PointerUtil.h new file mode 100644 index 0000000..014297d --- /dev/null +++ b/Includes/raw_pdb/src/Foundation/PDB_PointerUtil.h @@ -0,0 +1,33 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + +#include "PDB_Macros.h" +#include "PDB_TypeTraits.h" + + +namespace PDB +{ + 
namespace Pointer + { + // Offsets any pointer by a given number of bytes. + template + PDB_NO_DISCARD inline T Offset(U* anyPointer, V howManyBytes) PDB_NO_EXCEPT + { + static_assert(PDB::is_pointer::value == true, "Type T must be a pointer type."); + + union + { + T as_T; + U* as_U_ptr; + char* as_char_ptr; + }; + + as_U_ptr = anyPointer; + as_char_ptr += howManyBytes; + + return as_T; + } + } +} diff --git a/Includes/raw_pdb/src/Foundation/PDB_TypeTraits.h b/Includes/raw_pdb/src/Foundation/PDB_TypeTraits.h new file mode 100644 index 0000000..9286453 --- /dev/null +++ b/Includes/raw_pdb/src/Foundation/PDB_TypeTraits.h @@ -0,0 +1,65 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + + +// provide our own type traits to avoid pulling in unnecessary includes +namespace PDB +{ + template + struct is_pointer + { + static constexpr bool value = false; + }; + + template + struct is_pointer + { + static constexpr bool value = true; + }; + + template + struct is_pointer + { + static constexpr bool value = true; + }; + + template + struct is_pointer + { + static constexpr bool value = true; + }; + + template + struct is_pointer + { + static constexpr bool value = true; + }; + + + template + struct remove_reference + { + using type = T; + }; + + template + struct remove_reference + { + using type = T; + }; + + template + struct remove_reference + { + using type = T; + }; + + + template + struct underlying_type + { + using type = __underlying_type(T); + }; +} diff --git a/Includes/raw_pdb/src/Foundation/PDB_Warnings.h b/Includes/raw_pdb/src/Foundation/PDB_Warnings.h new file mode 100644 index 0000000..fbc8a9d --- /dev/null +++ b/Includes/raw_pdb/src/Foundation/PDB_Warnings.h @@ -0,0 +1,45 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) 
+ +#pragma once + +#include "PDB_Platform.h" + +#if PDB_COMPILER_MSVC + // some warnings were introduced with different versions of Visual Studio, so we disable this warning instead of using a bunch of #if/#endif +# pragma warning (disable : 4619) // there is no warning number N + + // we compile with exceptions disabled +# pragma warning (disable : 4530) // C++ exception handler used, but unwind semantics are not enabled.Specify / EHsc +# pragma warning (disable : 4577) // 'noexcept' used with no exception handling mode specified; termination on exception is not guaranteed. Specify /EHsc + + // ignore purely informational warnings +# pragma warning (disable : 4514) // unreferenced inline function has been removed +# pragma warning (disable : 4710) // function not inlined +# pragma warning (disable : 4711) // function selected for automatic inline expansion +# pragma warning (disable : 4820) // 'N' bytes padding added after data member 'm_member' +# pragma warning (disable : 5045) // Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified +#elif PDB_COMPILER_CLANG + // turn on absolutely all available Clang warnings +# pragma clang diagnostic warning "-Wall" +# pragma clang diagnostic warning "-Wextra" +# pragma clang diagnostic warning "-Weverything" +# pragma clang diagnostic warning "-Wpedantic" + + // these warnings contradict -Weverything +# pragma clang diagnostic ignored "-Wc++98-compat" +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" + + // this warning is triggered for templates which are explicitly instantiated. + // forgetting to instantiate the template would trigger a linker error anyway, so we disable this warning. 
+# pragma clang diagnostic ignored "-Wundefined-func-template" + + // we don't strive for C++20 compatibility +# pragma clang diagnostic ignored "-Wc++20-compat" + + // some structures will have to be padded +# pragma clang diagnostic ignored "-Wpadded" + + // it's impossible to write C++ code using raw pointers without triggering this warning +# pragma clang diagnostic ignored "-Wunsafe-buffer-usage" +#endif diff --git a/Includes/raw_pdb/src/PDB.cpp b/Includes/raw_pdb/src/PDB.cpp new file mode 100644 index 0000000..0bbd3a7 --- /dev/null +++ b/Includes/raw_pdb/src/PDB.cpp @@ -0,0 +1,55 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#include "PDB_PCH.h" +#include "PDB.h" +#include "PDB_Types.h" +#include "PDB_Util.h" +#include "PDB_RawFile.h" +#include "Foundation/PDB_PointerUtil.h" +#include "Foundation/PDB_CRT.h" + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB_NO_DISCARD PDB::ErrorCode PDB::ValidateFile(const void* data, size_t size) PDB_NO_EXCEPT +{ + // validate whether there is enough size for the super block + if (size < sizeof(SuperBlock)) + { + return ErrorCode::InvalidDataSize; + } + // validate the super block + const SuperBlock* superBlock = Pointer::Offset(data, 0u); + { + // validate header magic + if (memcmp(superBlock->fileMagic, SuperBlock::MAGIC, sizeof(SuperBlock::MAGIC)) != 0) + { + return ErrorCode::InvalidSuperBlock; + } + + // validate whether enough size is provided for the PDB file + // blockCount * blockSize is the size of the PDB file on disk + if (size < superBlock->blockCount * superBlock->blockSize) + { + return ErrorCode::InvalidDataSize; + } + + // validate free block map. + // the free block map should always reside at either index 1 or 2. 
+ if (superBlock->freeBlockMapIndex != 1u && superBlock->freeBlockMapIndex != 2u) + { + return ErrorCode::InvalidFreeBlockMap; + } + } + + return ErrorCode::Success; +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB_NO_DISCARD PDB::RawFile PDB::CreateRawFile(const void* data) PDB_NO_EXCEPT +{ + return RawFile(data); +} diff --git a/Includes/raw_pdb/src/PDB.h b/Includes/raw_pdb/src/PDB.h new file mode 100644 index 0000000..3f17f9f --- /dev/null +++ b/Includes/raw_pdb/src/PDB.h @@ -0,0 +1,21 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + +#include "Foundation/PDB_Macros.h" +#include "PDB_ErrorCodes.h" + + +// https://llvm.org/docs/PDB/index.html +namespace PDB +{ + class RawFile; + + + // Validates whether a PDB file is valid. + PDB_NO_DISCARD ErrorCode ValidateFile(const void* data, size_t size) PDB_NO_EXCEPT; + + // Creates a raw PDB file that must have been validated. 
+ PDB_NO_DISCARD RawFile CreateRawFile(const void* data) PDB_NO_EXCEPT; +} diff --git a/Includes/raw_pdb/src/PDB_CoalescedMSFStream.cpp b/Includes/raw_pdb/src/PDB_CoalescedMSFStream.cpp new file mode 100644 index 0000000..fe544e4 --- /dev/null +++ b/Includes/raw_pdb/src/PDB_CoalescedMSFStream.cpp @@ -0,0 +1,169 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#include "PDB_PCH.h" +#include "PDB_CoalescedMSFStream.h" +#include "PDB_Util.h" +#include "PDB_DirectMSFStream.h" +#include "Foundation/PDB_PointerUtil.h" +#include "Foundation/PDB_Memory.h" +#include "Foundation/PDB_CRT.h" + + +namespace +{ + // ------------------------------------------------------------------------------------------------ + // ------------------------------------------------------------------------------------------------ + PDB_NO_DISCARD static bool AreBlockIndicesContiguous(const uint32_t* blockIndices, uint32_t blockSize, uint32_t streamSize) PDB_NO_EXCEPT + { + const uint32_t blockCount = PDB::ConvertSizeToBlockCount(streamSize, blockSize); + + // start with the first index, checking if all following indices are contiguous (N, N+1, N+2, ...) 
+ uint32_t expectedIndex = blockIndices[0]; + for (uint32_t i = 1u; i < blockCount; ++i) + { + ++expectedIndex; + if (blockIndices[i] != expectedIndex) + { + return false; + } + } + + return true; + } +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::CoalescedMSFStream::CoalescedMSFStream(void) PDB_NO_EXCEPT + : m_ownedData(nullptr) + , m_data(nullptr) + , m_size(0u) +{ +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::CoalescedMSFStream::CoalescedMSFStream(CoalescedMSFStream&& other) PDB_NO_EXCEPT + : m_ownedData(PDB_MOVE(other.m_ownedData)) + , m_data(PDB_MOVE(other.m_data)) + , m_size(PDB_MOVE(other.m_size)) +{ + other.m_ownedData = nullptr; + other.m_data = nullptr; + other.m_size = 0u; +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::CoalescedMSFStream& PDB::CoalescedMSFStream::operator=(CoalescedMSFStream&& other) PDB_NO_EXCEPT +{ + if (this != &other) + { + PDB_DELETE_ARRAY(m_ownedData); + + m_ownedData = PDB_MOVE(other.m_ownedData); + m_data = PDB_MOVE(other.m_data); + m_size = PDB_MOVE(other.m_size); + + other.m_ownedData = nullptr; + other.m_data = nullptr; + other.m_size = 0u; + } + + return *this; +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::CoalescedMSFStream::CoalescedMSFStream(const void* data, uint32_t blockSize, const uint32_t* blockIndices, uint32_t streamSize) PDB_NO_EXCEPT + : 
m_ownedData(nullptr) + , m_data(nullptr) + , m_size(streamSize) +{ + if (AreBlockIndicesContiguous(blockIndices, blockSize, streamSize)) + { + // fast path, all block indices are contiguous, so we don't have to copy any data at all. + // instead, we directly point into the memory-mapped file at the correct offset. + const uint32_t index = blockIndices[0]; + const size_t fileOffset = PDB::ConvertBlockIndexToFileOffset(index, blockSize); + m_data = Pointer::Offset(data, fileOffset); + } + else + { + // slower path, we need to copy disjunct blocks into our own data array, block by block + m_ownedData = PDB_NEW_ARRAY(Byte, streamSize); + m_data = m_ownedData; + + Byte* destination = m_ownedData; + + // copy full blocks first + const uint32_t fullBlockCount = streamSize / blockSize; + for (uint32_t i = 0u; i < fullBlockCount; ++i) + { + const uint32_t index = blockIndices[i]; + + // read one single block at the correct offset in the stream + const size_t fileOffset = PDB::ConvertBlockIndexToFileOffset(index, blockSize); + const void* sourceData = Pointer::Offset(data, fileOffset); + memcpy(destination, sourceData, blockSize); + + destination += blockSize; + } + + // account for non-full blocks + const uint32_t remainingBytes = streamSize - (fullBlockCount * blockSize); + if (remainingBytes != 0u) + { + const uint32_t index = blockIndices[fullBlockCount]; + + // read remaining bytes at correct offset in the stream + const size_t fileOffset = PDB::ConvertBlockIndexToFileOffset(index, blockSize); + const void* sourceData = Pointer::Offset(data, fileOffset); + memcpy(destination, sourceData, remainingBytes); + } + } +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::CoalescedMSFStream::CoalescedMSFStream(const DirectMSFStream& directStream, uint32_t size, uint32_t offset) PDB_NO_EXCEPT + : m_ownedData(nullptr) + 
, m_data(nullptr) + , m_size(size) +{ + const DirectMSFStream::IndexAndOffset indexAndOffset = directStream.GetBlockIndexForOffset(offset); + + // Note: we need to add the offset within the block to the size of the stream to determine if the block + // indices are contiguous. This is needed to deal with the case where reading the requested number of bytes + // from the specified offset would cross a block boundary. For example, if the offset within the block is + // 64 and we want to read 4096 bytes with a block size of 4096, we need to consider *two* block indices, + // not *one*, even though 4096 / 4096 = 1. + if (AreBlockIndicesContiguous(directStream.GetBlockIndices() + indexAndOffset.index, directStream.GetBlockSize(), indexAndOffset.offsetWithinBlock + size)) + { + // fast path, all block indices inside the direct stream from (data + offset) to (data + offset + size) are contiguous + const size_t offsetWithinData = directStream.GetDataOffsetForIndexAndOffset(indexAndOffset); + m_data = Pointer::Offset(directStream.GetData(), offsetWithinData); + } + else + { + // slower path, we need to copy from disjunct blocks, which is performed by the direct stream + m_ownedData = PDB_NEW_ARRAY(Byte, size); + m_data = m_ownedData; + + directStream.ReadAtOffset(m_ownedData, size, offset); + } +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::CoalescedMSFStream::~CoalescedMSFStream(void) PDB_NO_EXCEPT +{ + PDB_DELETE_ARRAY(m_ownedData); +} diff --git a/Includes/raw_pdb/src/PDB_CoalescedMSFStream.h b/Includes/raw_pdb/src/PDB_CoalescedMSFStream.h new file mode 100644 index 0000000..09d524c --- /dev/null +++ b/Includes/raw_pdb/src/PDB_CoalescedMSFStream.h @@ -0,0 +1,71 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: 
https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + +#include "Foundation/PDB_Assert.h" +#include "Foundation/PDB_Macros.h" +#include "PDB_Types.h" + +// https://llvm.org/docs/PDB/index.html#the-msf-container +// https://llvm.org/docs/PDB/MsfFile.html +namespace PDB +{ + class PDB_NO_DISCARD DirectMSFStream; + + + // provides access to a coalesced version of an MSF stream. + // inherently thread-safe, the stream doesn't carry any internal offset or similar. + // coalesces all blocks into a contiguous stream of data upon construction. + // very fast individual reads, useful when almost all data of a stream is needed anyway. + class PDB_NO_DISCARD CoalescedMSFStream + { + public: + CoalescedMSFStream(void) PDB_NO_EXCEPT; + CoalescedMSFStream(CoalescedMSFStream&& other) PDB_NO_EXCEPT; + CoalescedMSFStream& operator=(CoalescedMSFStream&& other) PDB_NO_EXCEPT; + + explicit CoalescedMSFStream(const void* data, uint32_t blockSize, const uint32_t* blockIndices, uint32_t streamSize) PDB_NO_EXCEPT; + + // Creates a coalesced stream from a direct stream at any offset. + explicit CoalescedMSFStream(const DirectMSFStream& directStream, uint32_t size, uint32_t offset) PDB_NO_EXCEPT; + + ~CoalescedMSFStream(void) PDB_NO_EXCEPT; + + // Returns the size of the stream. + PDB_NO_DISCARD inline size_t GetSize(void) const PDB_NO_EXCEPT + { + return m_size; + } + + // Provides read-only access to the data. 
+ template + PDB_NO_DISCARD inline const T* GetDataAtOffset(size_t offset) const PDB_NO_EXCEPT + { + return reinterpret_cast(m_data + offset); + } + + template + PDB_NO_DISCARD inline size_t GetPointerOffset(const T* pointer) const PDB_NO_EXCEPT + { + const Byte* bytePointer = reinterpret_cast(pointer); + const Byte* dataEnd = m_data + m_size; + + PDB_ASSERT(bytePointer >= m_data && bytePointer <= dataEnd, "Pointer 0x%p not within stream range [0x%p:0x%p]", + static_cast(bytePointer), static_cast(m_data), static_cast(dataEnd)); + + return static_cast(bytePointer - m_data); + } + + private: + // contiguous, coalesced data, can be null + Byte* m_ownedData; + + // either points to the owned data that has been copied from disjunct blocks, or points to the + // memory-mapped data directly in case all stream blocks are contiguous. + const Byte* m_data; + size_t m_size; + + PDB_DISABLE_COPY(CoalescedMSFStream); + }; +} diff --git a/Includes/raw_pdb/src/PDB_DBIStream.cpp b/Includes/raw_pdb/src/PDB_DBIStream.cpp new file mode 100644 index 0000000..1b6829d --- /dev/null +++ b/Includes/raw_pdb/src/PDB_DBIStream.cpp @@ -0,0 +1,326 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#include "PDB_PCH.h" +#include "PDB_DBIStream.h" +#include "PDB_RawFile.h" + + +namespace +{ + // the DBI stream always resides at index 3 + static constexpr const uint32_t DBIStreamIndex = 3u; + + + // ------------------------------------------------------------------------------------------------ + // ------------------------------------------------------------------------------------------------ + PDB_NO_DISCARD static inline uint32_t GetModuleInfoSubstreamOffset(const PDB::DBI::StreamHeader& /* dbiHeader */) PDB_NO_EXCEPT + { + return sizeof(PDB::DBI::StreamHeader); + } + + + // ------------------------------------------------------------------------------------------------ + // 
------------------------------------------------------------------------------------------------ + PDB_NO_DISCARD static inline uint32_t GetSectionContributionSubstreamOffset(const PDB::DBI::StreamHeader& dbiHeader) PDB_NO_EXCEPT + { + return GetModuleInfoSubstreamOffset(dbiHeader) + dbiHeader.moduleInfoSize; + } + + + // ------------------------------------------------------------------------------------------------ + // ------------------------------------------------------------------------------------------------ + PDB_NO_DISCARD static inline uint32_t GetSectionMapSubstreamOffset(const PDB::DBI::StreamHeader& dbiHeader) PDB_NO_EXCEPT + { + return GetSectionContributionSubstreamOffset(dbiHeader) + dbiHeader.sectionContributionSize; + } + + + // ------------------------------------------------------------------------------------------------ + // ------------------------------------------------------------------------------------------------ + PDB_NO_DISCARD static inline uint32_t GetSourceInfoSubstreamOffset(const PDB::DBI::StreamHeader& dbiHeader) PDB_NO_EXCEPT + { + return GetSectionMapSubstreamOffset(dbiHeader) + dbiHeader.sectionMapSize; + } + + + // ------------------------------------------------------------------------------------------------ + // ------------------------------------------------------------------------------------------------ + PDB_NO_DISCARD static inline uint32_t GetTypeServerMapSubstreamOffset(const PDB::DBI::StreamHeader& dbiHeader) PDB_NO_EXCEPT + { + return GetSourceInfoSubstreamOffset(dbiHeader) + dbiHeader.sourceInfoSize; + } + + + // ------------------------------------------------------------------------------------------------ + // ------------------------------------------------------------------------------------------------ + PDB_NO_DISCARD static inline uint32_t GetECSubstreamOffset(const PDB::DBI::StreamHeader& dbiHeader) PDB_NO_EXCEPT + { + return GetTypeServerMapSubstreamOffset(dbiHeader) + dbiHeader.typeServerMapSize; + 
} + + + // ------------------------------------------------------------------------------------------------ + // ------------------------------------------------------------------------------------------------ + PDB_NO_DISCARD static inline uint32_t GetDebugHeaderSubstreamOffset(const PDB::DBI::StreamHeader& dbiHeader) PDB_NO_EXCEPT + { + return GetECSubstreamOffset(dbiHeader) + dbiHeader.ecSize; + } + + + // ------------------------------------------------------------------------------------------------ + // ------------------------------------------------------------------------------------------------ + PDB_NO_DISCARD static inline bool HasDebugHeaderSubstream(const PDB::DBI::StreamHeader& dbiHeader) PDB_NO_EXCEPT + { + return dbiHeader.optionalDebugHeaderSize != 0u; + } +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::DBIStream::DBIStream(void) PDB_NO_EXCEPT + : m_header() + , m_stream() +{ +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::DBIStream::DBIStream(const RawFile& file, const DBI::StreamHeader& header) PDB_NO_EXCEPT + : m_header(header) + , m_stream(file.CreateMSFStream(DBIStreamIndex)) +{ +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB_NO_DISCARD PDB::ErrorCode PDB::HasValidDBIStream(const RawFile& file) PDB_NO_EXCEPT +{ + DirectMSFStream stream = file.CreateMSFStream(DBIStreamIndex); + if (stream.GetSize() < sizeof(DBI::StreamHeader)) + { + return ErrorCode::InvalidStream; + } + + const DBI::StreamHeader header = stream.ReadAtOffset(0u); + if (header.signature 
!= DBI::StreamHeader::Signature) + { + return ErrorCode::InvalidSignature; + } + else if (header.version != DBI::StreamHeader::Version::V70) + { + return ErrorCode::UnknownVersion; + } + + return ErrorCode::Success; +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB_NO_DISCARD PDB::DBIStream PDB::CreateDBIStream(const RawFile& file) PDB_NO_EXCEPT +{ + DirectMSFStream stream = file.CreateMSFStream(DBIStreamIndex); + const DBI::StreamHeader header = stream.ReadAtOffset(0u); + + return DBIStream { file, header }; +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB_NO_DISCARD PDB::ErrorCode PDB::DBIStream::HasValidSymbolRecordStream(const RawFile& /* file */) const PDB_NO_EXCEPT +{ + return (m_header.symbolRecordStreamIndex != PDB::NilStreamIndex) ? ErrorCode::Success : ErrorCode::InvalidStreamIndex; +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB_NO_DISCARD PDB::ErrorCode PDB::DBIStream::HasValidImageSectionStream(const RawFile& /* file */) const PDB_NO_EXCEPT +{ + // the debug header stream is optional. if it's not there, we can't get the image section stream either. 
+ if (!HasDebugHeaderSubstream(m_header)) + { + return ErrorCode::InvalidStreamIndex; + } + + // find the debug header sub-stream + const uint32_t debugHeaderOffset = GetDebugHeaderSubstreamOffset(m_header); + const DBI::DebugHeader& debugHeader = m_stream.ReadAtOffset(debugHeaderOffset); + + if (debugHeader.sectionHeaderStreamIndex == DBI::DebugHeader::InvalidStreamIndex) + { + return ErrorCode::InvalidStreamIndex; + } + + return ErrorCode::Success; +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB_NO_DISCARD PDB::ErrorCode PDB::DBIStream::HasValidPublicSymbolStream(const RawFile& file) const PDB_NO_EXCEPT +{ + if (m_header.publicStreamIndex == PDB::NilStreamIndex) + { + return ErrorCode::InvalidStreamIndex; + } + + DirectMSFStream publicStream = file.CreateMSFStream(m_header.publicStreamIndex); + + // the public symbol stream always begins with a header, we are not interested in that. + // following the public symbol stream header is a hash table header. 
+ const HashTableHeader hashHeader = publicStream.ReadAtOffset(sizeof(PublicStreamHeader)); + if (hashHeader.signature != HashTableHeader::Signature) + { + return ErrorCode::InvalidSignature; + } + else if (hashHeader.version != HashTableHeader::Version) + { + return ErrorCode::UnknownVersion; + } + + return ErrorCode::Success; +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB_NO_DISCARD PDB::ErrorCode PDB::DBIStream::HasValidGlobalSymbolStream(const RawFile& file) const PDB_NO_EXCEPT +{ + if (m_header.globalStreamIndex == PDB::NilStreamIndex) + { + return ErrorCode::InvalidStreamIndex; + } + + DirectMSFStream globalStream = file.CreateMSFStream(m_header.globalStreamIndex); + + // the global symbol stream starts with a hash table header + const HashTableHeader hashHeader = globalStream.ReadAtOffset(0u); + if (hashHeader.signature != HashTableHeader::Signature) + { + return ErrorCode::InvalidSignature; + } + else if (hashHeader.version != HashTableHeader::Version) + { + return ErrorCode::UnknownVersion; + } + + return ErrorCode::Success; +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB_NO_DISCARD PDB::ErrorCode PDB::DBIStream::HasValidSectionContributionStream(const RawFile& /* file */) const PDB_NO_EXCEPT +{ + if (m_header.sectionContributionSize < sizeof(DBI::SectionContribution::Version)) + { + return ErrorCode::InvalidStream; + } + + // find the section contribution sub-stream + // https://llvm.org/docs/PDB/DbiStream.html#section-contribution-substream + const uint32_t streamOffset = GetSectionContributionSubstreamOffset(m_header); + + const DBI::SectionContribution::Version version = m_stream.ReadAtOffset(streamOffset); + if (version != 
DBI::SectionContribution::Version::Ver60) + { + return ErrorCode::UnknownVersion; + } + + return ErrorCode::Success; +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB_NO_DISCARD PDB::CoalescedMSFStream PDB::DBIStream::CreateSymbolRecordStream(const RawFile& file) const PDB_NO_EXCEPT +{ + // the symbol record stream holds the actual CodeView data of the symbols + return file.CreateMSFStream(m_header.symbolRecordStreamIndex); +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB_NO_DISCARD PDB::ImageSectionStream PDB::DBIStream::CreateImageSectionStream(const RawFile& file) const PDB_NO_EXCEPT +{ + // find the debug header sub-stream + const uint32_t debugHeaderOffset = GetDebugHeaderSubstreamOffset(m_header); + const DBI::DebugHeader& debugHeader = m_stream.ReadAtOffset(debugHeaderOffset); + + // from there, grab the section header stream + return ImageSectionStream(file, debugHeader.sectionHeaderStreamIndex); +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB_NO_DISCARD PDB::PublicSymbolStream PDB::DBIStream::CreatePublicSymbolStream(const RawFile& file) const PDB_NO_EXCEPT +{ + DirectMSFStream publicStream = file.CreateMSFStream(m_header.publicStreamIndex); + + // the public symbol stream always begins with a header, we are not interested in that. + // following the public symbol stream header is a hash table header. + // we use this to work out how many symbol records are referenced by the public symbol stream. 
+ const HashTableHeader hashHeader = publicStream.ReadAtOffset(sizeof(PublicStreamHeader)); + const uint32_t recordCount = hashHeader.size / sizeof(HashRecord); + + return PublicSymbolStream(file, m_header.publicStreamIndex, recordCount); +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB_NO_DISCARD PDB::GlobalSymbolStream PDB::DBIStream::CreateGlobalSymbolStream(const RawFile& file) const PDB_NO_EXCEPT +{ + DirectMSFStream globalStream = file.CreateMSFStream(m_header.globalStreamIndex); + + // the global symbol stream starts with a hash table header. + // we use this to work out how many symbol records are referenced by the global symbol stream. + const HashTableHeader hashHeader = globalStream.ReadAtOffset(0u); + const uint32_t recordCount = hashHeader.size / sizeof(HashRecord); + + return GlobalSymbolStream(file, m_header.globalStreamIndex, recordCount); +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB_NO_DISCARD PDB::SourceFileStream PDB::DBIStream::CreateSourceFileStream(const RawFile& /* file */) const PDB_NO_EXCEPT +{ + // find the source info sub-stream + // https://llvm.org/docs/PDB/DbiStream.html#file-info-substream + const uint32_t streamOffset = GetSourceInfoSubstreamOffset(m_header); + + return SourceFileStream(m_stream, m_header.sourceInfoSize, streamOffset); +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB_NO_DISCARD PDB::SectionContributionStream PDB::DBIStream::CreateSectionContributionStream(const RawFile& /* file */) const PDB_NO_EXCEPT +{ + // find the section 
contribution sub-stream + // https://llvm.org/docs/PDB/DbiStream.html#section-contribution-substream + const uint32_t streamOffset = GetSectionContributionSubstreamOffset(m_header); + + return SectionContributionStream(m_stream, m_header.sectionContributionSize - sizeof(DBI::SectionContribution::Version), streamOffset + sizeof(DBI::SectionContribution::Version)); +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB_NO_DISCARD PDB::ModuleInfoStream PDB::DBIStream::CreateModuleInfoStream(const RawFile& /* file */) const PDB_NO_EXCEPT +{ + // find the module info sub-stream + // https://llvm.org/docs/PDB/DbiStream.html#module-info-substream + const uint32_t streamOffset = GetModuleInfoSubstreamOffset(m_header); + + return ModuleInfoStream(m_stream, m_header.moduleInfoSize, streamOffset); +} diff --git a/Includes/raw_pdb/src/PDB_DBIStream.h b/Includes/raw_pdb/src/PDB_DBIStream.h new file mode 100644 index 0000000..4b52598 --- /dev/null +++ b/Includes/raw_pdb/src/PDB_DBIStream.h @@ -0,0 +1,65 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + +#include "Foundation/PDB_Macros.h" +#include "PDB_ErrorCodes.h" +#include "PDB_DBITypes.h" +#include "PDB_CoalescedMSFStream.h" +#include "PDB_DirectMSFStream.h" +#include "PDB_ImageSectionStream.h" +#include "PDB_PublicSymbolStream.h" +#include "PDB_GlobalSymbolStream.h" +#include "PDB_SourceFileStream.h" +#include "PDB_SectionContributionStream.h" +#include "PDB_ModuleInfoStream.h" + + +// PDB DBI Stream +// https://llvm.org/docs/PDB/DbiStream.html +namespace PDB +{ + class RawFile; + + + class PDB_NO_DISCARD DBIStream + { + public: + DBIStream(void) PDB_NO_EXCEPT; + explicit DBIStream(const RawFile& file, const DBI::StreamHeader& header) 
PDB_NO_EXCEPT; + + PDB_DEFAULT_MOVE(DBIStream); + + PDB_NO_DISCARD ErrorCode HasValidSymbolRecordStream(const RawFile& file) const PDB_NO_EXCEPT; + PDB_NO_DISCARD ErrorCode HasValidImageSectionStream(const RawFile& file) const PDB_NO_EXCEPT; + PDB_NO_DISCARD ErrorCode HasValidPublicSymbolStream(const RawFile& file) const PDB_NO_EXCEPT; + PDB_NO_DISCARD ErrorCode HasValidGlobalSymbolStream(const RawFile& file) const PDB_NO_EXCEPT; + PDB_NO_DISCARD ErrorCode HasValidSectionContributionStream(const RawFile& file) const PDB_NO_EXCEPT; + + PDB_NO_DISCARD CoalescedMSFStream CreateSymbolRecordStream(const RawFile& file) const PDB_NO_EXCEPT; + PDB_NO_DISCARD ImageSectionStream CreateImageSectionStream(const RawFile& file) const PDB_NO_EXCEPT; + PDB_NO_DISCARD PublicSymbolStream CreatePublicSymbolStream(const RawFile& file) const PDB_NO_EXCEPT; + PDB_NO_DISCARD GlobalSymbolStream CreateGlobalSymbolStream(const RawFile& file) const PDB_NO_EXCEPT; + PDB_NO_DISCARD SourceFileStream CreateSourceFileStream(const RawFile& file) const PDB_NO_EXCEPT; + PDB_NO_DISCARD SectionContributionStream CreateSectionContributionStream(const RawFile& file) const PDB_NO_EXCEPT; + PDB_NO_DISCARD ModuleInfoStream CreateModuleInfoStream(const RawFile& file) const PDB_NO_EXCEPT; + + PDB_NO_DISCARD const DBI::StreamHeader& GetHeader(void) const PDB_NO_EXCEPT + { + return m_header; + } + + private: + DBI::StreamHeader m_header; + DirectMSFStream m_stream; + + PDB_DISABLE_COPY(DBIStream); + }; + + // Returns whether the given raw file provides a valid DBI stream. + PDB_NO_DISCARD ErrorCode HasValidDBIStream(const RawFile& file) PDB_NO_EXCEPT; + + // Creates the DBI stream from a raw file. 
+ PDB_NO_DISCARD DBIStream CreateDBIStream(const RawFile& file) PDB_NO_EXCEPT; +} diff --git a/Includes/raw_pdb/src/PDB_DBITypes.cpp b/Includes/raw_pdb/src/PDB_DBITypes.cpp new file mode 100644 index 0000000..4eaedcf --- /dev/null +++ b/Includes/raw_pdb/src/PDB_DBITypes.cpp @@ -0,0 +1,9 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#include "PDB_PCH.h" +#include "PDB_DBITypes.h" + + +const uint32_t PDB::DBI::StreamHeader::Signature = 0xffffffffu; +const uint16_t PDB::DBI::DebugHeader::InvalidStreamIndex = 0xFFFFu; diff --git a/Includes/raw_pdb/src/PDB_DBITypes.h b/Includes/raw_pdb/src/PDB_DBITypes.h new file mode 100644 index 0000000..9a798d4 --- /dev/null +++ b/Includes/raw_pdb/src/PDB_DBITypes.h @@ -0,0 +1,928 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + +#include "Foundation/PDB_Macros.h" +#include "Foundation/PDB_BitOperators.h" + + +namespace PDB +{ + namespace DBI + { + // https://llvm.org/docs/PDB/DbiStream.html#stream-header + // https://github.com/microsoft/microsoft-pdb/blob/master/PDB/dbi/dbi.h#L124 + struct StreamHeader + { + static const uint32_t Signature; + + enum class PDB_NO_DISCARD Version : uint32_t + { + VC41 = 930803u, + V50 = 19960307u, + V60 = 19970606u, + V70 = 19990903u, + V110 = 20091201u + }; + + uint32_t signature; + Version version; + uint32_t age; + uint16_t globalStreamIndex; // index of the global symbol stream + uint16_t toolchain; + uint16_t publicStreamIndex; // index of the public symbol stream + uint16_t pdbDllVersion; + uint16_t symbolRecordStreamIndex; // index of the symbol record stream + uint16_t pdbDllRbld; + uint32_t moduleInfoSize; + uint32_t sectionContributionSize; + uint32_t sectionMapSize; + uint32_t sourceInfoSize; + uint32_t typeServerMapSize; + uint32_t 
mfcTypeServerIndex; + uint32_t optionalDebugHeaderSize; + uint32_t ecSize; + uint16_t flags; + uint16_t machine; + uint32_t padding; + }; + + // https://llvm.org/docs/PDB/DbiStream.html#optional-debug-header-stream + struct DebugHeader + { + static const uint16_t InvalidStreamIndex; + + uint16_t fpoDataStreamIndex; // IMAGE_DEBUG_TYPE_FPO + uint16_t exceptionDataStreamIndex; // IMAGE_DEBUG_TYPE_EXCEPTION + uint16_t fixupDataStreamIndex; // IMAGE_DEBUG_TYPE_FIXUP + uint16_t omapToSrcDataStreamIndex; // IMAGE_DEBUG_TYPE_OMAP_TO_SRC + uint16_t omapFromSrcDataStreamIndex; // IMAGE_DEBUG_TYPE_OMAP_FROM_SRC + uint16_t sectionHeaderStreamIndex; // a dump of all section headers (IMAGE_SECTION_HEADER) from the original executable + uint16_t tokenDataStreamIndex; + uint16_t xdataStreamIndex; + uint16_t pdataStreamIndex; + uint16_t newFpoDataStreamIndex; + uint16_t originalSectionHeaderDataStreamIndex; + }; + + // https://llvm.org/docs/PDB/DbiStream.html#section-contribution-substream + struct SectionContribution + { + enum class PDB_NO_DISCARD Version : uint32_t + { + Ver60 = 0xeffe0000u + 19970605u, + V2 = 0xeffe0000u + 20140516u + }; + + uint16_t section; + uint16_t padding; + uint32_t offset; + uint32_t size; + uint32_t characteristics; + uint16_t moduleIndex; + uint16_t padding2; + uint32_t dataCrc; + uint32_t relocationCrc; + }; + + // https://llvm.org/docs/PDB/DbiStream.html#module-info-substream + struct ModuleInfo + { + uint32_t unused; + SectionContribution sectionContribution; + uint16_t flags; + uint16_t moduleSymbolStreamIndex; + uint32_t symbolSize; + uint32_t c11Size; + uint32_t c13Size; + uint16_t sourceFileCount; + uint16_t padding; + uint32_t unused2; + uint32_t sourceFileNameIndex; + uint32_t pdbFilePathNameIndex; + }; + } + + + namespace CodeView + { + namespace DBI + { + // code view type records that can appear in a DBI stream. + // this list is not exhaustive, but only contains what we need so far. 
+ // https://llvm.org/docs/PDB/CodeViewSymbols.html + // https://llvm.org/docs/PDB/TpiStream.html#tpi-vs-ipi-stream + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L2735 + enum class PDB_NO_DISCARD SymbolRecordKind : uint16_t + { + S_END = 0x0006u, // block, procedure, "with" or thunk end + S_SKIP = 0x0007u, // Reserve symbol space in $$Symbols table + S_FRAMEPROC = 0x1012u, // extra frame and proc information + S_ANNOTATION = 0x1019u, // annotation string literals ("__annotation" intrinsic, e.g. via NT_ASSERT) + S_OBJNAME = 0x1101u, // full path to the original compiled .obj. can point to remote locations and temporary files, not necessarily the file that was linked into the executable + S_THUNK32 = 0x1102u, // thunk start + S_BLOCK32 = 0x1103u, // block start + S_LABEL32 = 0x1105u, // code label + S_REGISTER = 0x1106u, // register variable + S_CONSTANT = 0x1107u, // constant symbol + S_BPREL32 = 0x110Bu, // BP-relative address (almost like S_REGREL32) + S_LDATA32 = 0x110Cu, // (static) local data + S_GDATA32 = 0x110Du, // global data + S_PUB32 = 0x110Eu, // public symbol + S_LPROC32 = 0x110Fu, // local procedure start + S_GPROC32 = 0x1110u, // global procedure start + S_REGREL32 = 0x1111u, // register relative address + S_LTHREAD32 = 0x1112u, // (static) thread-local data + S_GTHREAD32 = 0x1113u, // global thread-local data + S_UNAMESPACE = 0x1124u, // using namespace + S_PROCREF = 0x1125u, // reference to function in any compiland + S_LPROCREF = 0x1127u, // local reference to function in any compiland + S_TRAMPOLINE = 0x112Cu, // incremental linking trampoline + S_SEPCODE = 0x1132u, // separated code (from the compiler) + S_SECTION = 0x1136u, // a COFF section in an executable + S_COFFGROUP = 0x1137u, // original COFF group before it was merged into executable sections by the linker, e.g. 
.CRT$XCU, .rdata, .bss, .lpp_prepatch_hooks + S_CALLSITEINFO = 0x1139u, // Indirect call site information + S_FRAMECOOKIE = 0x113Au, // Security cookie information + S_COMPILE3 = 0x113Cu, // replacement for S_COMPILE2, more info + S_ENVBLOCK = 0x113Du, // environment block split off from S_COMPILE2 + S_LOCAL = 0x113Eu, // defines a local symbol in optimized code + S_DEFRANGE_REGISTER = 0x1141u, // ranges for en-registered symbol + S_DEFRANGE_FRAMEPOINTER_REL = 0x1142u, // range for stack symbol. + S_DEFRANGE_SUBFIELD_REGISTER = 0x1143u, // ranges for en-registered field of symbol + S_DEFRANGE_FRAMEPOINTER_REL_FULL_SCOPE = 0x1144u, // range for stack symbol span valid full scope of function body, gap might apply. + S_DEFRANGE_REGISTER_REL = 0x1145u, // range for symbol address as register + offset. + S_LPROC32_ID = 0x1146u, // S_PROC symbol that references ID instead of type + S_GPROC32_ID = 0x1147u, // S_PROC symbol that references ID instead of type + S_BUILDINFO = 0x114Cu, // build info/environment details of a compiland/translation unit + S_INLINESITE = 0x114Du, // inlined function callsite + S_INLINESITE_END = 0x114Eu, + S_PROC_ID_END = 0x114Fu, + S_FILESTATIC = 0x1153u, + S_LPROC32_DPC = 0x1155u, + S_LPROC32_DPC_ID = 0x1156u, + S_ARMSWITCHTABLE = 0x1159u, + S_CALLEES = 0x115Au, + S_CALLERS = 0x115Bu, + S_INLINESITE2 = 0x115Du, // extended inline site information + S_HEAPALLOCSITE = 0x115Eu, // heap allocation site + S_INLINEES = 0x1168u, // https://llvm.org/docs/PDB/CodeViewSymbols.html#s-inlinees-0x1168 + S_REGREL32_INDIR = 0x1171u, + S_REGREL32_ENCTMP = 0x1179u, + S_UDT = 0x1108u, // user-defined type + S_UDT_ST = 0x1003u, // user-defined structured types + }; + + // https://docs.microsoft.com/en-us/visualstudio/debugger/debug-interface-access/thunk-ordinal + enum class PDB_NO_DISCARD ThunkOrdinal : uint8_t + { + NoType, + ThisAdjustor, + VirtualCall, + PCode, + DelayLoad, + TrampolineIncremental, + TrampolineBranchIsland + }; + + enum class PDB_NO_DISCARD 
TrampolineType : uint16_t + { + Incremental, + BranchIsland + }; + + enum class PDB_NO_DISCARD CookieType : uint8_t + { + COPY = 0, + XOR_SP, + XOR_BP, + XOR_R13, + }; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvconst.h#L392 + enum class PDB_NO_DISCARD Register : uint16_t + { + EAX = 17, + ECX = 18, + EDX = 19, + EBX = 20, + ESP = 21, + EBP = 22, + ESI = 23, + EDI = 24, + + RAX = 328, + RBX = 329, + RCX = 330, + RDX = 331, + RSI = 332, + RDI = 333, + RBP = 334, + RSP = 335, + R8 = 336, + R9 = 337, + R10 = 338, + R11 = 339, + R12 = 340, + R13 = 341, + R14 = 342, + R15 = 343, + + RIP = 33, // also EIP for x32 + EFLAGS = 34, // same for x64 and x32 + }; + + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L3038 + enum class PDB_NO_DISCARD ProcedureFlags : uint8_t + { + None = 0u, + NoFPO = 1u << 0u, + InterruptReturn = 1u << 1u, + FarReturn = 1u << 2u, + NoReturn = 1u << 3u, + Unreachable = 1u << 4u, + CustomCallingConvention = 1u << 5u, + NoInline = 1u << 6u, + OptimizedDebugInformation = 1u << 7u + }; + PDB_DEFINE_BIT_OPERATORS(ProcedureFlags); + + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L3676 + enum class PDB_NO_DISCARD PublicSymbolFlags : uint32_t + { + None = 0u, + Code = 1u << 0u, // set if public symbol refers to a code address + Function = 1u << 1u, // set if public symbol is a function + ManagedCode = 1u << 2u, // set if managed code (native or IL) + ManagedILCode = 1u << 3u // set if managed IL code + }; + PDB_DEFINE_BIT_OPERATORS(PublicSymbolFlags); + + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L3341 + enum class PDB_NO_DISCARD CompileSymbolFlags : uint32_t + { + None = 0u, + SourceLanguageMask = 0xFFu, + EC = 1u << 8u, + NoDebugInfo = 1u << 9u, + LTCG = 1u << 10u, + NoDataAlign = 1u << 11u, + ManagedCodeOrDataPresent = 1u << 12u, + SecurityChecks = 1u << 13u, + HotPatch = 1u << 14u, + CVTCIL = 1u << 15u, + MSILModule = 1u << 16u, 
+ SDL = 1u << 17u, + PGO = 1u << 18u, + Exp = 1u << 19u + }; + PDB_DEFINE_BIT_OPERATORS(CompileSymbolFlags); + + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvconst.h#L324 + enum class PDB_NO_DISCARD CPUType : uint16_t + { + Intel8080 = 0x0, + Intel8086 = 0x1, + Intel80286 = 0x2, + Intel80386 = 0x3, + Intel80486 = 0x4, + Pentium = 0x5, + PentiumII = 0x6, + PentiumPro = PentiumII, + PentiumIII = 0x7, + MIPS = 0x10, + MIPSR4000 = MIPS, + MIPS16 = 0x11, + MIPS32 = 0x12, + MIPS64 = 0x13, + MIPSI = 0x14, + MIPSII = 0x15, + MIPSIII = 0x16, + MIPSIV = 0x17, + MIPSV = 0x18, + M68000 = 0x20, + M68010 = 0x21, + M68020 = 0x22, + M68030 = 0x23, + M68040 = 0x24, + Alpha = 0x30, + Alpha21164 = 0x31, + Alpha21164A = 0x32, + Alpha21264 = 0x33, + Alpha21364 = 0x34, + PPC601 = 0x40, + PPC603 = 0x41, + PPC604 = 0x42, + PPC620 = 0x43, + PPCFP = 0x44, + PPCBE = 0x45, + SH3 = 0x50, + SH3E = 0x51, + SH3DSP = 0x52, + SH4 = 0x53, + SHMedia = 0x54, + ARM3 = 0x60, + ARM4 = 0x61, + ARM4T = 0x62, + ARM5 = 0x63, + ARM5T = 0x64, + ARM6 = 0x65, + ARM_XMAC = 0x66, + ARM_WMMX = 0x67, + ARM7 = 0x68, + Omni = 0x70, + IA64 = 0x80, + IA64_1 = 0x80, + IA64_2 = 0x81, + CEE = 0x90, + AM33 = 0xA0, + M32R = 0xB0, + TriCore = 0xC0, + X64 = 0xD0, + AMD64 = X64, + EBC = 0xE0, + Thumb = 0xF0, + ARMNT = 0xF4, + ARM64 = 0xF6, + HybridX86ARM64 = 0xF7, + ARM64EC = 0xF8, + ARM64X = 0xF9, + D3D11_Shader = 0x100 + }; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L3100 + // represents an address range, used for optimized code debug info + struct LocalVariableAddressRange // defines a range of addresses + { + uint32_t offsetStart; + uint16_t isectionStart; + uint16_t length; + }; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L3108 + // Represents the holes in overall address range, all address is pre-bbt. + // it is for compress and reduce the amount of relocations need. 
+ struct LocalVariableAddressGap + { + uint16_t offset; // relative offset from the beginning of the live range. + uint16_t length; // length of this gap. + }; + + // https://github.com/microsoft/microsoft-pdb/blob/0fe89a942f9a0f8e061213313e438884f4c9b876/include/cvinfo.h#L4366 + // https://github.com/microsoft/microsoft-pdb/blob/0fe89a942f9a0f8e061213313e438884f4c9b876/cvdump/dumpsym7.cpp#L5518 + enum class ARMSwitchType : uint16_t + { + INT1 = 0, // signed byte + UINT1 = 1, // unsigned byte + INT2 = 2, // signed two byte + UINT2 = 3, // unsigned two byte + INT4 = 4, // signed four byte + UINT4 = 5, // unsigned four byte + POINTER = 6, + UINT1SHL1 = 7, // unsigned byte scaled by two + UINT2SHL1 = 8, // unsigned two byte scaled by two + INT1SHL1 = 9, // signed byte scaled by two + INT2SHL1 = 10, // signed two byte scaled by two + TBB = UINT1SHL1, + TBH = UINT2SHL1, + }; + + // https://llvm.org/docs/PDB/CodeViewTypes.html#leaf-types + struct RecordHeader + { + uint16_t size; // record length, not including this 2-byte field + SymbolRecordKind kind; // record kind + }; + + // all CodeView records are stored as a header, followed by variable-length data. + // internal Record structs such as S_PUB32, S_GDATA32, etc. correspond to the data layout of a CodeView record of that kind. 
+ struct Record + { + RecordHeader header; + union Data + { +#pragma pack(push, 1) + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L4069 + struct + { + uint32_t cbFrame; // count of bytes of total frame of procedure + uint32_t cbPad; // count of bytes of padding in the frame + uint32_t offPad; // offset (relative to frame poniter) to where + // padding starts + uint32_t cbSaveRegs; // count of bytes of callee save registers + uint32_t offExHdlr; // offset of exception handler + uint16_t sectExHdlr; // section id of exception handler + + struct { + uint32_t fHasAlloca : 1; // function uses _alloca() + uint32_t fHasSetJmp : 1; // function uses setjmp() + uint32_t fHasLongJmp : 1; // function uses longjmp() + uint32_t fHasInlAsm : 1; // function uses inline asm + uint32_t fHasEH : 1; // function has EH states + uint32_t fInlSpec : 1; // function was speced as inline + uint32_t fHasSEH : 1; // function has SEH + uint32_t fNaked : 1; // function is __declspec(naked) + uint32_t fSecurityChecks : 1; // function has buffer security check introduced by /GS. + uint32_t fAsyncEH : 1; // function compiled with /EHa + uint32_t fGSNoStackOrdering : 1; // function has /GS buffer checks, but stack ordering couldn't be done + uint32_t fWasInlined : 1; // function was inlined within another function + uint32_t fGSCheck : 1; // function is __declspec(strict_gs_check) + uint32_t fSafeBuffers : 1; // function is __declspec(safebuffers) + uint32_t encodedLocalBasePointer : 2; // record function's local pointer explicitly. + uint32_t encodedParamBasePointer : 2; // record function's parameter pointer explicitly. + uint32_t fPogoOn : 1; // function was compiled with PGO/PGU + uint32_t fValidCounts : 1; // Do we have valid Pogo counts? + uint32_t fOptSpeed : 1; // Did we optimize for speed? 
+ uint32_t fGuardCF : 1; // function contains CFG checks (and no write checks) + uint32_t fGuardCFW : 1; // function contains CFW checks and/or instrumentation + uint32_t pad : 9; // must be zero + } flags; + } S_FRAMEPROC; + + struct + { + uint32_t offset; + uint16_t section; + uint16_t annotationsCount; // number of zero-terminated annotation strings + PDB_FLEXIBLE_ARRAY_MEMBER(char, annotations); // sequence of zero-terminated annotation strings + } S_ANNOTATIONSYM; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L3696 + struct + { + PublicSymbolFlags flags; + uint32_t offset; + uint16_t section; + PDB_FLEXIBLE_ARRAY_MEMBER(char, name); + } S_PUB32; + + struct + { + uint32_t typeIndex; + uint32_t offset; + uint16_t section; + PDB_FLEXIBLE_ARRAY_MEMBER(char, name); + } S_GDATA32, S_GTHREAD32, S_LDATA32, S_LTHREAD32; + + struct + { + PDB_FLEXIBLE_ARRAY_MEMBER(char, name); + } S_UNAMESPACE; + + struct + { + uint32_t signature; + PDB_FLEXIBLE_ARRAY_MEMBER(char, name); + } S_OBJNAME; + + struct + { + TrampolineType type; + uint16_t size; + uint32_t thunkOffset; + uint32_t targetOffset; + uint16_t thunkSection; + uint16_t targetSection; + } S_TRAMPOLINE; + + struct + { + uint16_t sectionNumber; + uint8_t alignment; + uint32_t rva; + uint32_t length; + uint32_t characteristics; + PDB_FLEXIBLE_ARRAY_MEMBER(char, name); + } S_SECTION; + + struct + { + uint32_t size; + uint32_t characteristics; + uint32_t offset; + uint16_t section; + PDB_FLEXIBLE_ARRAY_MEMBER(char, name); + } S_COFFGROUP; + + struct + { + uint32_t offset ; // offset of call site + uint16_t section; // section index of call site + uint16_t padding; // alignment padding field, must be zero + uint32_t typeIndex; // type index describing function signature + } S_CALLSITEINFO; + + struct + { + uint32_t offset; // Frame relative offset + uint16_t reg; // Register index + CookieType cookietype; // Type of the cookie + uint8_t flags; // Flags describing this cookie + } 
S_FRAMECOOKIE; + + struct + { + uint32_t parent; + uint32_t end; + uint32_t next; + uint32_t offset; + uint16_t section; + uint16_t length; + ThunkOrdinal thunk; + PDB_FLEXIBLE_ARRAY_MEMBER(char, name); + } S_THUNK32; + + struct + { + uint32_t parent; + uint32_t end; + uint32_t next; + uint32_t codeSize; + uint32_t debugStart; + uint32_t debugEnd; + uint32_t typeIndex; + uint32_t offset; + uint16_t section; + ProcedureFlags flags; + PDB_FLEXIBLE_ARRAY_MEMBER(char, name); + } S_LPROC32, S_GPROC32, S_LPROC32_ID, S_GPROC32_ID, S_LPROC32_DPC, S_LPROC32_DPC_ID; + + struct + { + uint32_t offset; + uint32_t typeIndex; + PDB_FLEXIBLE_ARRAY_MEMBER(char, name); + } S_BPRELSYM32; + + struct + { + uint32_t offset; + uint32_t typeIndex; + Register reg; + PDB_FLEXIBLE_ARRAY_MEMBER(char, name); + } S_REGREL32, S_REGREL32_ENCTMP; + + struct + { + uint32_t typeIndex; + Register reg; + PDB_FLEXIBLE_ARRAY_MEMBER(char, name); + } S_REGSYM; + + struct + { + uint32_t parent; + uint32_t end; + uint32_t codeSize; + uint32_t offset; + uint16_t section; + PDB_FLEXIBLE_ARRAY_MEMBER(char, name); + } S_BLOCK32; + + struct + { + uint32_t offset; + uint16_t section; + ProcedureFlags flags; + PDB_FLEXIBLE_ARRAY_MEMBER(char, name); + } S_LABEL32; + + struct + { + uint32_t typeIndex; + uint16_t value; + PDB_FLEXIBLE_ARRAY_MEMBER(char, name); + } S_CONSTANT; + + struct + { + uint32_t typeIndex; // refers to a type index in the IPI stream + } S_BUILDINFO; + + struct + { + uint32_t parent; // pointer to the inliner + uint32_t end; // pointer to this block's end + uint32_t inlinee; // CV_ItemId of inlinee + PDB_FLEXIBLE_ARRAY_MEMBER(uint8_t, binaryAnnotations); + } S_INLINESITE; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L4199 + struct + { + uint32_t typeIndex; // type index + uint32_t moduleFilenameOffset; // index of mod filename in stringtable + + struct + { + uint16_t fIsParam : 1; // variable is a parameter + uint16_t fAddrTaken : 1; // address is taken + 
uint16_t fCompGenx : 1; // variable is compiler generated + uint16_t fIsAggregate : 1; // the symbol is splitted in temporaries, + // which are treated by compiler as + // independent entities + uint16_t fIsAggregated : 1; // Counterpart of fIsAggregate - tells + // that it is a part of a fIsAggregate symbol + uint16_t fIsAliased : 1; // variable has multiple simultaneous lifetimes + uint16_t fIsAlias : 1; // represents one of the multiple simultaneous lifetimes + uint16_t fIsRetValue : 1; // represents a function return value + uint16_t fIsOptimizedOut : 1; // variable has no lifetimes + uint16_t fIsEnregGlob : 1; // variable is an enregistered global + uint16_t fIsEnregStat : 1; // variable is an enregistered static + uint16_t unused : 5; // must be zero + } flags; + + PDB_FLEXIBLE_ARRAY_MEMBER(char, name); + } S_FILESTATIC; + + struct + { + CompileSymbolFlags flags; + CPUType machine; + uint16_t versionFrontendMajor; + uint16_t versionFrontendMinor; + uint16_t versionFrontendBuild; + uint16_t versionFrontendQFE; + uint16_t versionBackendMajor; + uint16_t versionBackendMinor; + uint16_t versionBackendBuild; + uint16_t versionBackendQFE; + PDB_FLEXIBLE_ARRAY_MEMBER(char, version); + } S_COMPILE3; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L3372 + struct + { + uint8_t flags; + PDB_FLEXIBLE_ARRAY_MEMBER(char, strings); + } S_ENVBLOCK; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L4190 + struct + { + uint32_t typeIndex; + + struct + { + uint16_t fIsParam : 1; // variable is a parameter + uint16_t fAddrTaken : 1; // address is taken + uint16_t fCompGenx : 1; // variable is compiler generated + uint16_t fIsAggregate : 1; // the symbol is splitted in temporaries, + // which are treated by compiler as + // independent entities + uint16_t fIsAggregated : 1; // Counterpart of fIsAggregate - tells + // that it is a part of a fIsAggregate symbol + uint16_t fIsAliased : 1; // variable has multiple 
simultaneous lifetimes + uint16_t fIsAlias : 1; // represents one of the multiple simultaneous lifetimes + uint16_t fIsRetValue : 1; // represents a function return value + uint16_t fIsOptimizedOut : 1; // variable has no lifetimes + uint16_t fIsEnregGlob : 1; // variable is an enregistered global + uint16_t fIsEnregStat : 1; // variable is an enregistered static + uint16_t unused : 5; // must be zero + } flags; + + PDB_FLEXIBLE_ARRAY_MEMBER(char, name); + } S_LOCAL; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L4236 + struct + { + uint16_t reg; // Register to hold the value of the symbol + + struct + { + uint16_t maybe : 1; // May have no user name on one of control flow path. + uint16_t padding : 15; // Padding for future use. + } attribute; // Attribute of the register range. + + LocalVariableAddressRange range; // Range of addresses where this program is valid + PDB_FLEXIBLE_ARRAY_MEMBER(LocalVariableAddressGap, gaps); // The value is not available in following gaps. + } S_DEFRANGE_REGISTER; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L4245 + struct + { + uint32_t offsetFramePointer; + LocalVariableAddressRange range; // Range of addresses where this program is valid + PDB_FLEXIBLE_ARRAY_MEMBER(LocalVariableAddressGap, gaps); // The value is not available in following gaps. + } S_DEFRANGE_FRAMEPOINTER_REL; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L4265 + struct + { + uint16_t reg; // Register to hold the value of the symbol + + struct + { + uint16_t maybe : 1; // May have no user name on one of control flow path. + uint16_t padding : 15; // Padding for future use. + } attribute; // Attribute of the register range. + + uint32_t offsetParent : 12; // Offset in parent variable. + uint32_t padding : 20; // Padding for future use. 
+ LocalVariableAddressRange range; // Range of addresses where this program is valid + PDB_FLEXIBLE_ARRAY_MEMBER(LocalVariableAddressGap, gaps); // The value is not available in following gaps. + } S_DEFRANGE_SUBFIELD_REGISTER; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L4255 + struct + { + uint32_t offsetFramePointer; // offset to frame pointer + } S_DEFRANGE_FRAMEPOINTER_REL_FULL_SCOPE; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L4279 + struct + { + uint16_t baseRegister; // Register to hold the base pointer of the symbol + uint16_t spilledUDTMember : 1; // Spilled member for s.i. + uint16_t padding : 3; // Padding for future use. + uint16_t offsetParent : 12; // Offset in parent variable. + uint32_t offsetBasePointer; // offset to register + LocalVariableAddressRange range; // Range of addresses where this program is valid + PDB_FLEXIBLE_ARRAY_MEMBER(LocalVariableAddressGap, gaps); // The value is not available in following gaps. 
+ } S_DEFRANGE_REGISTER_REL; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L4500 + struct + { + uint32_t offset; // offset of call site + uint16_t section; // section index of call site + uint16_t instructionLength; // length of heap allocation call instruction + uint32_t typeIndex; // type index describing function signature + } S_HEAPALLOCSITE; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L4402 + struct + { + uint32_t offsetBase; // Section-relative offset to the base for switch offsets + uint16_t sectionBase; // Section index of the base for switch offsets + ARMSwitchType switchType; // type of each entry + uint32_t offsetBranch; // Section-relative offset to the table branch instruction + uint32_t offsetTable; // Section-relative offset to the start of the table + uint16_t sectionBranch; // Section index of the table branch instruction + uint16_t sectionTable; // Section index of the table + uint32_t numEntries; // number of switch table entries + } S_ARMSWITCHTABLE; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L4382 + struct + { + uint32_t count; // Number of functions + PDB_FLEXIBLE_ARRAY_MEMBER(uint32_t, funcs); // List of functions, dim == count + // uint32_t invocations[CV_ZEROLEN]; Followed by a parallel array of + // invocation counts. 
Counts > reclen are assumed to be zero + } S_CALLERS, S_CALLEES, S_INLINEES; + + struct + { + uint32_t typeIndex; + PDB_FLEXIBLE_ARRAY_MEMBER(char, name); + } S_UDT, S_UDT_ST; + + struct + { + uint32_t unknown1; + uint32_t typeIndex; + uint32_t unknown2; + Register reg; + PDB_FLEXIBLE_ARRAY_MEMBER(char, name); + + } S_REGREL32_INDIR; +#pragma pack(pop) + } data; + }; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L4576 + enum class PDB_NO_DISCARD DebugSubsectionKind : uint32_t + { + S_IGNORE = 0x80000000, // if this bit is set in a subsection type then ignore the subsection contents + + S_SYMBOLS = 0xF1, + S_LINES = 0xF2, + S_STRINGTABLE = 0xF3, + S_FILECHECKSUMS = 0xF4, + S_FRAMEDATA = 0xF5, + S_INLINEELINES = 0xF6, + S_CROSSSCOPEIMPORTS = 0xF7, + S_CROSSSCOPEEXPORTS = 0xF8, + + S_IL_LINES = 0xF9, + S_FUNC_MDTOKEN_MAP = 0xFA, + S_TYPE_MDTOKEN_MAP = 0xFB, + S_MERGED_ASSEMBLYINPUT = 0xFC, + + S_COFF_SYMBOL_RVA = 0xFD, + }; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L4596 + struct DebugSubsectionHeader + { + DebugSubsectionKind kind; + uint32_t size; + }; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L4617 + struct Line + { + uint32_t offset; // Offset to start of code bytes for line number + uint32_t linenumStart : 24; // line where statement/expression starts + uint32_t deltaLineEnd : 7; // delta to line where statement ends (optional) + uint32_t fStatement : 1; // true if a statement linenumber, else an expression line num + }; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L4630 + struct Column + { + uint16_t start; + uint16_t end; + }; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L4601 + struct LinesHeader + { + uint32_t sectionOffset; + uint16_t sectionIndex; + struct + { + uint16_t fHasColumns : 1; + uint16_t pad : 15; + } flags; + + uint32_t codeSize; + }; + + // 
https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L4608 + struct LinesFileBlockHeader + { + uint32_t fileChecksumOffset; + uint32_t numLines; + uint32_t size; + // Line lines[numLines]; + // Column columns[numLines]; Might not be present + }; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvconst.h#L88 + enum class PDB_NO_DISCARD ChecksumKind : uint8_t + { + None = 0, + MD5 = 1, + SHA1 = 2, + SHA256 = 3, + }; + + // https://github.com/microsoft/microsoft-pdb/blob/master/cvdump/dumpsym7.cpp#L1097 + struct FileChecksumHeader + { + uint32_t filenameOffset; + uint8_t checksumSize; + ChecksumKind checksumKind; + PDB_FLEXIBLE_ARRAY_MEMBER(uint8_t, checksum); + }; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L4822 + enum class InlineeSourceLineKind : uint32_t + { + Signature = 0, + SignatureEx = 1, + }; + + struct InlineeSourceLineHeader + { + InlineeSourceLineKind kind; + }; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L4825 + struct InlineeSourceLine + { + uint32_t inlinee; + uint32_t fileChecksumOffset; + uint32_t lineNumber; + }; + + struct InlineeSourceLineEx + { + uint32_t inlinee; + uint32_t fileChecksumOffset; + uint32_t lineNumber; + uint32_t extraLines; + PDB_FLEXIBLE_ARRAY_MEMBER(uint32_t, extrafileChecksumOffsets); + }; + + // Combine DebugSubsectionHeader and first subsection header into one struct. 
+ struct LineSection + { + DebugSubsectionHeader header; + union + { + LinesHeader linesHeader; + FileChecksumHeader checksumHeader; + InlineeSourceLineHeader inlineeHeader; + }; + }; + } + } +} diff --git a/Includes/raw_pdb/src/PDB_DirectMSFStream.cpp b/Includes/raw_pdb/src/PDB_DirectMSFStream.cpp new file mode 100644 index 0000000..442dc76 --- /dev/null +++ b/Includes/raw_pdb/src/PDB_DirectMSFStream.cpp @@ -0,0 +1,115 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#include "PDB_PCH.h" +#include "PDB_DirectMSFStream.h" +#include "Foundation/PDB_PointerUtil.h" +#include "Foundation/PDB_BitUtil.h" +#include "Foundation/PDB_Assert.h" +#include "Foundation/PDB_CRT.h" + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::DirectMSFStream::DirectMSFStream(void) PDB_NO_EXCEPT + : m_data(nullptr) + , m_blockIndices(nullptr) + , m_blockSize(0u) + , m_size(0u) + , m_blockSizeLog2(0u) +{ +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::DirectMSFStream::DirectMSFStream(const void* data, uint32_t blockSize, const uint32_t* blockIndices, uint32_t streamSize) PDB_NO_EXCEPT + : m_data(data) + , m_blockIndices(blockIndices) + , m_blockSize(blockSize) + , m_size(streamSize) + , m_blockSizeLog2(BitUtil::FindFirstSetBit(blockSize)) +{ + PDB_ASSERT(BitUtil::IsPowerOfTwo(blockSize), "MSF block size must be a power of two."); +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +void 
PDB::DirectMSFStream::ReadAtOffset(void* destination, size_t size, size_t offset) const PDB_NO_EXCEPT // copies size bytes, starting at stream offset, into destination; follows m_blockIndices when the range spans several blocks +{ + PDB_ASSERT(destination != nullptr, "Destination buffer not set"); + PDB_ASSERT(offset + size <= m_size, "Not enough data left to read."); + + // work out which block and offset within the block the read offset corresponds to + size_t blockIndex = offset >> m_blockSizeLog2; + const size_t offsetWithinBlock = offset & (m_blockSize - 1u); // masking works because the constructor asserts that the block size is a power of two + + // work out the offset within the data based on the block indices + size_t offsetWithinData = (static_cast(m_blockIndices[blockIndex]) << m_blockSizeLog2) + offsetWithinBlock; + const size_t bytesLeftInBlock = m_blockSize - offsetWithinBlock; + + if (bytesLeftInBlock >= size) + { + // fast path, all the data can be read in one go + const void* const sourceData = Pointer::Offset(m_data, offsetWithinData); + memcpy(destination, sourceData, size); + } + else + { + // slower path, data is scattered across several blocks. + // read remaining bytes in current block first.
+ { + const void* const sourceData = Pointer::Offset(m_data, offsetWithinData); + memcpy(destination, sourceData, bytesLeftInBlock); + } + + // read remaining bytes from blocks + size_t bytesLeftToRead = size - bytesLeftInBlock; + while (bytesLeftToRead != 0u) + { + // advance to the next block + ++blockIndex; + offsetWithinData = static_cast(m_blockIndices[blockIndex]) << m_blockSizeLog2; + + void* const destinationData = Pointer::Offset(destination, size - bytesLeftToRead); // size minus bytesLeftToRead is the number of bytes copied so far + const void* const sourceData = Pointer::Offset(m_data, offsetWithinData); + + if (bytesLeftToRead > m_blockSize) + { + // copy a whole block at once + memcpy(destinationData, sourceData, m_blockSize); + bytesLeftToRead -= m_blockSize; + } + else + { + // copy remaining bytes + memcpy(destinationData, sourceData, bytesLeftToRead); + bytesLeftToRead -= bytesLeftToRead; // drops to zero, which ends the loop + } + } + } +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB_NO_DISCARD PDB::DirectMSFStream::IndexAndOffset PDB::DirectMSFStream::GetBlockIndexForOffset(uint32_t offset) const PDB_NO_EXCEPT +{ + // work out which block and offset within the block the offset corresponds to + const uint32_t blockIndex = offset >> m_blockSizeLog2; + const uint32_t offsetWithinBlock = offset & (m_blockSize - 1u); + + return IndexAndOffset { blockIndex, offsetWithinBlock }; +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB_NO_DISCARD size_t PDB::DirectMSFStream::GetDataOffsetForIndexAndOffset(const IndexAndOffset& indexAndOffset) const PDB_NO_EXCEPT +{ + // work out the offset within the data based on the block indices: indexAndOffset.index selects an entry of m_blockIndices, whose value is scaled by the block size + const size_t offsetWithinData = (static_cast(m_blockIndices[indexAndOffset.index]) << m_blockSizeLog2) +
indexAndOffset.offsetWithinBlock; + + return offsetWithinData; +} diff --git a/Includes/raw_pdb/src/PDB_DirectMSFStream.h b/Includes/raw_pdb/src/PDB_DirectMSFStream.h new file mode 100644 index 0000000..7002459 --- /dev/null +++ b/Includes/raw_pdb/src/PDB_DirectMSFStream.h @@ -0,0 +1,84 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + +#include "Foundation/PDB_Macros.h" + + +// https://llvm.org/docs/PDB/index.html#the-msf-container +// https://llvm.org/docs/PDB/MsfFile.html +namespace PDB +{ + // provides direct access to the data of an MSF stream. + // inherently thread-safe, the stream doesn't carry any internal offset or similar. + // trivial to construct. + // slower individual reads, but pays off when not all data of a stream is needed. + class PDB_NO_DISCARD DirectMSFStream + { + public: + DirectMSFStream(void) PDB_NO_EXCEPT; + explicit DirectMSFStream(const void* data, uint32_t blockSize, const uint32_t* blockIndices, uint32_t streamSize) PDB_NO_EXCEPT; // data and blockIndices are stored, not copied - NOTE(review): they presumably must outlive the stream, confirm with callers + + PDB_DEFAULT_MOVE(DirectMSFStream); + + // Reads size bytes, starting at the given stream offset, into destination. + void ReadAtOffset(void* destination, size_t size, size_t offset) const PDB_NO_EXCEPT; + + // Reads a value of type T from the given stream offset by copying sizeof(T) bytes into a local T, which is returned by value. + template + PDB_NO_DISCARD inline T ReadAtOffset(size_t offset) const PDB_NO_EXCEPT + { + T data; + ReadAtOffset(&data, sizeof(T), offset); + return data; + } + + // Returns the block size of the stream. + PDB_NO_DISCARD inline uint32_t GetBlockSize(void) const PDB_NO_EXCEPT + { + return m_blockSize; + } + + // Returns the size of the stream. + PDB_NO_DISCARD inline uint32_t GetSize(void) const PDB_NO_EXCEPT + { + return m_size; + } + + private: + friend class CoalescedMSFStream; + + struct IndexAndOffset // result of splitting a stream offset: index into m_blockIndices plus the byte offset inside that block + { + uint32_t index; + uint32_t offsetWithinBlock; + }; + + // Returns the block index and offset within the block that correspond to the given offset.
+ PDB_NO_DISCARD IndexAndOffset GetBlockIndexForOffset(uint32_t offset) const PDB_NO_EXCEPT; + + // Returns the offset into the data that corresponds to the given indices and offset within a block. + PDB_NO_DISCARD size_t GetDataOffsetForIndexAndOffset(const IndexAndOffset& indexAndOffset) const PDB_NO_EXCEPT; + + // Provides read-only access to the memory-mapped data. + PDB_NO_DISCARD inline const void* GetData(void) const PDB_NO_EXCEPT + { + return m_data; + } + + // Provides read-only access to the block indices. + PDB_NO_DISCARD inline const uint32_t* GetBlockIndices(void) const PDB_NO_EXCEPT + { + return m_blockIndices; + } + + const void* m_data; + const uint32_t* m_blockIndices; + uint32_t m_blockSize; // bytes per block + uint32_t m_size; // stream size in bytes; ReadAtOffset asserts reads stay below it + uint32_t m_blockSizeLog2; // shift amount that replaces a division by m_blockSize + + PDB_DISABLE_COPY(DirectMSFStream); + }; +} diff --git a/Includes/raw_pdb/src/PDB_ErrorCodes.h b/Includes/raw_pdb/src/PDB_ErrorCodes.h new file mode 100644 index 0000000..bac0c73 --- /dev/null +++ b/Includes/raw_pdb/src/PDB_ErrorCodes.h @@ -0,0 +1,26 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + +#include "Foundation/PDB_Macros.h" + + +namespace PDB +{ + enum class PDB_NO_DISCARD ErrorCode : unsigned int // result codes of validation routines such as HasValidIPIStream() + { + Success = 0u, + + // main PDB validation + InvalidDataSize, + InvalidSuperBlock, + InvalidFreeBlockMap, + + // stream validation + InvalidStream, + InvalidSignature, + InvalidStreamIndex, + UnknownVersion + }; +} diff --git a/Includes/raw_pdb/src/PDB_GlobalSymbolStream.cpp b/Includes/raw_pdb/src/PDB_GlobalSymbolStream.cpp new file mode 100644 index 0000000..461b869 --- /dev/null +++ b/Includes/raw_pdb/src/PDB_GlobalSymbolStream.cpp @@ -0,0 +1,43 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#include "PDB_PCH.h" +#include "PDB_GlobalSymbolStream.h" +#include
"PDB_RawFile.h" +#include "PDB_Types.h" +#include "PDB_DBITypes.h" + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::GlobalSymbolStream::GlobalSymbolStream(void) PDB_NO_EXCEPT // empty stream: no hash records + : m_stream() + , m_hashRecords(nullptr) + , m_count(0u) +{ +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::GlobalSymbolStream::GlobalSymbolStream(const RawFile& file, uint16_t streamIndex, uint32_t count) PDB_NO_EXCEPT + : m_stream(file.CreateMSFStream(streamIndex)) + , m_hashRecords(m_stream.GetDataAtOffset(sizeof(HashTableHeader))) // the hash records are taken from right behind the HashTableHeader at the start of the stream + , m_count(count) // taken from the caller as-is, not derived from the stream +{ +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB_NO_DISCARD const PDB::CodeView::DBI::Record* PDB::GlobalSymbolStream::GetRecord(const CoalescedMSFStream& symbolRecordStream, const HashRecord& hashRecord) const PDB_NO_EXCEPT +{ + // hash record offsets start at 1, not at 0 (NOTE(review): an offset of 0 would wrap around in the subtraction below - confirm it cannot occur) + const uint32_t headerOffset = hashRecord.offset - 1u; + + // the offset doesn't point to the global symbol directly, but to the CodeView record: + // https://llvm.org/docs/PDB/CodeViewSymbols.html + const CodeView::DBI::Record* record = symbolRecordStream.GetDataAtOffset(headerOffset); + + return record; +} diff --git a/Includes/raw_pdb/src/PDB_GlobalSymbolStream.h b/Includes/raw_pdb/src/PDB_GlobalSymbolStream.h new file mode 100644 index 0000000..cb58777 --- /dev/null +++ b/Includes/raw_pdb/src/PDB_GlobalSymbolStream.h @@ -0,0 +1,49 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License:
https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + +#include "Foundation/PDB_Macros.h" +#include "Foundation/PDB_ArrayView.h" +#include "PDB_CoalescedMSFStream.h" + + +namespace PDB +{ + class RawFile; + struct HashRecord; + + namespace CodeView + { + namespace DBI + { + struct Record; + } + } + + + class PDB_NO_DISCARD GlobalSymbolStream // owns the coalesced stream and exposes the hash records stored in it + { + public: + GlobalSymbolStream(void) PDB_NO_EXCEPT; + explicit GlobalSymbolStream(const RawFile& file, uint16_t streamIndex, uint32_t count) PDB_NO_EXCEPT; + + PDB_DEFAULT_MOVE(GlobalSymbolStream); + + // Turns a given hash record into a DBI record using the given symbol stream. + PDB_NO_DISCARD const CodeView::DBI::Record* GetRecord(const CoalescedMSFStream& symbolRecordStream, const HashRecord& hashRecord) const PDB_NO_EXCEPT; + + // Returns a view of the m_count hash records of the stream (HashRecord entries, not DBI records); GetRecord() resolves one of them. + PDB_NO_DISCARD inline ArrayView GetRecords(void) const PDB_NO_EXCEPT + { + return ArrayView(m_hashRecords, m_count); + } + + private: + CoalescedMSFStream m_stream; + const HashRecord* m_hashRecords; // obtained from m_stream.GetDataAtOffset() in the constructor, never deleted by this class + uint32_t m_count; + + PDB_DISABLE_COPY(GlobalSymbolStream); + }; +} diff --git a/Includes/raw_pdb/src/PDB_IPIStream.cpp b/Includes/raw_pdb/src/PDB_IPIStream.cpp new file mode 100644 index 0000000..dfd21bc --- /dev/null +++ b/Includes/raw_pdb/src/PDB_IPIStream.cpp @@ -0,0 +1,140 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#include "PDB_PCH.h" +#include "PDB_IPIStream.h" +#include "PDB_RawFile.h" +#include "PDB_Util.h" +#include "PDB_DirectMSFStream.h" +#include "PDB_InfoStream.h" +#include "Foundation/PDB_Memory.h" + +namespace +{ + // the IPI stream always resides at index 4 + static constexpr const uint32_t IPIStreamIndex = 4u; +} + + +// ------------------------------------------------------------------------------------------------ +//
------------------------------------------------------------------------------------------------ +PDB::IPIStream::IPIStream(void) PDB_NO_EXCEPT // empty stream: no records, nothing to free + : m_header() + , m_stream() + , m_records(nullptr) + , m_recordCount(0u) +{ +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::IPIStream::IPIStream(IPIStream&& other) PDB_NO_EXCEPT // takes over the record array and leaves other empty, so only one object ever deletes it + : m_header(PDB_MOVE(other.m_header)) + , m_stream(PDB_MOVE(other.m_stream)) + , m_records(PDB_MOVE(other.m_records)) + , m_recordCount(PDB_MOVE(other.m_recordCount)) +{ + other.m_records = nullptr; + other.m_recordCount = 0u; +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::IPIStream& PDB::IPIStream::operator=(IPIStream&& other) PDB_NO_EXCEPT +{ + if (this != &other) + { + PDB_DELETE_ARRAY(m_records); // release our own array before taking over the other one + + m_header = PDB_MOVE(other.m_header); + m_stream = PDB_MOVE(other.m_stream); + m_records = PDB_MOVE(other.m_records); + m_recordCount = PDB_MOVE(other.m_recordCount); + + other.m_records = nullptr; + other.m_recordCount = 0u; + } + + return *this; +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::IPIStream::IPIStream(const RawFile& file, const IPI::StreamHeader& header) PDB_NO_EXCEPT + : m_header(header) + , m_stream(file.CreateMSFStream(IPIStreamIndex)) + , m_records(nullptr) + , m_recordCount(GetLastTypeIndex() - GetFirstTypeIndex()) // both getters read m_header, which is initialized above +{ + // types in the IPI stream are accessed by their index from other streams. + // however, the index is not stored with types in the IPI stream directly, but has to be built while walking the stream.
+ // similarly, because types are variable-length records, there are no direct offsets to access individual types. + // we therefore walk the IPI stream once, and store pointers to the records for trivial O(1) array lookup by index later. + m_records = PDB_NEW_ARRAY(const CodeView::IPI::Record*, m_recordCount); + + // ignore the stream's header + size_t offset = sizeof(IPI::StreamHeader); + + // parse the CodeView records; the index check stops a malformed stream that holds more records than the header announces from writing past the end of m_records + uint32_t typeIndex = 0u; + while ((offset < m_stream.GetSize()) && (typeIndex < m_recordCount)) + { + // https://llvm.org/docs/PDB/CodeViewTypes.html + const CodeView::IPI::Record* record = m_stream.GetDataAtOffset(offset); + const uint32_t recordSize = GetCodeViewRecordSize(record); + m_records[typeIndex] = record; + + // position the stream offset at the next record + offset += sizeof(CodeView::IPI::RecordHeader) + recordSize; + + ++typeIndex; + } +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::IPIStream::~IPIStream(void) PDB_NO_EXCEPT +{ + PDB_DELETE_ARRAY(m_records); // only the pointer array is owned here; the records themselves are pointers obtained from m_stream +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB_NO_DISCARD PDB::ErrorCode PDB::HasValidIPIStream(const RawFile& file) PDB_NO_EXCEPT // InvalidStream without an IPI feature code or with a stream shorter than the header, UnknownVersion for any version other than V80, otherwise Success +{ + const PDB::InfoStream infoStream(file); + if (!infoStream.HasIPIStream()) + { + return ErrorCode::InvalidStream; + } + + DirectMSFStream stream = file.CreateMSFStream(IPIStreamIndex); + if (stream.GetSize() < sizeof(IPI::StreamHeader)) + { + return ErrorCode::InvalidStream; + } + + const IPI::StreamHeader header = stream.ReadAtOffset(0u); + if (header.version != IPI::StreamHeader::Version::V80) + { + return ErrorCode::UnknownVersion; + } + + return ErrorCode::Success; +} + + +//
------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB_NO_DISCARD PDB::IPIStream PDB::CreateIPIStream(const RawFile& file) PDB_NO_EXCEPT // reads the header through a DirectMSFStream and builds the IPIStream from it; nothing is validated here, see HasValidIPIStream() +{ + DirectMSFStream stream = file.CreateMSFStream(IPIStreamIndex); + + const IPI::StreamHeader header = stream.ReadAtOffset(0u); + return IPIStream { file, header }; +} diff --git a/Includes/raw_pdb/src/PDB_IPIStream.h b/Includes/raw_pdb/src/PDB_IPIStream.h new file mode 100644 index 0000000..0efdbe2 --- /dev/null +++ b/Includes/raw_pdb/src/PDB_IPIStream.h @@ -0,0 +1,66 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + +#include "Foundation/PDB_Macros.h" +#include "Foundation/PDB_ArrayView.h" +#include "PDB_ErrorCodes.h" +#include "PDB_IPITypes.h" +#include "PDB_CoalescedMSFStream.h" + + +// PDB IPI stream +// https://llvm.org/docs/PDB/TpiStream.html +namespace PDB +{ + class RawFile; + + + class PDB_NO_DISCARD IPIStream + { + public: + IPIStream(void) PDB_NO_EXCEPT; + IPIStream(IPIStream&& other) PDB_NO_EXCEPT; + IPIStream& operator=(IPIStream&& other) PDB_NO_EXCEPT; + + explicit IPIStream(const RawFile& file, const IPI::StreamHeader& header) PDB_NO_EXCEPT; + ~IPIStream(void) PDB_NO_EXCEPT; + + // Returns the index of the first type, which is not necessarily zero. + PDB_NO_DISCARD inline uint32_t GetFirstTypeIndex(void) const PDB_NO_EXCEPT + { + return m_header.typeIndexBegin; + } + + // Returns the header's typeIndexEnd. The constructor computes the record count as this value minus GetFirstTypeIndex(), so it is presumably one past the last valid type index - confirm against the TPI header documentation. + PDB_NO_DISCARD inline uint32_t GetLastTypeIndex(void) const PDB_NO_EXCEPT + { + return m_header.typeIndexEnd; + } + + // Returns a view of all type records. + // Records identified by a type index can be accessed via "allRecords[typeIndex - firstTypeIndex]".
+ PDB_NO_DISCARD inline ArrayView GetTypeRecords(void) const PDB_NO_EXCEPT + { + return ArrayView(m_records, m_recordCount); + } + + private: + IPI::StreamHeader m_header; // declared first: the parsing constructor reads it while initializing m_recordCount + CoalescedMSFStream m_stream; + const CodeView::IPI::Record** m_records; // array allocated with PDB_NEW_ARRAY and released with PDB_DELETE_ARRAY in the .cpp + size_t m_recordCount; + + PDB_DISABLE_COPY(IPIStream); + }; + + + // ------------------------------------------------------------------------------------------------ + // General + // ------------------------------------------------------------------------------------------------ + + PDB_NO_DISCARD ErrorCode HasValidIPIStream(const RawFile& file) PDB_NO_EXCEPT; // returns ErrorCode::Success when the IPI stream can be used + + PDB_NO_DISCARD IPIStream CreateIPIStream(const RawFile& file) PDB_NO_EXCEPT; // performs no validation of its own +} diff --git a/Includes/raw_pdb/src/PDB_IPITypes.h b/Includes/raw_pdb/src/PDB_IPITypes.h new file mode 100644 index 0000000..c9c4e08 --- /dev/null +++ b/Includes/raw_pdb/src/PDB_IPITypes.h @@ -0,0 +1,144 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + +#include "Foundation/PDB_Macros.h" + + +namespace PDB +{ + namespace IPI + { + // https://llvm.org/docs/PDB/TpiStream.html#tpi-header + struct StreamHeader // read verbatim from offset 0 of the IPI stream by HasValidIPIStream() and CreateIPIStream() + { + enum class PDB_NO_DISCARD Version : uint32_t + { + V40 = 19950410u, + V41 = 19951122u, + V50 = 19961031u, + V70 = 19990903u, + V80 = 20040203u + }; + + Version version; + uint32_t headerSize; + uint32_t typeIndexBegin; + uint32_t typeIndexEnd; + uint32_t typeRecordBytes; + uint16_t hashStreamIndex; + uint16_t hashAuxStreamIndex; + uint32_t hashKeySize; + uint32_t hashBucketCount; + uint32_t hashValueBufferOffset; + uint32_t hashValueBufferLength; + uint32_t indexOffsetBufferOffset; + uint32_t indexOffsetBufferLength; + uint32_t hashAdjBufferOffset; + uint32_t hashAdjBufferLength; + }; + } + + + namespace CodeView + { + namespace IPI + { + // code view type records that can appear in an IPI stream + // https://llvm.org/docs/PDB/CodeViewTypes.html +
// https://llvm.org/docs/PDB/TpiStream.html#tpi-vs-ipi-stream + enum class PDB_NO_DISCARD TypeRecordKind : uint16_t + { + LF_FUNC_ID = 0x1601u, // global function ID + LF_MFUNC_ID = 0x1602u, // member function ID + LF_BUILDINFO = 0x1603u, // build information + LF_SUBSTR_LIST = 0x1604u, // similar to LF_ARGLIST for a list of substrings + LF_STRING_ID = 0x1605u, // string ID + LF_UDT_SRC_LINE = 0x1606u, // source and line on where an UDT (User Defined Type) is defined, generated by the compiler + LF_UDT_MOD_SRC_LINE = 0x1607u // module, source and line on where an UDT is defined, generated by the linker + }; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L1715 + enum class PDB_NO_DISCARD BuildInfoType : uint8_t // NOTE(review): presumably the position of each entry inside LF_BUILDINFO.typeIndices - confirm against cvinfo.h + { + CurrentDirectory, // compiler working directory + BuildTool, // tool path + SourceFile, // path to source file, relative or absolute + TypeServerPDB, // path to PDB file + CommandLine // command-line used to build the source file + }; + + struct RecordHeader + { + uint16_t size; // record length, not including this 2-byte field + TypeRecordKind kind; // record kind + }; + + // all CodeView records are stored as a header, followed by variable-length data. + // internal Record structs such as LF_FUNC_ID, LF_STRING_ID, etc. correspond to the data layout of a CodeView record of that kind.
+ struct Record + { + RecordHeader header; + union Data // all members share storage and are declared under pack(1), i.e. without padding; NOTE(review): presumably header.kind tells which member is valid - confirm + { +#pragma pack(push, 1) + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L1680 + struct + { + uint32_t scopeId; // parent scope of the ID, 0 if global + uint32_t typeIndex; // function type + PDB_FLEXIBLE_ARRAY_MEMBER(char, name); + } LF_FUNC_ID; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L1687 + struct + { + uint32_t parentTypeIndex; // type index of the parent type (NOTE(review): previous wording was copied from LF_FUNC_ID; confirm against lfMFuncId in cvinfo.h) + uint32_t typeIndex; // function type + PDB_FLEXIBLE_ARRAY_MEMBER(char, name); + } LF_MFUNC_ID; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L1694 + struct + { + uint32_t id; // ID to list of sub-string IDs + PDB_FLEXIBLE_ARRAY_MEMBER(char, name); + } LF_STRING_ID; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L1700 + struct + { + uint32_t typeIndex; // UDT's type index + uint32_t stringIndex; // index to LF_STRING_ID record where source file name is saved + uint32_t line; // line number + } LF_UDT_SRC_LINE; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L1707 + struct + { + uint32_t typeIndex; // UDT's type index + uint32_t stringIndex; // index into '/names' string table where source file name is saved + uint32_t line; // line number + uint16_t moduleIndex; // module that contributes this UDT definition + } LF_UDT_MOD_SRC_LINE; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L2043 + struct + { + uint32_t count; + PDB_FLEXIBLE_ARRAY_MEMBER(uint32_t, typeIndices); + } LF_SUBSTR_LIST; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L1726 + struct + { + uint16_t count; // note: 16 bits wide here, unlike the 32-bit count of LF_SUBSTR_LIST + PDB_FLEXIBLE_ARRAY_MEMBER(uint32_t, typeIndices); + } LF_BUILDINFO; +#pragma pack(pop) + } data; + }; + } + } +} diff --git a/Includes/raw_pdb/src/PDB_ImageSectionStream.cpp b/Includes/raw_pdb/src/PDB_ImageSectionStream.cpp new file mode 100644 index
0000000..3d495ad --- /dev/null +++ b/Includes/raw_pdb/src/PDB_ImageSectionStream.cpp @@ -0,0 +1,47 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#include "PDB_PCH.h" +#include "PDB_ImageSectionStream.h" +#include "PDB_RawFile.h" + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::ImageSectionStream::ImageSectionStream(void) PDB_NO_EXCEPT // empty stream: no section headers + : m_stream() + , m_headers(nullptr) + , m_count(0u) +{ +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::ImageSectionStream::ImageSectionStream(const RawFile& file, uint16_t streamIndex) PDB_NO_EXCEPT + : m_stream(file.CreateMSFStream(streamIndex)) + , m_headers(m_stream.GetDataAtOffset(0u)) // the section headers start at the very beginning of the stream + , m_count(m_stream.GetSize() / sizeof(IMAGE_SECTION_HEADER)) // number of whole headers that fit into the stream +{ +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB_NO_DISCARD uint32_t PDB::ImageSectionStream::ConvertSectionOffsetToRVA(uint16_t oneBasedSectionIndex, uint32_t offsetInSection) const PDB_NO_EXCEPT // returns 0 for an index of 0 or an index beyond the known sections +{ + if (oneBasedSectionIndex == 0u) + { + // should never happen, but prevent underflow + return 0u; + } + else if (oneBasedSectionIndex > m_count) + { + // this symbol is "contained" in a section that is neither part of the PDB, nor the EXE. + // it is a special compiler-generated or linker-generated symbol such as CFG symbols (e.g. __guard_fids_count, __guard_flags). + // we can safely ignore those symbols.
+ return 0u; + } + + return m_headers[oneBasedSectionIndex - 1u].VirtualAddress + offsetInSection; // the index is one-based, the array zero-based +} diff --git a/Includes/raw_pdb/src/PDB_ImageSectionStream.h b/Includes/raw_pdb/src/PDB_ImageSectionStream.h new file mode 100644 index 0000000..190c722 --- /dev/null +++ b/Includes/raw_pdb/src/PDB_ImageSectionStream.h @@ -0,0 +1,42 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + +#include "Foundation/PDB_Macros.h" +#include "Foundation/PDB_ArrayView.h" +#include "PDB_Types.h" +#include "PDB_CoalescedMSFStream.h" + + +namespace PDB +{ + class RawFile; + struct IMAGE_SECTION_HEADER; + + + class PDB_NO_DISCARD ImageSectionStream + { + public: + ImageSectionStream(void) PDB_NO_EXCEPT; + explicit ImageSectionStream(const RawFile& file, uint16_t streamIndex) PDB_NO_EXCEPT; + + PDB_DEFAULT_MOVE(ImageSectionStream); + + // Converts an offset inside the section with the given one-based index into an RVA. Returns 0 when the index is 0 or larger than the number of sections. + PDB_NO_DISCARD uint32_t ConvertSectionOffsetToRVA(uint16_t oneBasedSectionIndex, uint32_t offsetInSection) const PDB_NO_EXCEPT; + + // Returns a view of all the sections in the stream.
+ PDB_NO_DISCARD inline ArrayView GetImageSections(void) const PDB_NO_EXCEPT + { + return ArrayView(m_headers, m_count); + } + + private: + CoalescedMSFStream m_stream; + const IMAGE_SECTION_HEADER* m_headers; + size_t m_count; + + PDB_DISABLE_COPY(ImageSectionStream); + }; +} diff --git a/Includes/raw_pdb/src/PDB_InfoStream.cpp b/Includes/raw_pdb/src/PDB_InfoStream.cpp new file mode 100644 index 0000000..fa16600 --- /dev/null +++ b/Includes/raw_pdb/src/PDB_InfoStream.cpp @@ -0,0 +1,102 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#include "PDB_PCH.h" +#include "PDB_InfoStream.h" +#include "PDB_RawFile.h" +#include "Foundation/PDB_CRT.h" + +namespace +{ + // the PDB info stream always resides at index 1 + static constexpr const uint32_t InfoStreamIndex = 1u; +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::InfoStream::InfoStream(void) PDB_NO_EXCEPT // empty stream: no header, no names stream, all feature flags false + : m_stream() + , m_header(nullptr) + , m_namesStreamIndex(0) + , m_usesDebugFastlink(false) + , m_hasIPIStream(false) +{ +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::InfoStream::InfoStream(const RawFile& file) PDB_NO_EXCEPT // parses stream 1 once; fills in the names stream index and the two feature flags + : m_stream(file.CreateMSFStream(InfoStreamIndex)) + , m_header(m_stream.GetDataAtOffset(0u)) + , m_namesStreamIndex(0) + , m_usesDebugFastlink(false) + , m_hasIPIStream(false) +{ + // the info stream starts with the header, followed by the named stream map, followed by the feature codes + // https://llvm.org/docs/PDB/PdbStream.html#named-stream-map + size_t streamOffset = sizeof(Header); + + const NamedStreamMap* namedStreamMap =
m_stream.GetDataAtOffset(streamOffset); + streamOffset += sizeof(NamedStreamMap) + namedStreamMap->length; + + const SerializedHashTable::Header* hashTableHeader = m_stream.GetDataAtOffset(streamOffset); + streamOffset += sizeof(SerializedHashTable::Header); + + const SerializedHashTable::BitVector* presentBitVector = m_stream.GetDataAtOffset(streamOffset); // only read to learn its length, so that it can be skipped + streamOffset += sizeof(SerializedHashTable::BitVector) + sizeof(uint32_t) * presentBitVector->wordCount; + + const SerializedHashTable::BitVector* deletedBitVector = m_stream.GetDataAtOffset(streamOffset); // skipped the same way + streamOffset += sizeof(SerializedHashTable::BitVector) + sizeof(uint32_t) * deletedBitVector->wordCount; + + // the hash table entries can be used to identify the indices of certain common streams like: + // "/UDTSRCLINEUNDONE" + // "/src/headerblock" + // "/LinkInfo" + // "/TMCache" + // "/names" + + const NamedStreamMap::HashTableEntry* namedStreamMapHashEntries = m_stream.GetDataAtOffset(streamOffset); + + // Find "/names" stream, used to look up filenames for lines. The loop has no early exit, so the last matching entry wins.
+ for (uint32_t i = 0, size = hashTableHeader->size; i < size; ++i) + { + const NamedStreamMap::HashTableEntry& entry = namedStreamMapHashEntries[i]; + const char* streamName = &namedStreamMap->stringTable[entry.stringTableOffset]; + + if (strcmp("/names", streamName) == 0) + { + m_namesStreamIndex = entry.streamIndex; + } + } + + streamOffset += sizeof(NamedStreamMap::HashTableEntry) * hashTableHeader->size; + + // read feature codes by consuming remaining bytes (NOTE(review): nothing checks that streamOffset is still within the stream; a larger offset would wrap the subtraction below - confirm inputs are validated earlier) + // https://llvm.org/docs/PDB/PdbStream.html#pdb-feature-codes + const FeatureCode* featureCodes = m_stream.GetDataAtOffset(streamOffset); + const size_t remainingBytes = m_stream.GetSize() - streamOffset; + const size_t count = remainingBytes / sizeof(FeatureCode); + + for (size_t i=0u; i < count; ++i) + { + FeatureCode code = featureCodes[i]; + if (code == PDB::FeatureCode::MinimalDebugInfo) + { + m_usesDebugFastlink = true; + } + else if (code == PDB::FeatureCode::VC110 || code == PDB::FeatureCode::VC140) // either of these two codes is taken as the sign that an IPI stream exists + { + m_hasIPIStream = true; + } + } +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::NamesStream PDB::InfoStream::CreateNamesStream(const RawFile& file) const PDB_NO_EXCEPT // uses the index found by the constructor; it is 0 when no "/names" entry was seen +{ + return NamesStream(file, m_namesStreamIndex); +} diff --git a/Includes/raw_pdb/src/PDB_InfoStream.h b/Includes/raw_pdb/src/PDB_InfoStream.h new file mode 100644 index 0000000..9e15ebc --- /dev/null +++ b/Includes/raw_pdb/src/PDB_InfoStream.h @@ -0,0 +1,62 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + +#include "Foundation/PDB_Macros.h" +#include "PDB_Types.h" +#include "PDB_CoalescedMSFStream.h" +#include "PDB_NamesStream.h" + +namespace PDB +{ + class RawFile; + + + // PDB Info Stream + // https://llvm.org/docs/PDB/PdbStream.html
+ class PDB_NO_DISCARD InfoStream + { + public: + InfoStream(void) PDB_NO_EXCEPT; + explicit InfoStream(const RawFile& file) PDB_NO_EXCEPT; + + PDB_DEFAULT_MOVE(InfoStream); + + // Returns the header of the stream (nullptr for a default-constructed InfoStream). + PDB_NO_DISCARD inline const Header* GetHeader(void) const PDB_NO_EXCEPT + { + return m_header; + } + + // Returns whether a "/names" stream was found in the named stream map of the PDB; index 0 means none was found. + PDB_NO_DISCARD inline bool HasNamesStream(void) const PDB_NO_EXCEPT + { + return (m_namesStreamIndex != 0u); + } + + // Returns whether the PDB file was linked using /DEBUG:FASTLINK. + PDB_NO_DISCARD inline bool UsesDebugFastLink(void) const PDB_NO_EXCEPT + { + return m_usesDebugFastlink; + } + + // Returns whether the PDB file has an IPI stream. + PDB_NO_DISCARD inline bool HasIPIStream(void) const PDB_NO_EXCEPT + { + return m_hasIPIStream; + } + + // Creates the names stream from the given file, using the stream index found while parsing; check HasNamesStream() first. + PDB_NO_DISCARD NamesStream CreateNamesStream(const RawFile& file) const PDB_NO_EXCEPT; + + private: + CoalescedMSFStream m_stream; + const Header* m_header; + uint32_t m_namesStreamIndex; // 0 until a "/names" entry is found + bool m_usesDebugFastlink; + bool m_hasIPIStream; + + PDB_DISABLE_COPY(InfoStream); + }; +} diff --git a/Includes/raw_pdb/src/PDB_ModuleInfoStream.cpp b/Includes/raw_pdb/src/PDB_ModuleInfoStream.cpp new file mode 100644 index 0000000..86040ff --- /dev/null +++ b/Includes/raw_pdb/src/PDB_ModuleInfoStream.cpp @@ -0,0 +1,184 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#include "PDB_PCH.h" +#include "PDB_ModuleInfoStream.h" +#include "Foundation/PDB_Memory.h" +#include "Foundation/PDB_CRT.h" + +namespace +{ + static constexpr const char* LinkerSymbolName("* Linker *"); + + + // ------------------------------------------------------------------------------------------------ + // ------------------------------------------------------------------------------------------------ + PDB_NO_DISCARD static inline size_t
EstimateModuleCount(size_t streamSize) PDB_NO_EXCEPT + { + // work out how many modules are stored in the stream at most. + // the module info is stored in variable-length records, so we can't determine the exact number without walking the stream. + return streamSize / sizeof(PDB::DBI::ModuleInfo); + } +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::ModuleInfoStream::Module::Module(void) PDB_NO_EXCEPT + : m_info(nullptr) + , m_name(nullptr) + , m_nameLength(0u) + , m_objectName(nullptr) + , m_objectNameLength(0u) +{ +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::ModuleInfoStream::Module::Module(const DBI::ModuleInfo* info, const char* name, size_t nameLength, const char* objectName, size_t objectNameLength) PDB_NO_EXCEPT + : m_info(info) + , m_name(name) + , m_nameLength(nameLength) + , m_objectName(objectName) + , m_objectNameLength(objectNameLength) +{ +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB_NO_DISCARD bool PDB::ModuleInfoStream::Module::HasSymbolStream(void) const PDB_NO_EXCEPT +{ + const uint16_t streamIndex = m_info->moduleSymbolStreamIndex; + + // some modules don't have a symbol stream, i.e. no additional debug information is present. + // this usually happens when private symbols are stripped from a PDB. 
+ return (streamIndex != 0xFFFFu); +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB_NO_DISCARD bool PDB::ModuleInfoStream::Module::HasLineStream(void) const PDB_NO_EXCEPT +{ + return (m_info->c13Size > 0); +} + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB_NO_DISCARD PDB::ModuleSymbolStream PDB::ModuleInfoStream::Module::CreateSymbolStream(const RawFile& file) const PDB_NO_EXCEPT +{ + PDB_ASSERT(HasSymbolStream(), "Module symbol stream index is invalid."); + + return ModuleSymbolStream(file, m_info->moduleSymbolStreamIndex, m_info->symbolSize); +} + +PDB_NO_DISCARD PDB::ModuleLineStream PDB::ModuleInfoStream::Module::CreateLineStream(const RawFile& file) const PDB_NO_EXCEPT +{ + PDB_ASSERT(HasLineStream(), "Module line stream is not present."); + + return ModuleLineStream(file, m_info->moduleSymbolStreamIndex, m_info->symbolSize + m_info->c11Size + m_info->c13Size, m_info->symbolSize + m_info->c11Size); +} + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::ModuleInfoStream::ModuleInfoStream(void) PDB_NO_EXCEPT + : m_stream() + , m_modules(nullptr) + , m_moduleCount(0u) +{ +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::ModuleInfoStream::ModuleInfoStream(ModuleInfoStream&& other) PDB_NO_EXCEPT + : m_stream(PDB_MOVE(other.m_stream)) + , m_modules(PDB_MOVE(other.m_modules)) + , m_moduleCount(PDB_MOVE(other.m_moduleCount)) +{ + 
other.m_modules = nullptr; + other.m_moduleCount = 0u; +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::ModuleInfoStream& PDB::ModuleInfoStream::operator=(ModuleInfoStream&& other) PDB_NO_EXCEPT +{ + if (this != &other) + { + PDB_DELETE_ARRAY(m_modules); + + m_stream = PDB_MOVE(other.m_stream); + m_modules = PDB_MOVE(other.m_modules); + m_moduleCount = PDB_MOVE(other.m_moduleCount); + + other.m_modules = nullptr; + other.m_moduleCount = 0u; + } + + return *this; +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::ModuleInfoStream::ModuleInfoStream(const DirectMSFStream& directStream, uint32_t size, uint32_t offset) PDB_NO_EXCEPT + : m_stream(directStream, size, offset) + , m_modules(nullptr) + , m_moduleCount(0u) +{ + m_modules = PDB_NEW_ARRAY(Module, EstimateModuleCount(size)); + + size_t streamOffset = 0u; + while (streamOffset < size) + { + const DBI::ModuleInfo* moduleInfo = m_stream.GetDataAtOffset(streamOffset); + streamOffset += sizeof(DBI::ModuleInfo); + + const char* name = m_stream.GetDataAtOffset(streamOffset); + const size_t nameLength = strlen(name); + streamOffset += nameLength + 1u; + + const char* objectName = m_stream.GetDataAtOffset(streamOffset); + const size_t objectNameLength = strlen(objectName); + streamOffset += objectNameLength + 1u; + + // the stream is aligned to 4 bytes + streamOffset = BitUtil::RoundUpToMultiple(streamOffset, 4ul); + + m_modules[m_moduleCount] = Module(moduleInfo, name, nameLength, objectName, objectNameLength); + ++m_moduleCount; + } +} + + +// ------------------------------------------------------------------------------------------------ +// 
------------------------------------------------------------------------------------------------ +PDB::ModuleInfoStream::~ModuleInfoStream(void) PDB_NO_EXCEPT +{ + PDB_DELETE_ARRAY(m_modules); +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB_NO_DISCARD const PDB::ModuleInfoStream::Module* PDB::ModuleInfoStream::FindLinkerModule(void) const PDB_NO_EXCEPT +{ + const size_t count = m_moduleCount; + for (size_t i = 0u; i < count; ++i) + { + // with both MSVC cl.exe and Clang, the linker symbol is the last one to be stored, so start searching from the end + const Module& module = m_modules[count - i - 1u]; + + // check if this is the linker symbol + if (strcmp(module.GetName().Decay(), LinkerSymbolName) == 0) + { + return &module; + } + } + + return nullptr; +} diff --git a/Includes/raw_pdb/src/PDB_ModuleInfoStream.h b/Includes/raw_pdb/src/PDB_ModuleInfoStream.h new file mode 100644 index 0000000..4fef0fe --- /dev/null +++ b/Includes/raw_pdb/src/PDB_ModuleInfoStream.h @@ -0,0 +1,104 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + +#include "Foundation/PDB_Macros.h" +#include "Foundation/PDB_ArrayView.h" +#include "PDB_CoalescedMSFStream.h" +#include "PDB_ModuleSymbolStream.h" +#include "PDB_ModuleLineStream.h" + +namespace PDB +{ + class PDB_NO_DISCARD DirectMSFStream; + + class PDB_NO_DISCARD ModuleInfoStream + { + public: + class PDB_NO_DISCARD Module + { + public: + Module(void) PDB_NO_EXCEPT; + explicit Module(const DBI::ModuleInfo* info, const char* name, size_t nameLength, const char* objectName, size_t objectNameLength) PDB_NO_EXCEPT; + + PDB_DEFAULT_MOVE(Module); + + // Returns whether the module has a symbol stream. 
+ PDB_NO_DISCARD bool HasSymbolStream(void) const PDB_NO_EXCEPT; + + // Returns whether the module has a line stream. + PDB_NO_DISCARD bool HasLineStream(void) const PDB_NO_EXCEPT; + + // Creates a symbol stream for the module. + PDB_NO_DISCARD ModuleSymbolStream CreateSymbolStream(const RawFile& file) const PDB_NO_EXCEPT; + + // Create a line stream for the module + PDB_NO_DISCARD ModuleLineStream CreateLineStream(const RawFile& file) const PDB_NO_EXCEPT; + + + // Returns the PDB module info. + PDB_NO_DISCARD inline const DBI::ModuleInfo* GetInfo(void) const PDB_NO_EXCEPT + { + return m_info; + } + + // Returns the name of the module. + PDB_NO_DISCARD inline ArrayView GetName(void) const PDB_NO_EXCEPT + { + return ArrayView(m_name, m_nameLength); + } + + // Returns the name of the object file of the module. + PDB_NO_DISCARD inline ArrayView GetObjectName(void) const PDB_NO_EXCEPT + { + return ArrayView(m_objectName, m_objectNameLength); + } + + private: + // the module info is stored in variable-length arrays inside the stream, so rather than store an array directly, + // we need to store pointers to the individual data items inside the stream. + const DBI::ModuleInfo* m_info; + + // the module name, e.g. the path to an object file or import library such as "Import:kernel32.dll" + const char* m_name; + size_t m_nameLength; + + // the name of the object file. either the same as the module name, or the path to the archive that contained the module + const char* m_objectName; + size_t m_objectNameLength; + + PDB_DISABLE_COPY(Module); + }; + + ModuleInfoStream(void) PDB_NO_EXCEPT; + ModuleInfoStream(ModuleInfoStream&& other) PDB_NO_EXCEPT; + ModuleInfoStream& operator=(ModuleInfoStream&& other) PDB_NO_EXCEPT; + + explicit ModuleInfoStream(const DirectMSFStream& directStream, uint32_t size, uint32_t offset) PDB_NO_EXCEPT; + + ~ModuleInfoStream(void) PDB_NO_EXCEPT; + + // Tries to find the linker module corresponding to the linker, i.e. the module named "* Linker *". 
+ PDB_NO_DISCARD const Module* FindLinkerModule(void) const PDB_NO_EXCEPT; + + // Returns the module with the given index. + PDB_NO_DISCARD inline const Module& GetModule(uint32_t index) const PDB_NO_EXCEPT + { + return m_modules[index]; + } + + // Returns a view of all modules in the info stream. + PDB_NO_DISCARD inline ArrayView GetModules(void) const PDB_NO_EXCEPT + { + return ArrayView(m_modules, m_moduleCount); + } + + private: + CoalescedMSFStream m_stream; + Module* m_modules; + size_t m_moduleCount; + + PDB_DISABLE_COPY(ModuleInfoStream); + }; +} diff --git a/Includes/raw_pdb/src/PDB_ModuleLineStream.cpp b/Includes/raw_pdb/src/PDB_ModuleLineStream.cpp new file mode 100644 index 0000000..201983e --- /dev/null +++ b/Includes/raw_pdb/src/PDB_ModuleLineStream.cpp @@ -0,0 +1,31 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#include "PDB_PCH.h" +#include "PDB_ModuleLineStream.h" +#include "PDB_RawFile.h" + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::ModuleLineStream::ModuleLineStream(void) PDB_NO_EXCEPT + : m_stream(), m_c13LineInfoOffset(0) +{ +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::ModuleLineStream::ModuleLineStream(const RawFile& file, uint16_t streamIndex, uint32_t streamSize, size_t c13LineInfoOffset) PDB_NO_EXCEPT + : m_stream(file.CreateMSFStream(streamIndex, streamSize)), m_c13LineInfoOffset(c13LineInfoOffset) +{ + // https://llvm.org/docs/PDB/ModiStream.html + // struct ModiStream { + // uint32_t Signature; + // uint8_t Symbols[SymbolSize - 4]; + // uint8_t C11LineInfo[C11Size]; + // uint8_t 
C13LineInfo[C13Size]; + // uint32_t GlobalRefsSize; + // uint8_t GlobalRefs[GlobalRefsSize]; + // }; +} diff --git a/Includes/raw_pdb/src/PDB_ModuleLineStream.h b/Includes/raw_pdb/src/PDB_ModuleLineStream.h new file mode 100644 index 0000000..d1148cc --- /dev/null +++ b/Includes/raw_pdb/src/PDB_ModuleLineStream.h @@ -0,0 +1,151 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + +#include "Foundation/PDB_Macros.h" +#include "Foundation/PDB_BitUtil.h" +#include "PDB_DBITypes.h" +#include "PDB_Util.h" +#include "PDB_CoalescedMSFStream.h" + + +namespace PDB +{ + class RawFile; + + class PDB_NO_DISCARD ModuleLineStream + { + public: + ModuleLineStream(void) PDB_NO_EXCEPT; + explicit ModuleLineStream(const RawFile& file, uint16_t streamIndex, uint32_t streamSize, size_t c13LineInfoOffset) PDB_NO_EXCEPT; + + PDB_DEFAULT_MOVE(ModuleLineStream); + + template + void ForEachSection(F&& functor) const PDB_NO_EXCEPT + { + size_t offset = m_c13LineInfoOffset; + + // read the line stream sections + while (offset < m_stream.GetSize()) + { + const CodeView::DBI::LineSection* section = m_stream.GetDataAtOffset(offset); + + functor(section); + + offset = BitUtil::RoundUpToMultiple(offset + sizeof(CodeView::DBI::DebugSubsectionHeader) + section->header.size, 4u); + } + } + + template + void ForEachLinesBlock(const CodeView::DBI::LineSection* section, F&& functor) const PDB_NO_EXCEPT + { + PDB_ASSERT(section->header.kind == CodeView::DBI::DebugSubsectionKind::S_LINES, + "DebugSubsectionHeader::Kind %X != S_LINES (%X)", + static_cast(section->header.kind), static_cast(CodeView::DBI::DebugSubsectionKind::S_LINES)); + + size_t offset = m_stream.GetPointerOffset(section); + const size_t headerEnd = BitUtil::RoundUpToMultiple(offset + sizeof(CodeView::DBI::DebugSubsectionHeader) + section->header.size, 4u); + + offset = BitUtil::RoundUpToMultiple(offset + 
sizeof(CodeView::DBI::DebugSubsectionHeader) + sizeof(CodeView::DBI::LinesHeader), 4u); + + // read all blocks of lines + while (offset < headerEnd) + { + const CodeView::DBI::LinesFileBlockHeader* linesBlockHeader = m_stream.GetDataAtOffset(offset); + const CodeView::DBI::Line* blockLines = m_stream.GetDataAtOffset(offset + sizeof(CodeView::DBI::LinesFileBlockHeader)); + + const size_t blockColumnsOffset = sizeof(CodeView::DBI::LinesFileBlockHeader) + (linesBlockHeader->numLines * (sizeof(CodeView::DBI::Line))); + const CodeView::DBI::Column* blockColumns = blockColumnsOffset < linesBlockHeader->size ? m_stream.GetDataAtOffset(offset) : nullptr; + + functor(linesBlockHeader, blockLines, blockColumns); + + offset = BitUtil::RoundUpToMultiple(offset + linesBlockHeader->size, 4u); + } + + PDB_ASSERT(offset == headerEnd, "Mismatch between offset %zu and header end %zu when reading lines blocks", offset, headerEnd); + } + + template + void ForEachFileChecksum(const CodeView::DBI::LineSection* section, F&& functor) const PDB_NO_EXCEPT + { + PDB_ASSERT(section->header.kind == CodeView::DBI::DebugSubsectionKind::S_FILECHECKSUMS, + "DebugSubsectionHeader::Kind %X != S_FILECHECKSUMS (%X)", + static_cast(section->header.kind), static_cast(CodeView::DBI::DebugSubsectionKind::S_FILECHECKSUMS)); + + size_t offset = m_stream.GetPointerOffset(section); + const size_t headerEnd = BitUtil::RoundUpToMultiple(offset + sizeof(CodeView::DBI::DebugSubsectionHeader) + section->header.size, 4u); + + offset = BitUtil::RoundUpToMultiple(offset + sizeof(CodeView::DBI::DebugSubsectionHeader), 4u); + + // read all file checksums + while (offset < headerEnd) + { + const CodeView::DBI::FileChecksumHeader* fileChecksumHeader = m_stream.GetDataAtOffset(offset); + + functor(fileChecksumHeader); + + offset = BitUtil::RoundUpToMultiple(offset + sizeof(CodeView::DBI::FileChecksumHeader) + fileChecksumHeader->checksumSize, 4u); + } + + PDB_ASSERT(offset == headerEnd, "Mismatch between offset %zu and 
header end %zu when reading file checksums", offset, headerEnd); + } + + template + void ForEachInlineeSourceLine(const CodeView::DBI::LineSection* section, F&& functor) const PDB_NO_EXCEPT + { + PDB_ASSERT(section->header.kind == CodeView::DBI::DebugSubsectionKind::S_INLINEELINES, + "DebugSubsectionHeader::Kind %X != S_INLINEELINES (%X)", + static_cast(section->header.kind), static_cast(CodeView::DBI::DebugSubsectionKind::S_INLINEELINES)); + + PDB_ASSERT(section->inlineeHeader.kind == CodeView::DBI::InlineeSourceLineKind::Signature, + "InlineeSourceLineKind %X != :InlineeSourceLineKind::Signature (%X)", static_cast(section->header.kind), static_cast(CodeView::DBI::InlineeSourceLineKind::Signature)); + + size_t offset = m_stream.GetPointerOffset(section); + const size_t headerEnd = BitUtil::RoundUpToMultiple(offset + sizeof(CodeView::DBI::DebugSubsectionHeader) + section->header.size, 4u); + + offset = BitUtil::RoundUpToMultiple(offset + sizeof(CodeView::DBI::DebugSubsectionHeader) + sizeof(CodeView::DBI::InlineeSourceLineHeader), 4u); + + // read all file checksums + while (offset < headerEnd) + { + const CodeView::DBI::InlineeSourceLine* inlineeSourceLine = m_stream.GetDataAtOffset(offset); + + functor(inlineeSourceLine); + + offset = BitUtil::RoundUpToMultiple(offset + sizeof(CodeView::DBI::InlineeSourceLine), 4u); + } + } + + template + void ForEachInlineeSourceLineEx(const CodeView::DBI::LineSection* section, F&& functor) const PDB_NO_EXCEPT + { + PDB_ASSERT(section->header.kind == CodeView::DBI::DebugSubsectionKind::S_INLINEELINES, + "DebugSubsectionHeader::Kind %X != S_INLINEELINES (%X)", static_cast(section->header.kind), static_cast(CodeView::DBI::DebugSubsectionKind::S_INLINEELINES)); + + PDB_ASSERT(section->inlineeHeader.kind == CodeView::DBI::InlineeSourceLineKind::SignatureEx, + "InlineeSourceLineKind %X != :InlineeSourceLineKind::SignatureEx (%X)", static_cast(section->header.kind), static_cast(CodeView::DBI::InlineeSourceLineKind::SignatureEx)); + + 
size_t offset = m_stream.GetPointerOffset(section); + const size_t headerEnd = BitUtil::RoundUpToMultiple(offset + sizeof(CodeView::DBI::DebugSubsectionHeader) + section->header.size, 4u); + + offset = BitUtil::RoundUpToMultiple(offset + sizeof(CodeView::DBI::DebugSubsectionHeader) + sizeof(CodeView::DBI::InlineeSourceLineHeader), 4u); + + // read all file checksums + while (offset < headerEnd) + { + const CodeView::DBI::InlineeSourceLineEx* inlineeSourceLineEx = m_stream.GetDataAtOffset(offset); + + functor(inlineeSourceLineEx); + + offset = BitUtil::RoundUpToMultiple(offset + sizeof(CodeView::DBI::InlineeSourceLineEx) + (inlineeSourceLineEx->extraLines * sizeof(uint32_t)), 4u); + } + } + private: + CoalescedMSFStream m_stream; + size_t m_c13LineInfoOffset; + + PDB_DISABLE_COPY(ModuleLineStream); + }; +} diff --git a/Includes/raw_pdb/src/PDB_ModuleSymbolStream.cpp b/Includes/raw_pdb/src/PDB_ModuleSymbolStream.cpp new file mode 100644 index 0000000..ae10a11 --- /dev/null +++ b/Includes/raw_pdb/src/PDB_ModuleSymbolStream.cpp @@ -0,0 +1,61 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#include "PDB_PCH.h" +#include "PDB_ModuleSymbolStream.h" +#include "PDB_RawFile.h" + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::ModuleSymbolStream::ModuleSymbolStream(void) PDB_NO_EXCEPT + : m_stream() +{ +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::ModuleSymbolStream::ModuleSymbolStream(const RawFile& file, uint16_t streamIndex, uint32_t symbolStreamSize) PDB_NO_EXCEPT + : m_stream(file.CreateMSFStream(streamIndex, symbolStreamSize)) +{ 
+ // https://llvm.org/docs/PDB/ModiStream.html + // struct ModiStream { + // uint32_t Signature; + // uint8_t Symbols[SymbolSize - 4]; + // uint8_t C11LineInfo[C11Size]; + // uint8_t C13LineInfo[C13Size]; + // uint32_t GlobalRefsSize; + // uint8_t GlobalRefs[GlobalRefsSize]; + // }; + // we are only interested in the symbols, but not the line information or global refs. + // the coalesced stream is therefore only built for the symbols, not all the data in the stream. + // this potentially saves a lot of memory and performance on large PDBs. +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB_NO_DISCARD const PDB::CodeView::DBI::Record* PDB::ModuleSymbolStream::FindRecord(CodeView::DBI::SymbolRecordKind kind) const PDB_NO_EXCEPT +{ + // ignore the stream's 4-byte signature + size_t offset = sizeof(uint32_t); + + // parse the CodeView records + while (offset < m_stream.GetSize()) + { + // https://llvm.org/docs/PDB/CodeViewTypes.html + const CodeView::DBI::Record* record = m_stream.GetDataAtOffset(offset); + if (record->header.kind == kind) + { + return record; + } + + const uint32_t recordSize = GetCodeViewRecordSize(record); + + // position the module stream offset at the next record + offset = BitUtil::RoundUpToMultiple(offset + sizeof(CodeView::DBI::RecordHeader) + recordSize, 4u); + } + + return nullptr; +} diff --git a/Includes/raw_pdb/src/PDB_ModuleSymbolStream.h b/Includes/raw_pdb/src/PDB_ModuleSymbolStream.h new file mode 100644 index 0000000..2138708 --- /dev/null +++ b/Includes/raw_pdb/src/PDB_ModuleSymbolStream.h @@ -0,0 +1,70 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + +#include "Foundation/PDB_Macros.h" +#include "Foundation/PDB_BitUtil.h" +#include 
"PDB_DBITypes.h" +#include "PDB_Util.h" +#include "PDB_CoalescedMSFStream.h" + + +namespace PDB +{ + class RawFile; + + + class PDB_NO_DISCARD ModuleSymbolStream + { + public: + ModuleSymbolStream(void) PDB_NO_EXCEPT; + explicit ModuleSymbolStream(const RawFile& file, uint16_t streamIndex, uint32_t symbolStreamSize) PDB_NO_EXCEPT; + + PDB_DEFAULT_MOVE(ModuleSymbolStream); + + // Returns a record's parent record. + template + PDB_NO_DISCARD inline const CodeView::DBI::Record* GetParentRecord(const T& record) const PDB_NO_EXCEPT + { + return m_stream.GetDataAtOffset(record.parent); + } + + // Returns a record's end record. + template + PDB_NO_DISCARD inline const CodeView::DBI::Record* GetEndRecord(const T& record) const PDB_NO_EXCEPT + { + return m_stream.GetDataAtOffset(record.end); + } + + // Finds a record of a certain kind. + PDB_NO_DISCARD const CodeView::DBI::Record* FindRecord(CodeView::DBI::SymbolRecordKind Kind) const PDB_NO_EXCEPT; + + + // Iterates all records in the stream. + template + void ForEachSymbol(F&& functor) const PDB_NO_EXCEPT + { + // ignore the stream's 4-byte signature + size_t offset = sizeof(uint32_t); + + // parse the CodeView records + while (offset < m_stream.GetSize()) + { + // https://llvm.org/docs/PDB/CodeViewTypes.html + const CodeView::DBI::Record* record = m_stream.GetDataAtOffset(offset); + const uint32_t recordSize = GetCodeViewRecordSize(record); + + functor(record); + + // position the module stream offset at the next record + offset = BitUtil::RoundUpToMultiple(offset + sizeof(CodeView::DBI::RecordHeader) + recordSize, 4u); + } + } + + private: + CoalescedMSFStream m_stream; + + PDB_DISABLE_COPY(ModuleSymbolStream); + }; +} diff --git a/Includes/raw_pdb/src/PDB_NamesStream.cpp b/Includes/raw_pdb/src/PDB_NamesStream.cpp new file mode 100644 index 0000000..fae895e --- /dev/null +++ b/Includes/raw_pdb/src/PDB_NamesStream.cpp @@ -0,0 +1,28 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing 
details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#include "PDB_PCH.h" +#include "PDB_NamesStream.h" +#include "PDB_RawFile.h" + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::NamesStream::NamesStream(void) PDB_NO_EXCEPT + : m_stream() + , m_header(nullptr) + , m_stringTable(nullptr) +{ +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::NamesStream::NamesStream(const RawFile& file, uint32_t streamIndex) PDB_NO_EXCEPT + : m_stream(file.CreateMSFStream(streamIndex)) + , m_header(m_stream.GetDataAtOffset(0u)) + , m_stringTable(nullptr) +{ + // grab a pointer into the string table + m_stringTable = m_stream.GetDataAtOffset(sizeof(NamesHeader)); +} diff --git a/Includes/raw_pdb/src/PDB_NamesStream.h b/Includes/raw_pdb/src/PDB_NamesStream.h new file mode 100644 index 0000000..c305242 --- /dev/null +++ b/Includes/raw_pdb/src/PDB_NamesStream.h @@ -0,0 +1,48 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + +#include "Foundation/PDB_Macros.h" +#include "PDB_Types.h" +#include "PDB_CoalescedMSFStream.h" + + +namespace PDB +{ + class RawFile; + + struct NamesHeader + { + uint32_t magic; + uint32_t hashVersion; + uint32_t size; + }; + + class PDB_NO_DISCARD NamesStream + { + public: + NamesStream(void) PDB_NO_EXCEPT; + explicit NamesStream(const RawFile& file, uint32_t streamIndex) PDB_NO_EXCEPT; + + PDB_DEFAULT_MOVE(NamesStream); + + // Returns the header of the stream. 
+ PDB_NO_DISCARD inline const NamesHeader* GetHeader(void) const PDB_NO_EXCEPT + { + return m_header; + } + + PDB_NO_DISCARD inline const char* GetFilename(uint32_t filenameOffset) const PDB_NO_EXCEPT + { + return m_stringTable + filenameOffset; + } + + private: + CoalescedMSFStream m_stream; + const NamesHeader* m_header; + const char* m_stringTable; + + PDB_DISABLE_COPY(NamesStream); + }; +} diff --git a/Includes/raw_pdb/src/PDB_PCH.cpp b/Includes/raw_pdb/src/PDB_PCH.cpp new file mode 100644 index 0000000..70ca8d6 --- /dev/null +++ b/Includes/raw_pdb/src/PDB_PCH.cpp @@ -0,0 +1,4 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#include "PDB_PCH.h" diff --git a/Includes/raw_pdb/src/PDB_PCH.h b/Includes/raw_pdb/src/PDB_PCH.h new file mode 100644 index 0000000..8374e10 --- /dev/null +++ b/Includes/raw_pdb/src/PDB_PCH.h @@ -0,0 +1,20 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + +// this needs to be the first include, since it determines the platform/toolchain we're compiling for +#include "Foundation/PDB_Platform.h" +#include "Foundation/PDB_Macros.h" +#include "Foundation/PDB_Warnings.h" + +// library includes +#include "Foundation/PDB_Log.h" +#include "Foundation/PDB_Assert.h" +#include "Foundation/PDB_Move.h" +#include "Foundation/PDB_Forward.h" +#include "Foundation/PDB_Memory.h" +#include "Foundation/PDB_ArrayView.h" +#include "Foundation/PDB_BitUtil.h" +#include "Foundation/PDB_BitOperators.h" +#include "Foundation/PDB_PointerUtil.h" diff --git a/Includes/raw_pdb/src/PDB_PublicSymbolStream.cpp b/Includes/raw_pdb/src/PDB_PublicSymbolStream.cpp new file mode 100644 index 0000000..5efad22 --- /dev/null +++ b/Includes/raw_pdb/src/PDB_PublicSymbolStream.cpp @@ -0,0 +1,43 @@ +// Copyright 2011-2022, Molecular Matters 
GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#include "PDB_PCH.h" +#include "PDB_PublicSymbolStream.h" +#include "PDB_RawFile.h" +#include "PDB_Types.h" +#include "PDB_DBITypes.h" + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::PublicSymbolStream::PublicSymbolStream(void) PDB_NO_EXCEPT + : m_stream() + , m_hashRecords(nullptr) + , m_count(0u) +{ +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::PublicSymbolStream::PublicSymbolStream(const RawFile& file, uint16_t streamIndex, uint32_t count) PDB_NO_EXCEPT + : m_stream(file.CreateMSFStream(streamIndex)) + , m_hashRecords(m_stream.GetDataAtOffset(sizeof(PublicStreamHeader) + sizeof(HashTableHeader))) + , m_count(count) +{ +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB_NO_DISCARD const PDB::CodeView::DBI::Record* PDB::PublicSymbolStream::GetRecord(const CoalescedMSFStream& symbolRecordStream, const HashRecord& hashRecord) const PDB_NO_EXCEPT +{ + // hash record offsets start at 1, not at 0 + const uint32_t headerOffset = hashRecord.offset - 1u; + + // the offset doesn't point to the public symbol directly, but to the CodeView record: + // https://llvm.org/docs/PDB/CodeViewSymbols.html + const CodeView::DBI::Record* record = symbolRecordStream.GetDataAtOffset(headerOffset); + + return record; +} diff --git a/Includes/raw_pdb/src/PDB_PublicSymbolStream.h b/Includes/raw_pdb/src/PDB_PublicSymbolStream.h new file mode 100644 index 0000000..681a613 --- /dev/null +++ 
b/Includes/raw_pdb/src/PDB_PublicSymbolStream.h @@ -0,0 +1,49 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + +#include "Foundation/PDB_Macros.h" +#include "Foundation/PDB_ArrayView.h" +#include "PDB_CoalescedMSFStream.h" + + +namespace PDB +{ + class RawFile; + struct HashRecord; + + namespace CodeView + { + namespace DBI + { + struct Record; + } + } + + + class PDB_NO_DISCARD PublicSymbolStream + { + public: + PublicSymbolStream(void) PDB_NO_EXCEPT; + explicit PublicSymbolStream(const RawFile& file, uint16_t streamIndex, uint32_t count) PDB_NO_EXCEPT; + + PDB_DEFAULT_MOVE(PublicSymbolStream); + + // Turns a given hash record into a DBI record using the given symbol stream. + PDB_NO_DISCARD const CodeView::DBI::Record* GetRecord(const CoalescedMSFStream& symbolRecordStream, const HashRecord& hashRecord) const PDB_NO_EXCEPT; + + // Returns a view of all the records in the stream. 
+ PDB_NO_DISCARD inline ArrayView GetRecords(void) const PDB_NO_EXCEPT + { + return ArrayView(m_hashRecords, m_count); + } + + private: + CoalescedMSFStream m_stream; + const HashRecord* m_hashRecords; + uint32_t m_count; + + PDB_DISABLE_COPY(PublicSymbolStream); + }; +} diff --git a/Includes/raw_pdb/src/PDB_RawFile.cpp b/Includes/raw_pdb/src/PDB_RawFile.cpp new file mode 100644 index 0000000..f135efe --- /dev/null +++ b/Includes/raw_pdb/src/PDB_RawFile.cpp @@ -0,0 +1,147 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#include "PDB_PCH.h" +#include "PDB_RawFile.h" +#include "PDB_Types.h" +#include "PDB_Util.h" +#include "PDB_DirectMSFStream.h" +#include "Foundation/PDB_PointerUtil.h" +#include "Foundation/PDB_Memory.h" +#include "Foundation/PDB_Assert.h" + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::RawFile::RawFile(RawFile&& other) PDB_NO_EXCEPT + : m_data(PDB_MOVE(other.m_data)) + , m_superBlock(PDB_MOVE(other.m_superBlock)) + , m_directoryStream(PDB_MOVE(other.m_directoryStream)) + , m_streamCount(PDB_MOVE(other.m_streamCount)) + , m_streamSizes(PDB_MOVE(other.m_streamSizes)) + , m_streamBlocks(PDB_MOVE(other.m_streamBlocks)) +{ + other.m_data = nullptr; + other.m_superBlock = nullptr; + other.m_streamCount = 0u; + other.m_streamSizes = nullptr; + other.m_streamBlocks = nullptr; +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::RawFile& PDB::RawFile::operator=(RawFile&& other) PDB_NO_EXCEPT +{ + if (this != &other) + { + PDB_DELETE_ARRAY(m_streamBlocks); + + m_data = PDB_MOVE(other.m_data); + m_superBlock = 
PDB_MOVE(other.m_superBlock); + m_directoryStream = PDB_MOVE(other.m_directoryStream); + m_streamCount = PDB_MOVE(other.m_streamCount); + m_streamSizes = PDB_MOVE(other.m_streamSizes); + m_streamBlocks = PDB_MOVE(other.m_streamBlocks); + + other.m_data = nullptr; + other.m_superBlock = nullptr; + other.m_streamCount = 0u; + other.m_streamSizes = nullptr; + other.m_streamBlocks = nullptr; + } + + return *this; +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::RawFile::RawFile(const void* data) PDB_NO_EXCEPT + : m_data(data) + , m_superBlock(Pointer::Offset(data, 0u)) + , m_directoryStream() + , m_streamCount(0u) + , m_streamSizes(nullptr) + , m_streamBlocks(nullptr) +{ + // the SuperBlock stores an array of indices of blocks that make up the indices of directory blocks, which need to be stitched together to form the directory. + // the blocks holding the indices of directory blocks are not necessarily contiguous, so they need to be coalesced first. + const uint32_t directoryBlockCount = PDB::ConvertSizeToBlockCount(m_superBlock->directorySize, m_superBlock->blockSize); + + // the directory is made up of directoryBlockCount blocks, so we need that many indices to be read from the blocks that make up the indices + CoalescedMSFStream directoryIndicesStream(data, m_superBlock->blockSize, m_superBlock->directoryBlockIndices, directoryBlockCount * sizeof(uint32_t)); + + // these are the indices of blocks making up the directory stream, now guaranteed to be contiguous + const uint32_t* directoryIndices = directoryIndicesStream.GetDataAtOffset(0u); + + m_directoryStream = CoalescedMSFStream(data, m_superBlock->blockSize, directoryIndices, m_superBlock->directorySize); + + // https://llvm.org/docs/PDB/MsfFile.html#the-stream-directory + // parse the directory from its contiguous version. 
the directory matches the following struct: + // struct StreamDirectory + // { + // uint32_t streamCount; + // uint32_t streamSizes[streamCount]; + // uint32_t streamBlocks[streamCount][]; + // }; + m_streamCount = *m_directoryStream.GetDataAtOffset(0u); + + // we can assign pointers into the stream directly, since the RawFile keeps ownership of the directory stream + m_streamSizes = m_directoryStream.GetDataAtOffset(sizeof(uint32_t)); + const uint32_t* directoryStreamBlocks = m_directoryStream.GetDataAtOffset(sizeof(uint32_t) + sizeof(uint32_t) * m_streamCount); + + // prepare indices for directly accessing individual streams + m_streamBlocks = PDB_NEW_ARRAY(const uint32_t*, m_streamCount); + + const uint32_t* indicesForCurrentBlock = directoryStreamBlocks; + for (uint32_t i = 0u; i < m_streamCount; ++i) + { + const uint32_t sizeInBytes = GetStreamSize(i); + const uint32_t blockCount = ConvertSizeToBlockCount(sizeInBytes, m_superBlock->blockSize); + m_streamBlocks[i] = indicesForCurrentBlock; + + indicesForCurrentBlock += blockCount; + } +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::RawFile::~RawFile(void) PDB_NO_EXCEPT +{ + PDB_DELETE_ARRAY(m_streamBlocks); +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +template +PDB_NO_DISCARD T PDB::RawFile::CreateMSFStream(uint32_t streamIndex) const PDB_NO_EXCEPT +{ + PDB_ASSERT(streamIndex != PDB::NilStreamIndex, "Invalid stream index."); + PDB_ASSERT(streamIndex < m_streamCount, "Invalid stream index."); + + return T(m_data, m_superBlock->blockSize, m_streamBlocks[streamIndex], GetStreamSize(streamIndex)); +} + + +// 
------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +template +PDB_NO_DISCARD T PDB::RawFile::CreateMSFStream(uint32_t streamIndex, uint32_t streamSize) const PDB_NO_EXCEPT +{ + PDB_ASSERT(streamIndex != PDB::NilStreamIndex, "Invalid stream index."); + PDB_ASSERT(streamIndex < m_streamCount, "Invalid stream index."); + PDB_ASSERT(streamSize <= GetStreamSize(streamIndex), "Invalid stream size."); + + return T(m_data, m_superBlock->blockSize, m_streamBlocks[streamIndex], streamSize); +} + + +// explicit template instantiation +template PDB::CoalescedMSFStream PDB::RawFile::CreateMSFStream(uint32_t streamIndex) const PDB_NO_EXCEPT; +template PDB::DirectMSFStream PDB::RawFile::CreateMSFStream(uint32_t streamIndex) const PDB_NO_EXCEPT; + +template PDB::CoalescedMSFStream PDB::RawFile::CreateMSFStream(uint32_t streamIndex, uint32_t streamSize) const PDB_NO_EXCEPT; +template PDB::DirectMSFStream PDB::RawFile::CreateMSFStream(uint32_t streamIndex, uint32_t streamSize) const PDB_NO_EXCEPT; diff --git a/Includes/raw_pdb/src/PDB_RawFile.h b/Includes/raw_pdb/src/PDB_RawFile.h new file mode 100644 index 0000000..bf88673 --- /dev/null +++ b/Includes/raw_pdb/src/PDB_RawFile.h @@ -0,0 +1,66 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + +#include "Foundation/PDB_Macros.h" +#include "PDB_CoalescedMSFStream.h" + + +// https://llvm.org/docs/PDB/index.html +namespace PDB +{ + struct SuperBlock; + + + class PDB_NO_DISCARD RawFile + { + public: + RawFile(RawFile&& other) PDB_NO_EXCEPT; + RawFile& operator=(RawFile&& other) PDB_NO_EXCEPT; + + explicit RawFile(const void* data) PDB_NO_EXCEPT; + ~RawFile(void) PDB_NO_EXCEPT; + + // Creates any type of MSF stream. 
+ template + PDB_NO_DISCARD T CreateMSFStream(uint32_t streamIndex) const PDB_NO_EXCEPT; + + // Creates any type of MSF stream with the given size. + template + PDB_NO_DISCARD T CreateMSFStream(uint32_t streamIndex, uint32_t streamSize) const PDB_NO_EXCEPT; + + + // Returns the SuperBlock. + PDB_NO_DISCARD inline const SuperBlock* GetSuperBlock(void) const PDB_NO_EXCEPT + { + return m_superBlock; + } + + // Returns the number of streams in the PDB file. + PDB_NO_DISCARD inline uint32_t GetStreamCount(void) const PDB_NO_EXCEPT + { + return m_streamCount; + } + + // Returns the size of the stream with the given index, taking into account nil page sizes. + PDB_NO_DISCARD inline uint32_t GetStreamSize(uint32_t streamIndex) const PDB_NO_EXCEPT + { + const uint32_t streamSize = m_streamSizes[streamIndex]; + + return (streamSize == NilPageSize) ? 0u : streamSize; + } + + private: + const void* m_data; + const SuperBlock* m_superBlock; + CoalescedMSFStream m_directoryStream; + + // stream directory + uint32_t m_streamCount; + const uint32_t* m_streamSizes; + const uint32_t** m_streamBlocks; + + PDB_DISABLE_COPY(RawFile); + }; +} diff --git a/Includes/raw_pdb/src/PDB_SectionContributionStream.cpp b/Includes/raw_pdb/src/PDB_SectionContributionStream.cpp new file mode 100644 index 0000000..a8a944e --- /dev/null +++ b/Includes/raw_pdb/src/PDB_SectionContributionStream.cpp @@ -0,0 +1,25 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#include "PDB_PCH.h" +#include "PDB_SectionContributionStream.h" + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::SectionContributionStream::SectionContributionStream(void) PDB_NO_EXCEPT + : m_stream() + , m_contributions(nullptr) + , m_count(0u) +{ +} + + +// 
------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::SectionContributionStream::SectionContributionStream(const DirectMSFStream& directStream, uint32_t size, uint32_t offset) PDB_NO_EXCEPT + : m_stream(directStream, size, offset) + , m_contributions(m_stream.GetDataAtOffset(0u)) + , m_count(size / sizeof(DBI::SectionContribution)) +{ +} diff --git a/Includes/raw_pdb/src/PDB_SectionContributionStream.h b/Includes/raw_pdb/src/PDB_SectionContributionStream.h new file mode 100644 index 0000000..c1a183d --- /dev/null +++ b/Includes/raw_pdb/src/PDB_SectionContributionStream.h @@ -0,0 +1,38 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + +#include "Foundation/PDB_Macros.h" +#include "Foundation/PDB_ArrayView.h" +#include "PDB_DBITypes.h" +#include "PDB_CoalescedMSFStream.h" + + +namespace PDB +{ + class PDB_NO_DISCARD DirectMSFStream; + + + class PDB_NO_DISCARD SectionContributionStream + { + public: + SectionContributionStream(void) PDB_NO_EXCEPT; + explicit SectionContributionStream(const DirectMSFStream& directStream, uint32_t size, uint32_t offset) PDB_NO_EXCEPT; + + PDB_DEFAULT_MOVE(SectionContributionStream); + + // Returns a view of all section contributions in the stream. 
+ PDB_NO_DISCARD inline ArrayView GetContributions(void) const PDB_NO_EXCEPT + { + return ArrayView(m_contributions, m_count); + } + + private: + CoalescedMSFStream m_stream; + const DBI::SectionContribution* m_contributions; + size_t m_count; + + PDB_DISABLE_COPY(SectionContributionStream); + }; +} diff --git a/Includes/raw_pdb/src/PDB_SourceFileStream.cpp b/Includes/raw_pdb/src/PDB_SourceFileStream.cpp new file mode 100644 index 0000000..fc860f6 --- /dev/null +++ b/Includes/raw_pdb/src/PDB_SourceFileStream.cpp @@ -0,0 +1,68 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#include "PDB_PCH.h" +#include "PDB_SourceFileStream.h" + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::SourceFileStream::SourceFileStream(void) PDB_NO_EXCEPT + : m_stream() + , m_moduleCount(0u) + , m_moduleIndices(nullptr) + , m_moduleFileCounts(nullptr) + , m_fileNameOffsets(nullptr) + , m_stringTable(nullptr) +{ +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::SourceFileStream::SourceFileStream(const DirectMSFStream& directStream, uint32_t size, uint32_t offset) PDB_NO_EXCEPT + : m_stream(directStream, size, offset) + , m_moduleCount(0u) + , m_moduleIndices(nullptr) + , m_moduleFileCounts(nullptr) + , m_fileNameOffsets(nullptr) + , m_stringTable(nullptr) +{ + // we are going to consume the whole source info sub-stream, so create a coalesced stream for faster read operations and direct access. 
+ // the sub-stream has the following layout: + // struct SourceInfoSubstream + // { + // uint16_t moduleCount; + // uint16_t sourceFileCount; + // uint16_t moduleIndices[moduleCount]; + // uint16_t moduleFileCounts[moduleCount]; + // uint32_t fileNameOffsets[realSourceFileCount]; + // char stringTable[][realSourceFileCount]; + // }; + m_moduleCount = *m_stream.GetDataAtOffset(0u); + size_t readOffset = sizeof(uint16_t); + + // skip number of source files. this would only support 64k unique files and is no longer used. + // the number of source files is computed dynamically instead. + readOffset += sizeof(uint16_t); + + // grab direct pointers into the stream data + m_moduleIndices = m_stream.GetDataAtOffset(readOffset); + readOffset += sizeof(uint16_t) * m_moduleCount; + + m_moduleFileCounts = m_stream.GetDataAtOffset(readOffset); + readOffset += sizeof(uint16_t) * m_moduleCount; + + // count the actual number of source files + size_t sourceFileCount = 0u; + for (unsigned int i = 0u; i < m_moduleCount; ++i) + { + sourceFileCount += m_moduleFileCounts[i]; + } + + m_fileNameOffsets = m_stream.GetDataAtOffset(readOffset); + readOffset += sizeof(uint32_t) * sourceFileCount; + + // grab a pointer into the string table + m_stringTable = m_stream.GetDataAtOffset(readOffset); +} diff --git a/Includes/raw_pdb/src/PDB_SourceFileStream.h b/Includes/raw_pdb/src/PDB_SourceFileStream.h new file mode 100644 index 0000000..a32c4bd --- /dev/null +++ b/Includes/raw_pdb/src/PDB_SourceFileStream.h @@ -0,0 +1,65 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + +#include "Foundation/PDB_Macros.h" +#include "Foundation/PDB_ArrayView.h" +#include "PDB_CoalescedMSFStream.h" + + +namespace PDB +{ + class PDB_NO_DISCARD DirectMSFStream; + + + class PDB_NO_DISCARD SourceFileStream + { + public: + SourceFileStream(void) PDB_NO_EXCEPT; + explicit 
SourceFileStream(const DirectMSFStream& directStream, uint32_t size, uint32_t offset) PDB_NO_EXCEPT; + + PDB_DEFAULT_MOVE(SourceFileStream); + + // Returns the number of modules. + PDB_NO_DISCARD inline uint32_t GetModuleCount(void) const PDB_NO_EXCEPT + { + return m_moduleCount; + } + + // Returns a view of all the filename offsets for the module with the given index. + PDB_NO_DISCARD inline ArrayView GetModuleFilenameOffsets(size_t moduleIndex) const PDB_NO_EXCEPT + { + const uint16_t moduleStartIndex = m_moduleIndices[moduleIndex]; + const uint16_t moduleFileCount = m_moduleFileCounts[moduleIndex]; + + return ArrayView(m_fileNameOffsets + moduleStartIndex, moduleFileCount); + } + + // Returns a filename for the given filename offset. + PDB_NO_DISCARD inline const char* GetFilename(uint32_t filenameOffset) const PDB_NO_EXCEPT + { + return m_stringTable + filenameOffset; + } + + private: + CoalescedMSFStream m_stream; + + // the number of modules + uint32_t m_moduleCount; + + // the indices into the file name offsets, for each module + const uint16_t* m_moduleIndices; + + // the number of files, for each module + const uint16_t* m_moduleFileCounts; + + // the filename offsets into the string table, for all modules + const uint32_t* m_fileNameOffsets; + + // the string table storing all filenames + const char* m_stringTable; + + PDB_DISABLE_COPY(SourceFileStream); + }; +} diff --git a/Includes/raw_pdb/src/PDB_TPIStream.cpp b/Includes/raw_pdb/src/PDB_TPIStream.cpp new file mode 100644 index 0000000..164195e --- /dev/null +++ b/Includes/raw_pdb/src/PDB_TPIStream.cpp @@ -0,0 +1,86 @@ +#include "PDB_PCH.h" +#include "PDB_TPIStream.h" +#include "PDB_RawFile.h" +#include "PDB_DirectMSFStream.h" +#include "Foundation/PDB_Memory.h" + +namespace +{ + // the TPI stream always resides at index 2 + static constexpr const uint32_t TPIStreamIndex = 2u; +} + + +// ------------------------------------------------------------------------------------------------ +// 
------------------------------------------------------------------------------------------------ +PDB::TPIStream::TPIStream(void) PDB_NO_EXCEPT + : m_stream() + , m_header() + , m_recordCount(0u) +{ +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::TPIStream::TPIStream(TPIStream&& other) PDB_NO_EXCEPT + : m_stream(PDB_MOVE(other.m_stream)) + , m_header(PDB_MOVE(other.m_header)) + , m_recordCount(PDB_MOVE(other.m_recordCount)) +{ + other.m_recordCount = 0u; +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::TPIStream& PDB::TPIStream::operator=(TPIStream&& other) PDB_NO_EXCEPT +{ + if (this != &other) + { + m_stream = PDB_MOVE(other.m_stream); + m_header = PDB_MOVE(other.m_header); + m_recordCount = PDB_MOVE(other.m_recordCount); + + other.m_recordCount = 0u; + } + + return *this; +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB::TPIStream::TPIStream(const RawFile& file) PDB_NO_EXCEPT + : m_stream(file.CreateMSFStream(TPIStreamIndex)), + m_header(m_stream.ReadAtOffset(0u)), + m_recordCount(GetLastTypeIndex() - GetFirstTypeIndex()) +{ +} + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB_NO_DISCARD PDB::ErrorCode PDB::HasValidTPIStream(const RawFile& file) PDB_NO_EXCEPT +{ + DirectMSFStream stream = file.CreateMSFStream(TPIStreamIndex); + if (stream.GetSize() < sizeof(TPI::StreamHeader)) + { + return ErrorCode::InvalidStream; + 
} + + const TPI::StreamHeader header = stream.ReadAtOffset(0u); + if (header.version != TPI::StreamHeader::Version::V80) + { + return ErrorCode::UnknownVersion; + } + + return ErrorCode::Success; +} + + +// ------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------ +PDB_NO_DISCARD PDB::TPIStream PDB::CreateTPIStream(const RawFile& file) PDB_NO_EXCEPT +{ + return TPIStream { file }; +} diff --git a/Includes/raw_pdb/src/PDB_TPIStream.h b/Includes/raw_pdb/src/PDB_TPIStream.h new file mode 100644 index 0000000..81e9af0 --- /dev/null +++ b/Includes/raw_pdb/src/PDB_TPIStream.h @@ -0,0 +1,85 @@ +#pragma once + +#include "Foundation/PDB_Macros.h" +#include "Foundation/PDB_ArrayView.h" +#include "PDB_ErrorCodes.h" +#include "PDB_TPITypes.h" +#include "PDB_DirectMSFStream.h" +#include "PDB_Util.h" + +// PDB TPI stream +// https://llvm.org/docs/PDB/TpiStream.html +namespace PDB +{ + class RawFile; + + + class PDB_NO_DISCARD TPIStream + { + public: + TPIStream(void) PDB_NO_EXCEPT; + TPIStream(TPIStream&& other) PDB_NO_EXCEPT; + TPIStream& operator=(TPIStream&& other) PDB_NO_EXCEPT; + + explicit TPIStream(const RawFile& file) PDB_NO_EXCEPT; + + PDB_NO_DISCARD inline const DirectMSFStream& GetDirectMSFStream(void) const PDB_NO_EXCEPT + { + return m_stream; + } + + // Returns the index of the first type, which is not necessarily zero. + PDB_NO_DISCARD inline uint32_t GetFirstTypeIndex(void) const PDB_NO_EXCEPT + { + return m_header.typeIndexBegin; + } + + // Returns the index of the last type. + PDB_NO_DISCARD inline uint32_t GetLastTypeIndex(void) const PDB_NO_EXCEPT + { + return m_header.typeIndexEnd; + } + + // Returns the number of type records. 
+ PDB_NO_DISCARD inline size_t GetTypeRecordCount(void) const PDB_NO_EXCEPT + { + return m_recordCount; + } + + CodeView::TPI::RecordHeader ReadTypeRecordHeader(size_t offset) const PDB_NO_EXCEPT + { + const CodeView::TPI::RecordHeader header = m_stream.ReadAtOffset(offset); + return header; + } + + template + void ForEachTypeRecordHeaderAndOffset(F&& functor) const PDB_NO_EXCEPT + { + // ignore the stream's header + size_t offset = sizeof(TPI::StreamHeader); + + while (offset < m_stream.GetSize()) + { + const CodeView::TPI::RecordHeader header = ReadTypeRecordHeader(offset); + + functor(header, offset); + + // position the stream offset at the next record + offset += sizeof(CodeView::TPI::RecordHeader) + header.size - sizeof(uint16_t); + } + } + + private: + DirectMSFStream m_stream; + TPI::StreamHeader m_header; + size_t m_recordCount; + + PDB_DISABLE_COPY(TPIStream); + }; + + // Returns whether the given raw file provides a valid TPI stream. + PDB_NO_DISCARD ErrorCode HasValidTPIStream(const RawFile& file) PDB_NO_EXCEPT; + + // Creates the TPI stream from a raw file. 
+ PDB_NO_DISCARD TPIStream CreateTPIStream(const RawFile& file) PDB_NO_EXCEPT; +} diff --git a/Includes/raw_pdb/src/PDB_TPITypes.h b/Includes/raw_pdb/src/PDB_TPITypes.h new file mode 100644 index 0000000..c12a5ed --- /dev/null +++ b/Includes/raw_pdb/src/PDB_TPITypes.h @@ -0,0 +1,867 @@ +#pragma once + +#include "Foundation/PDB_Macros.h" +#include "Foundation/PDB_BitOperators.h" + +namespace PDB +{ + namespace TPI + { + // https://llvm.org/docs/PDB/TpiStream.html#stream-header + struct StreamHeader + { + enum class PDB_NO_DISCARD Version : uint32_t + { + V40 = 19950410u, + V41 = 19951122u, + V50 = 19961031u, + V70 = 19990903u, + V80 = 20040203u + }; + + Version version; + uint32_t headerSize; + uint32_t typeIndexBegin; + uint32_t typeIndexEnd; + uint32_t typeRecordBytes; + + uint16_t hashStreamIndex; + uint16_t hashAuxStreamIndex; + uint32_t hashKeySize; + uint32_t numHashBuckets; + + int32_t hashValueBufferOffset; + uint32_t hashValueBufferLength; + + int32_t indexOffsetBufferOffset; + uint32_t indexOffsetBufferLength; + + int32_t hashAdjBufferOffset; + uint32_t hashAdjBufferLength; + }; + } + + + namespace CodeView + { + namespace TPI + { + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L772 + enum class PDB_NO_DISCARD TypeRecordKind : uint16_t + { + LF_POINTER = 0x1002u, + LF_MODIFIER = 0x1001u, + LF_PROCEDURE = 0x1008u, + LF_MFUNCTION = 0x1009u, + LF_LABEL = 0x000eu, + LF_ARGLIST = 0x1201u, + LF_FIELDLIST = 0x1203u, + LF_VTSHAPE = 0x000au, + LF_BITFIELD = 0x1205u, + LF_METHODLIST = 0x1206u, + LF_ENDPRECOMP = 0x0014u, + + LF_BCLASS = 0x001400u, + LF_VBCLASS = 0x001401u, + LF_IVBCLASS = 0x001402u, + LF_FRIENDFCN_ST = 0x001403u, + LF_INDEX = 0x001404u, + LF_MEMBER_ST = 0x001405u, + LF_STMEMBER_ST = 0x001406u, + LF_METHOD_ST = 0x001407u, + LF_NESTTYPE_ST = 0x001408u, + LF_VFUNCTAB = 0x001409u, + LF_FRIENDCLS = 0x00140Au, + LF_ONEMETHOD_ST = 0x00140Bu, + LF_VFUNCOFF = 0x00140Cu, + LF_NESTTYPEEX_ST = 0x00140Du, + LF_MEMBERMODIFY_ST = 
0x00140Eu, + LF_MANAGED_ST = 0x00140Fu, + + LF_SMAX = 0x001500u, + LF_TYPESERVER = 0x001501u, + LF_ENUMERATE = 0x001502u, + LF_ARRAY = 0x001503u, + LF_CLASS = 0x001504u, + LF_STRUCTURE = 0x001505u, + LF_UNION = 0x001506u, + LF_ENUM = 0x001507u, + LF_DIMARRAY = 0x001508u, + LF_PRECOMP = 0x001509u, + LF_ALIAS = 0x00150Au, + LF_DEFARG = 0x00150Bu, + LF_FRIENDFCN = 0x00150Cu, + LF_MEMBER = 0x00150Du, + LF_STMEMBER = 0x00150Eu, + LF_METHOD = 0x00150Fu, + LF_NESTTYPE = 0x001510u, + LF_ONEMETHOD = 0x001511u, + LF_NESTTYPEEX = 0x001512u, + LF_MEMBERMODIFY = 0x001513u, + LF_MANAGED = 0x001514u, + LF_TYPESERVER2 = 0x001515u, + LF_CLASS2 = 0x001608u, + LF_STRUCTURE2 = 0x001609u, + + LF_NUMERIC = 0x8000u, + LF_CHAR = 0x8000u, + LF_SHORT = 0x8001u, + LF_USHORT = 0x8002u, + LF_LONG = 0x8003u, + LF_ULONG = 0x8004u, + LF_REAL32 = 0x8005u, + LF_REAL64 = 0x8006u, + LF_REAL80 = 0x8007u, + LF_REAL128 = 0x8008u, + LF_QUADWORD = 0x8009u, + LF_UQUADWORD = 0x800au, + LF_REAL48 = 0x800bu, + LF_COMPLEX32 = 0x800cu, + LF_COMPLEX64 = 0x800du, + LF_COMPLEX80 = 0x800eu, + LF_COMPLEX128 = 0x800fu, + LF_VARSTRING = 0x8010u, + + LF_OCTWORD = 0x8017u, + LF_UOCTWORD = 0x8018u, + + LF_DECIMAL = 0x8019u, + LF_DATE = 0x801au, + LF_UTF8STRING = 0x801bu, + + LF_REAL16 = 0x801cu + }; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L328 + // https://github.com/ValveSoftware/wine/blob/cd165953c8b379a78418711f07417022e503c81b/include/wine/mscvpdb.h + enum class TypeIndexKind : uint16_t + { + T_NOTYPE = 0x0000u, // uncharacterized type (no type) + T_ABS = 0x0001u, // absolute symbol + T_SEGMENT = 0x0002u, // segment type + T_VOID = 0x0003u, // void + T_HRESULT = 0x0008u, // OLE/COM HRESULT + T_32PHRESULT = 0x0408u, // OLE/COM HRESULT __ptr32 * + T_64PHRESULT = 0x0608u, // OLE/COM HRESULT __ptr64 * + + // Emitted due to a compiler bug? + // 0x0600 bits appears to indicate a 64-bit pointer, but it has no type? 
+ // Seen as type index for C11 "_Atomic uint32_t*" variable and constant. + T_UNKNOWN_0600 = 0x0600u, + + T_PVOID = 0x0103u, // near pointer to void + T_PFVOID = 0x0203u, // far pointer to void + T_PHVOID = 0x0303u, // huge pointer to void + T_32PVOID = 0x0403u, // 32 bit pointer to void + T_32PFVOID = 0x0503u, // 16:32 pointer to void + T_64PVOID = 0x0603u, // 64 bit pointer to void + T_CURRENCY = 0x0004u, // BASIC 8 byte currency value + T_NBASICSTR = 0x0005u, // Near BASIC string + T_FBASICSTR = 0x0006u, // Far BASIC string + T_NOTTRANS = 0x0007u, // type not translated by cvpack + T_BIT = 0x0060u, // bit + T_PASCHAR = 0x0061u, // Pascal CHAR + T_BOOL32FF = 0x0062u, // 32-bit BOOL where true is 0xffffffff + + T_CHAR = 0x0010u, // 8 bit signed + T_PCHAR = 0x0110u, // 16 bit pointer to 8 bit signed + T_PFCHAR = 0x0210u, // 16:16 far pointer to 8 bit signed + T_PHCHAR = 0x0310u, // 16:16 huge pointer to 8 bit signed + T_32PCHAR = 0x0410u, // 32 bit pointer to 8 bit signed + T_32PFCHAR = 0x0510u, // 16:32 pointer to 8 bit signed + T_64PCHAR = 0x0610u, // 64 bit pointer to 8 bit signed + + T_UCHAR = 0x0020u, // 8 bit unsigned + T_PUCHAR = 0x0120u, // 16 bit pointer to 8 bit unsigned + T_PFUCHAR = 0x0220u, // 16:16 far pointer to 8 bit unsigned + T_PHUCHAR = 0x0320u, // 16:16 huge pointer to 8 bit unsigned + T_32PUCHAR = 0x0420u, // 32 bit pointer to 8 bit unsigned + T_32PFUCHAR = 0x0520u, // 16:32 pointer to 8 bit unsigned + T_64PUCHAR = 0x0620u, // 64 bit pointer to 8 bit unsigned + + T_RCHAR = 0x0070u, // really a char + T_PRCHAR = 0x0170u, // 16 bit pointer to a real char + T_PFRCHAR = 0x0270u, // 16:16 far pointer to a real char + T_PHRCHAR = 0x0370u, // 16:16 huge pointer to a real char + T_32PRCHAR = 0x0470u, // 32 bit pointer to a real char + T_32PFRCHAR = 0x0570u, // 16:32 pointer to a real char + T_64PRCHAR = 0x0670u, // 64 bit pointer to a real char + + // wide character types + T_WCHAR = 0x0071u, // wide char + T_PWCHAR = 0x0171u, // 16 bit pointer to a 
wide char + T_PFWCHAR = 0x0271u, // 16:16 far pointer to a wide char + T_PHWCHAR = 0x0371u, // 16:16 huge pointer to a wide char + T_32PWCHAR = 0x0471u, // 32 bit pointer to a wide char + T_32PFWCHAR = 0x0571u, // 16:32 pointer to a wide char + T_64PWCHAR = 0x0671u, // 64 bit pointer to a wide char + + // 8-bit unicode char + T_CHAR8 = 0x007c, // 8-bit unicode char (C++ 20) + T_PCHAR8 = 0x017c, // Near pointer to 8-bit unicode char + T_PFCHAR8 = 0x027c, // Far pointer to 8-bit unicode char + T_PHCHAR8 = 0x037c, // Huge pointer to 8-bit unicode char + T_32PCHAR8 = 0x047c, // 16:32 near pointer to 8-bit unicode char + T_32PFCHAR8 = 0x057c, // 16:32 far pointer to 8-bit unicode char + T_64PCHAR8 = 0x067c, // 64 bit near pointer to 8-bit unicode char + + // 16-bit unicode char + T_CHAR16 = 0x007au, // 16-bit unicode char + T_PCHAR16 = 0x017au, // 16 bit pointer to a 16-bit unicode char + T_PFCHAR16 = 0x027au, // 16:16 far pointer to a 16-bit unicode char + T_PHCHAR16 = 0x037au, // 16:16 huge pointer to a 16-bit unicode char + T_32PCHAR16 = 0x047au, // 32 bit pointer to a 16-bit unicode char + T_32PFCHAR16 = 0x057au, // 16:32 pointer to a 16-bit unicode char + T_64PCHAR16 = 0x067au, // 64 bit pointer to a 16-bit unicode char + + // 32-bit unicode char + T_CHAR32 = 0x007bu, // 32-bit unicode char + T_PCHAR32 = 0x017bu, // 16 bit pointer to a 32-bit unicode char + T_PFCHAR32 = 0x027bu, // 16:16 far pointer to a 32-bit unicode char + T_PHCHAR32 = 0x037bu, // 16:16 huge pointer to a 32-bit unicode char + T_32PCHAR32 = 0x047bu, // 32 bit pointer to a 32-bit unicode char + T_32PFCHAR32 = 0x057bu, // 16:32 pointer to a 32-bit unicode char + T_64PCHAR32 = 0x067bu, // 64 bit pointer to a 32-bit unicode char + + // 8 bit int types + T_INT1 = 0x0068u, // 8 bit signed int + T_PINT1 = 0x0168u, // 16 bit pointer to 8 bit signed int + T_PFINT1 = 0x0268u, // 16:16 far pointer to 8 bit signed int + T_PHINT1 = 0x0368u, // 16:16 huge pointer to 8 bit signed int + T_32PINT1 = 0x0468u, // 
32 bit pointer to 8 bit signed int + T_32PFINT1 = 0x0568u, // 16:32 pointer to 8 bit signed int + T_64PINT1 = 0x0668u, // 64 bit pointer to 8 bit signed int + + T_UINT1 = 0x0069u, // 8 bit unsigned int + T_PUINT1 = 0x0169u, // 16 bit pointer to 8 bit unsigned int + T_PFUINT1 = 0x0269u, // 16:16 far pointer to 8 bit unsigned int + T_PHUINT1 = 0x0369u, // 16:16 huge pointer to 8 bit unsigned int + T_32PUINT1 = 0x0469u, // 32 bit pointer to 8 bit unsigned int + T_32PFUINT1 = 0x0569u, // 16:32 pointer to 8 bit unsigned int + T_64PUINT1 = 0x0669u, // 64 bit pointer to 8 bit unsigned int + + // 16 bit short types + T_SHORT = 0x0011u, // 16 bit signed + T_PSHORT = 0x0111u, // 16 bit pointer to 16 bit signed + T_PFSHORT = 0x0211u, // 16:16 far pointer to 16 bit signed + T_PHSHORT = 0x0311u, // 16:16 huge pointer to 16 bit signed + T_32PSHORT = 0x0411u, // 32 bit pointer to 16 bit signed + T_32PFSHORT = 0x0511u, // 16:32 pointer to 16 bit signed + T_64PSHORT = 0x0611u, // 64 bit pointer to 16 bit signed + + T_USHORT = 0x0021u, + T_PUSHORT = 0x0121u, + T_PFUSHORT = 0x0221u, + T_PHUSHORT = 0x0321u, + T_32PUSHORT = 0x0421u, + T_32PFUSHORT = 0x0521u, + T_64PUSHORT = 0x0621u, + + T_INT2 = 0x0072u, + T_PINT2 = 0x0172u, + T_PFINT2 = 0x0272u, + T_PHINT2 = 0x0372u, + T_32PINT2 = 0x0472u, + T_32PFINT2 = 0x0572u, + T_64PINT2 = 0x0672u, + + T_UINT2 = 0x0073u, + T_PUINT2 = 0x0173u, + T_PFUINT2 = 0x0273u, + T_PHUINT2 = 0x0373u, + T_32PUINT2 = 0x0473u, + T_32PFUINT2 = 0x0573u, + T_64PUINT2 = 0x0673u, + + T_LONG = 0x0012u, + T_PLONG = 0x0112u, + T_PFLONG = 0x0212u, + T_PHLONG = 0x0312u, + T_32PLONG = 0x0412u, + T_32PFLONG = 0x0512u, + T_64PLONG = 0x0612u, + + T_ULONG = 0x0022u, + T_PULONG = 0x0122u, + T_PFULONG = 0x0222u, + T_PHULONG = 0x0322u, + T_32PULONG = 0x0422u, + T_32PFULONG = 0x0522u, + T_64PULONG = 0x0622u, + + T_INT4 = 0x0074u, + T_PINT4 = 0x0174u, + T_PFINT4 = 0x0274u, + T_PHINT4 = 0x0374u, + T_32PINT4 = 0x0474u, + T_32PFINT4 = 0x0574u, + T_64PINT4 = 0x0674u, + + T_UINT4 = 
0x0075u, + T_PUINT4 = 0x0175u, + T_PFUINT4 = 0x0275u, + T_PHUINT4 = 0x0375u, + T_32PUINT4 = 0x0475u, + T_32PFUINT4 = 0x0575u, + T_64PUINT4 = 0x0675u, + + T_QUAD = 0x0013u, + T_PQUAD = 0x0113u, + T_PFQUAD = 0x0213u, + T_PHQUAD = 0x0313u, + T_32PQUAD = 0x0413u, + T_32PFQUAD = 0x0513u, + T_64PQUAD = 0x0613u, + + T_UQUAD = 0x0023u, + T_PUQUAD = 0x0123u, + T_PFUQUAD = 0x0223u, + T_PHUQUAD = 0x0323u, + T_32PUQUAD = 0x0423u, + T_32PFUQUAD = 0x0523u, + T_64PUQUAD = 0x0623u, + + T_INT8 = 0x0076u, + T_PINT8 = 0x0176u, + T_PFINT8 = 0x0276u, + T_PHINT8 = 0x0376u, + T_32PINT8 = 0x0476u, + T_32PFINT8 = 0x0576u, + T_64PINT8 = 0x0676u, + + T_UINT8 = 0x0077u, + T_PUINT8 = 0x0177u, + T_PFUINT8 = 0x0277u, + T_PHUINT8 = 0x0377u, + T_32PUINT8 = 0x0477u, + T_32PFUINT8 = 0x0577u, + T_64PUINT8 = 0x0677u, + + T_OCT = 0x0014u, + T_POCT = 0x0114u, + T_PFOCT = 0x0214u, + T_PHOCT = 0x0314u, + T_32POCT = 0x0414u, + T_32PFOCT = 0x0514u, + T_64POCT = 0x0614u, + + T_UOCT = 0x0024u, + T_PUOCT = 0x0124u, + T_PFUOCT = 0x0224u, + T_PHUOCT = 0x0324u, + T_32PUOCT = 0x0424u, + T_32PFUOCT = 0x0524u, + T_64PUOCT = 0x0624u, + + T_INT16 = 0x0078u, + T_PINT16 = 0x0178u, + T_PFINT16 = 0x0278u, + T_PHINT16 = 0x0378u, + T_32PINT16 = 0x0478u, + T_32PFINT16 = 0x0578u, + T_64PINT16 = 0x0678u, + + T_UINT16 = 0x0079u, + T_PUINT16 = 0x0179u, + T_PFUINT16 = 0x0279u, + T_PHUINT16 = 0x0379u, + T_32PUINT16 = 0x0479u, + T_32PFUINT16 = 0x0579u, + T_64PUINT16 = 0x0679u, + + T_REAL32 = 0x0040u, + T_PREAL32 = 0x0140u, + T_PFREAL32 = 0x0240u, + T_PHREAL32 = 0x0340u, + T_32PREAL32 = 0x0440u, + T_32PFREAL32 = 0x0540u, + T_64PREAL32 = 0x0640u, + + T_REAL48 = 0x0044u, + T_PREAL48 = 0x0144u, + T_PFREAL48 = 0x0244u, + T_PHREAL48 = 0x0344u, + T_32PREAL48 = 0x0444u, + T_32PFREAL48 = 0x0544u, + T_64PREAL48 = 0x0644u, + + T_REAL64 = 0x0041u, + T_PREAL64 = 0x0141u, + T_PFREAL64 = 0x0241u, + T_PHREAL64 = 0x0341u, + T_32PREAL64 = 0x0441u, + T_32PFREAL64 = 0x0541u, + T_64PREAL64 = 0x0641u, + + T_REAL80 = 0x0042u, + T_PREAL80 = 0x0142u, + 
T_PFREAL80 = 0x0242u, + T_PHREAL80 = 0x0342u, + T_32PREAL80 = 0x0442u, + T_32PFREAL80 = 0x0542u, + T_64PREAL80 = 0x0642u, + + T_REAL128 = 0x0043u, + T_PREAL128 = 0x0143u, + T_PFREAL128 = 0x0243u, + T_PHREAL128 = 0x0343u, + T_32PREAL128 = 0x0443u, + T_32PFREAL128 = 0x0543u, + T_64PREAL128 = 0x0643u, + + T_CPLX32 = 0x0050u, + T_PCPLX32 = 0x0150u, + T_PFCPLX32 = 0x0250u, + T_PHCPLX32 = 0x0350u, + T_32PCPLX32 = 0x0450u, + T_32PFCPLX32 = 0x0550u, + T_64PCPLX32 = 0x0650u, + + T_CPLX64 = 0x0051u, + T_PCPLX64 = 0x0151u, + T_PFCPLX64 = 0x0251u, + T_PHCPLX64 = 0x0351u, + T_32PCPLX64 = 0x0451u, + T_32PFCPLX64 = 0x0551u, + T_64PCPLX64 = 0x0651u, + + T_CPLX80 = 0x0052u, + T_PCPLX80 = 0x0152u, + T_PFCPLX80 = 0x0252u, + T_PHCPLX80 = 0x0352u, + T_32PCPLX80 = 0x0452u, + T_32PFCPLX80 = 0x0552u, + T_64PCPLX80 = 0x0652u, + + T_CPLX128 = 0x0053u, + T_PCPLX128 = 0x0153u, + T_PFCPLX128 = 0x0253u, + T_PHCPLX128 = 0x0353u, + T_32PCPLX128 = 0x0453u, + T_32PFCPLX128 = 0x0553u, + T_64PCPLX128 = 0x0653u, + + T_BOOL08 = 0x0030u, + T_PBOOL08 = 0x0130u, + T_PFBOOL08 = 0x0230u, + T_PHBOOL08 = 0x0330u, + T_32PBOOL08 = 0x0430u, + T_32PFBOOL08 = 0x0530u, + T_64PBOOL08 = 0x0630u, + + T_BOOL16 = 0x0031u, + T_PBOOL16 = 0x0131u, + T_PFBOOL16 = 0x0231u, + T_PHBOOL16 = 0x0331u, + T_32PBOOL16 = 0x0431u, + T_32PFBOOL16 = 0x0531u, + T_64PBOOL16 = 0x0631u, + + T_BOOL32 = 0x0032u, + T_PBOOL32 = 0x0132u, + T_PFBOOL32 = 0x0232u, + T_PHBOOL32 = 0x0332u, + T_32PBOOL32 = 0x0432u, + T_32PFBOOL32 = 0x0532u, + T_64PBOOL32 = 0x0632u, + + T_BOOL64 = 0x0033u, + T_PBOOL64 = 0x0133u, + T_PFBOOL64 = 0x0233u, + T_PHBOOL64 = 0x0333u, + T_32PBOOL64 = 0x0433u, + T_32PFBOOL64 = 0x0533u, + T_64PBOOL64 = 0x0633u, + + T_NCVPTR = 0x01F0u, + T_FCVPTR = 0x02F0u, + T_HCVPTR = 0x03F0u, + T_32NCVPTR = 0x04F0u, + T_32FCVPTR = 0x05F0u, + T_64NCVPTR = 0x06F0u + }; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvconst.h#L31 + enum class CallingConvention : uint8_t + { + NEAR_C = 0x00u, // near right to left pushu, 
caller pops stack + FAR_C = 0x01u, // far right to left push, caller pops stack + NEAR_PASCAL = 0x02u,// near left to right push, callee pops stack + FAR_PASCAL = 0x03u, // far left to right push, callee pops stack + NEAR_FAST = 0x04u, // near left to right push with regs, callee pops stack + FAR_FAST = 0x05u, // far left to right push with regs, callee pops stack + SKIPPED = 0x06u, // skipped (unused) call index + NEAR_STD = 0x07u, // near standard call + FAR_STD = 0x08u, // far standard call + NEAR_SYS = 0x09u, // near sys call + FAR_SYS = 0x0au, // far sys call + THISCALL = 0x0bu, // this call (this passed in register) + MIPSCALL = 0x0cu, // Mips call + GENERIC = 0x0du, // Generic call sequence + ALPHACALL = 0x0eu, // Alpha call + PPCCALL = 0x0fu, // PPC call + SHCALL = 0x10u, // Hitachi SuperH call + ARMCALL = 0x11u, // ARM call + AM33CALL = 0x12u, // AM33 call + TRICALL = 0x13u, // TriCore Call + SH5CALL = 0x14u, // Hitachi SuperH-5 call + M32RCALL = 0x15u, // M32R Call + CLRCALL = 0x16u, // clr call + INLINE = 0x17u, // Marker for routines always inlined and thus lacking a convention + NEAR_VECTOR = 0x18u,// near left to right push with regs, callee pops stack + RESERVED = 0x19u // first unused call enumeration + + // Do NOT add any more machine specific conventions. This is to be used for + // calling conventions in the source only (e.g. __cdecl, __stdcall). 
+ }; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L1049 + enum class MethodProperty : uint8_t + { + Vanilla = 0x00u, + Virtual = 0x01u, + Static = 0x02u, + Friend = 0x03u, + Intro = 0x04u, + PureVirt = 0x05u, + PureIntro = 0x06u + }; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L1120 + struct TypeProperty + { + uint16_t packed : 1; // true if structure is packed + uint16_t ctor : 1; // true if constructors or destructors present + uint16_t ovlops : 1; // true if overloaded operators present + uint16_t isnested : 1; // true if this is a nested class + uint16_t cnested : 1; // true if this class contains nested types + uint16_t opassign : 1; // true if overloaded assignment (=) + uint16_t opcast : 1; // true if casting methods + uint16_t fwdref : 1; // true if forward reference (incomplete defn) + uint16_t scoped : 1; // scoped definition + uint16_t hasuniquename : 1; // true if there is a decorated name following the regular name + uint16_t sealed : 1; // true if class cannot be used as a base class + uint16_t hfa : 2; // CV_HFA_e + uint16_t intrinsic : 1; // true if class is an intrinsic type (e.g. 
__m128d) + uint16_t mocom : 2; // CV_MOCOM_UDe + }; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L1142 + struct MemberAttributes + { + uint16_t access : 2; // access protection CV_access_t + uint16_t mprop : 3; // method properties CV_methodprop_t + uint16_t pseudo : 1; // compiler generated fcn and does not exist + uint16_t noinherit : 1; // true if class cannot be inherited + uint16_t noconstruct : 1; // true if class cannot be constructed + uint16_t compgenx : 1; // compiler generated fcn and does exist + uint16_t sealed : 1; // true if method cannot be overridden + uint16_t unused : 6; // unused + }; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L1156 + struct FunctionAttributes + { + uint8_t cxxreturnudt : 1; // true if C++ style ReturnUDT + uint8_t ctor : 1; // true if func is an instance constructor + uint8_t ctorvbase : 1; // true if func is an instance constructor of a class with virtual bases + uint8_t unused : 5; // unused + }; + + struct RecordHeader + { + uint16_t size; // record length, not including this 2-byte field + TypeRecordKind kind; // record kind + }; + + struct LeafEasy + { + TypeRecordKind kind; // record kind + }; + + struct FieldList + { + TypeRecordKind kind; // record kind + union Data + { +#pragma pack(push, 1) + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L2499 + struct + { + MemberAttributes attributes; // method attribute + uint32_t index; // type index of base class + union + { + PDB_FLEXIBLE_ARRAY_MEMBER(char, offset); // variable length offset of base within class + LeafEasy lfEasy; + }; + }LF_BCLASS; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L2521 + struct + { + MemberAttributes attributes; // attribute + uint32_t index; // type index of direct virtual base class + uint32_t vbpIndex; // type index of virtual base pointer + PDB_FLEXIBLE_ARRAY_MEMBER(char, vbpOffset); // virtual base pointer offset 
from address point + } LF_VBCLASS, LF_IVBCLASS; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L2483 + // index leaf - contains type index of another leaf + // a major use of this leaf is to allow the compilers to emit a + // long complex list (LF_FIELD) in smaller pieces. + struct + { + uint16_t pad0; // internal padding, must be 0 + uint32_t type; // type index of referenced leaf + } LF_INDEX; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L2615 + struct + { + uint16_t pad0; // internal padding, must be 0. + uint32_t type; // type index of pointer + }LF_VFUNCTAB; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L2683 + struct + { + MemberAttributes attributes; + union + { + PDB_FLEXIBLE_ARRAY_MEMBER(char, value); + LeafEasy lfEasy; + }; + } LF_ENUMERATE; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L2693 + struct + { + uint16_t pad0; // internal padding, must be 0 + uint32_t index; // index of nested type definition + PDB_FLEXIBLE_ARRAY_MEMBER(char, name); + }LF_NESTTYPE; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L2650 + struct + { + uint16_t count; // number of occurrences of function + uint32_t mList; // index to LF_METHODLIST record + PDB_FLEXIBLE_ARRAY_MEMBER(char, name); + }LF_METHOD; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L2671 + struct + { + MemberAttributes attributes; // method attribute + uint32_t index; // index to type record for procedure + PDB_FLEXIBLE_ARRAY_MEMBER(uint32_t, vbaseoff); // offset in vfunctable if virtual, empty otherwise (cf. MethodListEntry::vbaseoff); NOTE(review): cvinfo.h appears to place the method name after this field - confirm + }LF_ONEMETHOD; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L2580 + struct + { + MemberAttributes attributes; + uint32_t index; // type index of referenced leaf + union + { + PDB_FLEXIBLE_ARRAY_MEMBER(char, offset); + LeafEasy lfEasy; + }; + } LF_MEMBER; + + // 
https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L2592 + struct + { + MemberAttributes attributes; + uint32_t index; // index of type record for field + PDB_FLEXIBLE_ARRAY_MEMBER(char, name); + }LF_STMEMBER; +#pragma pack(pop) + } data; + }; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L2131 + struct MethodListEntry + { + MemberAttributes attributes; // method attribute + uint16_t pad0; // internal padding, must be 0 + uint32_t index; // index to type record for procedure + PDB_FLEXIBLE_ARRAY_MEMBER(uint32_t, vbaseoff); // offset in vfunctable if virtual, empty otherwise. + }; + + // all CodeView records are stored as a header, followed by variable-length data. + // internal Record structs such as S_PUB32, S_GDATA32, etc. correspond to the data layout of a CodeView record of that kind. + struct Record + { + RecordHeader header; + union Data + { +#pragma pack(push, 1) + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L2144 + struct + { + // This is actually a list of the MethodListEntry type above, but it has flexible + // size, so you need to manually iterate. 
+ PDB_FLEXIBLE_ARRAY_MEMBER(char, mList); + } LF_METHODLIST; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L1801 + struct + { + uint32_t rvtype; // type index of return value + uint32_t classtype; // type index of containing class + uint32_t thistype; // type index of this pointer (model specific) + uint8_t calltype; // calling convention (call_t) + FunctionAttributes funcattr; // attributes + uint16_t parmcount; // number of parameters + uint32_t arglist; // type index of argument list + int32_t thisadjust; // this adjuster (long because pad required anyway) + } LF_MFUNCTION; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L1460 + struct + { + uint32_t type; // modified type + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L1090 + struct + { + uint16_t MOD_const : 1; + uint16_t MOD_volatile : 1; + uint16_t MOD_unaligned : 1; + uint16_t MOD_unused : 13; + } attr; // modifier attribute modifier_t + } LF_MODIFIER; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L1508 + struct + { + uint32_t utype; // type index of the underlying type + struct PointerAttributes + { + uint32_t ptrtype : 5; // ordinal specifying pointer type (CV_ptrtype_e) + uint32_t ptrmode : 3; // ordinal specifying pointer mode (CV_ptrmode_e) + uint32_t isflat32 : 1; // TRUE if 0:32 pointer + uint32_t isvolatile : 1; // TRUE if volatile pointer + uint32_t isconst : 1; // TRUE if const pointer + uint32_t isunaligned : 1; // TRUE if unaligned pointer + uint32_t isrestrict : 1; // TRUE if restricted pointer (allow aggressive opts) + uint32_t size : 6; // size of pointer (in bytes) + uint32_t ismocom : 1; // TRUE if it is a MoCOM pointer (^ or %) + uint32_t islref : 1; // TRUE if it is this pointer of member function with & ref-qualifier + uint32_t isrref : 1; // TRUE if it is this pointer of member function with && ref-qualifier + uint32_t unused : 10; // pad out to 32-bits for 
following cv_typ_t's + } attr; + + union + { + struct + { + uint32_t pmclass; // index of containing class for pointer to member + uint16_t pmenum; // enumeration specifying pm format (CV_pmtype_e) + } pm; + + uint16_t bseg; // base segment if PTR_BASE_SEG + PDB_FLEXIBLE_ARRAY_MEMBER(uint8_t, Sym); // copy of base symbol record (including length) + + struct + { + uint32_t index; // type index if CV_PTR_BASE_TYPE + PDB_FLEXIBLE_ARRAY_MEMBER(char, name); // name of base type + } btype; + } pbase; + } LF_POINTER; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L1775 + struct + { + uint32_t rvtype; // type index of return value + CallingConvention calltype; // calling convention (CV_call_t) + FunctionAttributes funcattr; // attributes + uint16_t parmcount; // number of parameters + uint32_t arglist; // type index of argument list + } LF_PROCEDURE; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L2043 + struct + { + uint32_t count; // number of arguments + PDB_FLEXIBLE_ARRAY_MEMBER(uint32_t, arg); + } LF_ARGLIST; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L2164 + struct + { + uint32_t type; + uint8_t length; + uint8_t position; + PDB_FLEXIBLE_ARRAY_MEMBER(char, data); + } LF_BITFIELD; + + struct + { + uint32_t elemtype; // type index of element type + uint32_t idxtype; // type index of indexing type + PDB_FLEXIBLE_ARRAY_MEMBER(char, data); // variable length data specifying size in bytes and name + } LF_ARRAY; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L1631 + struct + { + uint16_t count; // count of number of elements in class + TypeProperty property; // property attribute field + uint32_t field; // type index of LF_FIELD descriptor list + uint32_t derived; // type index of derived from list if not zero + uint32_t vshape; // type index of vshape table for this class + union + { + PDB_FLEXIBLE_ARRAY_MEMBER(char, data); + LeafEasy lfEasy; 
+ }; + } LF_CLASS; + + struct + { + uint16_t count; // count of number of elements in class + uint32_t property; // property attribute field + uint32_t field; // type index of LF_FIELD descriptor list + uint32_t derived; // type index of derived from list if not zero + uint32_t vshape; // type index of vshape table for this class + union + { + PDB_FLEXIBLE_ARRAY_MEMBER(char, data); + LeafEasy lfEasy; + }; + } LF_CLASS2; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L1647 + struct + { + uint16_t count; // count of number of elements in class + TypeProperty property; // property attribute field + uint32_t field; // type index of LF_FIELD descriptor list + PDB_FLEXIBLE_ARRAY_MEMBER(char, data); + } LF_UNION; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L1752 + struct + { + uint16_t count; // count of number of elements in class + TypeProperty property; // property attribute field + uint32_t utype; // underlying type of the enum + uint32_t field; // type index of LF_FIELD descriptor list + PDB_FLEXIBLE_ARRAY_MEMBER(char, name); + } LF_ENUM; + + // https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h#L2112 + struct + { + FieldList list; + } LF_FIELD; +#pragma pack(pop) + } data; + }; + } + } +} diff --git a/Includes/raw_pdb/src/PDB_Types.cpp b/Includes/raw_pdb/src/PDB_Types.cpp new file mode 100644 index 0000000..66c5cea --- /dev/null +++ b/Includes/raw_pdb/src/PDB_Types.cpp @@ -0,0 +1,12 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#include "PDB_PCH.h" +#include "PDB_Types.h" + + +// https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L962 +const char PDB::SuperBlock::MAGIC[30u] = "Microsoft C/C++ MSF 7.00\r\n\x1a\x44\x53"; + +const uint32_t PDB::HashTableHeader::Signature = 0xffffffffu; +const uint32_t PDB::HashTableHeader::Version = 0xeffe0000u + 
19990810u; diff --git a/Includes/raw_pdb/src/PDB_Types.h b/Includes/raw_pdb/src/PDB_Types.h new file mode 100644 index 0000000..39945fc --- /dev/null +++ b/Includes/raw_pdb/src/PDB_Types.h @@ -0,0 +1,167 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + +#include "Foundation/PDB_Macros.h" + + +namespace PDB +{ + // emulating std::byte from C++17 to make the intention clear that we're dealing with untyped data in certain cases, without actually requiring C++17 + enum class Byte : unsigned char {}; + + // PDB files have the notion of "nil" pages, denoted by a special size + // https://github.com/microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L177 + const uint32_t NilPageSize = 0xffffffffu; + + // PDB files have the notion of a "nil" stream index + // https://github.com/microsoft/microsoft-pdb/blob/master/PDB/include/msf.h#L45 + const uint16_t NilStreamIndex = 0xffffu; + + // this matches the definition in guiddef.h, but we don't want to pull that in + struct GUID + { + uint32_t Data1; + uint16_t Data2; + uint16_t Data3; + uint8_t Data4[8]; + }; + + static_assert(sizeof(GUID) == 16u, "Size mismatch."); + + // this matches the definition in winnt.h, but we don't want to pull that in + struct IMAGE_SECTION_HEADER + { + uint8_t Name[8]; + union + { + uint32_t PhysicalAddress; + uint32_t VirtualSize; + } Misc; + uint32_t VirtualAddress; + uint32_t SizeOfRawData; + uint32_t PointerToRawData; + uint32_t PointerToRelocations; + uint32_t PointerToLinenumbers; + uint16_t NumberOfRelocations; + uint16_t NumberOfLinenumbers; + uint32_t Characteristics; + }; + + static_assert(sizeof(IMAGE_SECTION_HEADER) == 40u, "Size mismatch."); + + // https://llvm.org/docs/PDB/MsfFile.html#msf-superblock + struct PDB_NO_DISCARD SuperBlock + { + static const char MAGIC[30u]; + + char fileMagic[30u]; + char padding[2u]; + uint32_t blockSize; + uint32_t 
freeBlockMapIndex; // index of the free block map + uint32_t blockCount; // number of blocks in the file + uint32_t directorySize; // size of the stream directory in bytes + uint32_t unknown; + PDB_FLEXIBLE_ARRAY_MEMBER(uint32_t, directoryBlockIndices); // indices of the blocks that make up the directory indices + }; + + // https://llvm.org/docs/PDB/PdbStream.html#stream-header + struct Header + { + enum class PDB_NO_DISCARD Version : uint32_t + { + VC2 = 19941610u, + VC4 = 19950623u, + VC41 = 19950814u, + VC50 = 19960307u, + VC98 = 19970604u, + VC70Dep = 19990604u, + VC70 = 20000404u, + VC80 = 20030901u, + VC110 = 20091201u, + VC140 = 20140508u + }; + + Version version; + uint32_t signature; + uint32_t age; + GUID guid; + }; + + // https://llvm.org/docs/PDB/PdbStream.html + struct NamedStreamMap + { + uint32_t length; + PDB_FLEXIBLE_ARRAY_MEMBER(char, stringTable); + + struct HashTableEntry + { + uint32_t stringTableOffset; + uint32_t streamIndex; + }; + }; + + // https://llvm.org/docs/PDB/HashTable.html + struct SerializedHashTable + { + struct Header + { + uint32_t size; + uint32_t capacity; + }; + + struct BitVector + { + uint32_t wordCount; + PDB_FLEXIBLE_ARRAY_MEMBER(uint32_t, words); + }; + }; + + // https://llvm.org/docs/PDB/PdbStream.html#pdb-feature-codes + enum class PDB_NO_DISCARD FeatureCode : uint32_t + { + VC110 = 20091201, + VC140 = 20140508, + + // https://github.com/microsoft/microsoft-pdb/blob/master/PDB/include/pdbcommon.h#L23 + NoTypeMerge = 0x4D544F4E, // "NOTM" + MinimalDebugInfo = 0x494E494D // "MINI", i.e. 
executable was linked with /DEBUG:FASTLINK + }; + + // header of the public stream, based on PSGSIHDR defined here: + // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.h#L240 + struct PublicStreamHeader + { + uint32_t symHash; + uint32_t addrMap; + uint32_t thunkCount; + uint32_t sizeOfThunk; + uint16_t isectThunkTable; + uint16_t padding; + uint32_t offsetThunkTable; + uint16_t sectionCount; + uint16_t padding2; + }; + + // header of the hash tables used by the public and global symbol stream, based on GSIHashHdr defined here: + // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.h#L62 + struct HashTableHeader + { + static const uint32_t Signature; + static const uint32_t Version; + + uint32_t signature; + uint32_t version; + uint32_t size; + uint32_t bucketCount; + }; + + // hash record, based on HRFile defined here: + // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.h#L8 + struct HashRecord + { + uint32_t offset; // offset into the symbol record stream + uint32_t cref; + }; +} diff --git a/Includes/raw_pdb/src/PDB_Util.h b/Includes/raw_pdb/src/PDB_Util.h new file mode 100644 index 0000000..c722659 --- /dev/null +++ b/Includes/raw_pdb/src/PDB_Util.h @@ -0,0 +1,56 @@ +// Copyright 2011-2022, Molecular Matters GmbH +// See LICENSE.txt for licensing details (2-clause BSD License: https://opensource.org/licenses/BSD-2-Clause) + +#pragma once + +#include "Foundation/PDB_Macros.h" + + +namespace PDB +{ + // Converts a block index into a file offset, based on the block size of the PDB file + PDB_NO_DISCARD inline size_t ConvertBlockIndexToFileOffset(uint32_t blockIndex, uint32_t blockSize) PDB_NO_EXCEPT + { + // cast to size_t to avoid potential overflow in 64-bit + return static_cast(blockIndex) * static_cast(blockSize); + } + + // Calculates how many blocks are needed for a certain number of bytes + PDB_NO_DISCARD inline uint32_t ConvertSizeToBlockCount(uint32_t sizeInBytes, uint32_t blockSize) PDB_NO_EXCEPT + 
{ + // integer ceil to account for non-full blocks + return static_cast((static_cast(sizeInBytes) + blockSize - 1u) / blockSize); + }; + + // Returns the actual size of the data associated with a CodeView record, not including the size of the header + template + PDB_NO_DISCARD inline uint32_t GetCodeViewRecordSize(const T* record) PDB_NO_EXCEPT + { + // the stored size includes the size of the 'kind' field, but not the size of the 'size' field itself + return record->header.size - sizeof(uint16_t); + } + + template + PDB_NO_DISCARD inline size_t GetNameLength(const Header& header, const T& record) PDB_NO_EXCEPT + { + // we can estimate the length of the string from the size of the record + const size_t estimatedLength = header.size - sizeof(uint16_t) - sizeof(T); + if (estimatedLength == 0u) + { + return estimatedLength; + } + + // we still need to account for padding after the string to find the real length + size_t nullTerminatorCount = 0u; + for (/* nothing */; nullTerminatorCount < estimatedLength; ++nullTerminatorCount) + { + if (record.name[estimatedLength - nullTerminatorCount - 1u] != '\0') + { + break; + } + } + + const size_t length = estimatedLength - nullTerminatorCount; + return length; + } +}