diff --git a/coredump_parser/Makefile b/coredump_parser/Makefile
new file mode 100644
index 0000000..c475191
--- /dev/null
+++ b/coredump_parser/Makefile
@@ -0,0 +1,21 @@
+#
+# Makefile for Phoenix-RTOS coredump parser
+#
+# Copyright 2025 Phoenix Systems
+#
+
+NAME := coredump_parser
+LOCAL_DIR := $(call my-dir)
+SRCS := $(wildcard $(LOCAL_DIR)*.cpp)
+LOCAL_LDLIBS := -lstdc++
+SCRIPTS := $(wildcard $(LOCAL_DIR)*.py)
+
+
+include $(binary.mk)
+
+
+$(NAME)-scripts:
+	mkdir -p $(PREFIX_BUILD)/scripts
+	cp $(SCRIPTS) $(PREFIX_BUILD)/scripts
+
+ALL_COMPONENTS += $(NAME)-scripts
\ No newline at end of file
diff --git a/coredump_parser/gdb-phoenix-core.py b/coredump_parser/gdb-phoenix-core.py
new file mode 100644
index 0000000..24e2a96
--- /dev/null
+++ b/coredump_parser/gdb-phoenix-core.py
@@ -0,0 +1,223 @@
+# Phoenix-RTOS
+#
+# Coredump parser - gdb script
+#
+# Copyright 2025 Phoenix Systems
+# Authors: Jakub Klimek
+#
+# This file is part of Phoenix-RTOS.
+#
+# %LICENSE%
+#
+
+from elftools.elf.elffile import ELFFile
+from elftools.elf.constants import SH_FLAGS
+from itertools import pairwise
+from pathlib import Path
+import argparse
+import struct
+import sys
+import os
+import subprocess
+import tempfile
+
+NT_LMA = 0x00414D4C
+
+COREDUMP_PARSER_PATH = (Path(__file__).parent.parent / "prog" / "coredump_parser").absolute()
+COREDUMP_PARSER_OUTPUT = Path(tempfile.gettempdir()) / "phoenix-coredumps"
+
+os.makedirs(COREDUMP_PARSER_OUTPUT, exist_ok=True)
+
+
+def has_lma_note(elffile):
+    for segment in elffile.iter_segments():
+        if segment['p_type'] != 'PT_NOTE':
+            continue
+        for note in segment.iter_notes():
+            if note['n_type'] == NT_LMA:
+                return True
+    return False
+
+
+def parse_core_file(elffile):
+    for segment in elffile.iter_segments():
+        if segment['p_type'] != 'PT_NOTE':
+            continue
+        for note in segment.iter_notes():
+            if note['n_type'] != NT_LMA:
+                continue
+            values = struct.unpack(f'<{len(note["n_desc"]) // 4}I', note['n_desc'])
+            res = {}
+            for lma, vma in pairwise(values):
+                res[vma] = lma
+            return res
+    return {}
+
+
+def parse_symbol_file(elffile):
+    section_to_segment_mapping = {}
+
+    for section in elffile.iter_sections():
+        if not (section['sh_flags'] & SH_FLAGS.SHF_ALLOC) or section['sh_size'] == 0:
+            continue
+
+        sec_start_addr = section['sh_addr']
+        sec_end_addr = sec_start_addr + section['sh_size']
+
+        for segment in elffile.iter_segments():
+            if segment['p_type'] != 'PT_LOAD':
+                continue
+            seg_start_addr = segment['p_vaddr']
+            seg_end_addr = seg_start_addr + segment['p_memsz']
+
+            if sec_start_addr >= seg_start_addr and sec_end_addr <= seg_end_addr:
+                section_to_segment_mapping[section.name] = {
+                    'seg_addr': seg_start_addr,
+                    'offset': section['sh_addr'] - seg_start_addr,
+                }
+                break
+
+    return section_to_segment_mapping
+
+
+def create_mapping_args(core_file, symbol_file):
+    core_segment_mapping = parse_core_file(core_file)
+    elf_section_mapping = parse_symbol_file(symbol_file)
+    args = []
+    for section_name, section_info in elf_section_mapping.items():
+        if section_info['seg_addr'] not in core_segment_mapping:
+            print(f"Warning: Segment containing '{section_name}' not found in core NT_FILE mapping.", file=sys.stderr)
+            if section_name == '.text':
+                print("Warning: No valid segment mapping found for '.text' section!", file=sys.stderr)
+                return []
+            continue
+        new_addr = core_segment_mapping[section_info['seg_addr']] + section_info['offset']
+        args.append(f"-s {section_name} {new_addr:#x}")
+    if len(args) == 0:
+        print("Error: No valid segment mappings found.",
+              file=sys.stderr)
+    return args
+
+
+def run_parser(coredump=None, symbolfile=None):
+    arglist = [COREDUMP_PARSER_OUTPUT]
+    if symbolfile:
+        arglist.append(Path(symbolfile).name)
+    try:
+        if coredump:
+            with open(coredump, "rb") as f:
+                result = subprocess.run([COREDUMP_PARSER_PATH, *arglist], check=True, stdin=f, stdout=subprocess.PIPE)
+                return result.stdout.decode('utf-8').strip()
+        else:
+            result = subprocess.run([COREDUMP_PARSER_PATH, *arglist], check=True, stdout=subprocess.PIPE)
+            return result.stdout.decode('utf-8').strip()
+    except subprocess.CalledProcessError as e:
+        print(f"Running coredump_parser failed with exit status: {e.returncode}", file=sys.stderr)
+        return []
+
+
+def generate_gdb_commands(args):
+    """
+    If args.symbol is None, only the core file will be loaded.
+    If args.core is None, the text dump will be read from stdin.
+    """
+    coreelf = None
+    symbolelf = None
+    corefile = None
+    symbolfile = None
+
+    try:
+        corefile = open(args.core, 'rb')
+        coreelf = ELFFile(corefile)
+    except Exception:
+        args.core = run_parser(args.core, args.symbol)
+        if args.core:
+            print(f"Using core file '{args.core}' with symbol file '{args.symbol}'.", file=sys.stderr)
+            corefile = open(args.core, 'rb')
+            coreelf = ELFFile(corefile)
+
+    if args.symbol:
+        symbolfile = open(args.symbol, 'rb')
+        symbolelf = ELFFile(symbolfile)
+
+    if not coreelf:
+        print("No core file found.", file=sys.stderr)
+        if corefile:
+            corefile.close()
+        if symbolfile:
+            symbolfile.close()
+        return []
+
+    commands = []
+    if has_lma_note(coreelf):
+        if not symbolelf:
+            print("Warning: NOMMU coreelf detected, but no symbol file found. Symbols won't be resolved properly when loading symbols separately!", file=sys.stderr)
+        else:
+            mapping_args = create_mapping_args(coreelf, symbolelf)
+            commands.append("symbol-file")  # clear previous
+            commands.append("exec-file")  # clear previous
+            commands.append(f"add-symbol-file {args.symbol} {' '.join(mapping_args)}")
+            commands.append(f"exec-file {args.symbol} {' '.join(mapping_args)}")
+    elif symbolelf:
+        commands.append("symbol-file")  # clear previous
+        commands.append("exec-file")  # clear previous
+        commands.append(f"add-symbol-file {args.symbol}")
+        commands.append(f"exec-file {args.symbol}")
+
+    commands.append(f"core-file {args.core}")
+    if corefile:
+        corefile.close()
+    if symbolfile:
+        symbolfile.close()
+    return commands
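+
+# Example of the command list returned by generate_gdb_commands() for a NOMMU
+# core (illustrative only: the program path and section addresses are made up
+# here; real values come from the symbol file and the core's NT_LMA note):
+#
+#   symbol-file
+#   exec-file
+#   add-symbol-file prog/test_program -s .text 0x80010000 -s .data 0x20001000
+#   exec-file prog/test_program -s .text 0x80010000 -s .data 0x20001000
+#   core-file /tmp/phoenix-coredumps/test_program.1234567890.0.core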
+
+def parse_args(args):
+    parser = argparse.ArgumentParser(add_help=True)
+    parser.add_argument("-c", "--core", help="Path to the core file or to text containing dump (by default stdin will be used)")
+    parser.add_argument("-s", "--symbol", "--sym", help="Path to the symbol file (by default in gdb currently loaded symbol file will be used)")
+    try:
+        args = parser.parse_args(args)
+    except SystemExit:
+        return {"help": True}
+    return args
+
+
+is_in_gdb = True
+try:
+    import gdb
+except ImportError:
+    is_in_gdb = False
+
+if is_in_gdb:
+    class PhoenixCoreCommand(gdb.Command):
+        """
+        Auto-load Phoenix core file and offset symbols on NOMMU targets.
+        Use `phoenix-core --help` for more information.
+        """
+
+        def __init__(self):
+            super(PhoenixCoreCommand, self).__init__("phoenix-core", gdb.COMMAND_USER)
+            self.target_arch = None
+
+        def invoke(self, arg, from_tty):
+            args = parse_args(gdb.string_to_argv(arg))
+            if "help" in args:
+                return
+            if not args.symbol:
+                try:
+                    if gdb.objfiles():
+                        args.symbol = gdb.objfiles()[0].filename
+                except gdb.error as e:
+                    print("Phoenix-core: Note: Could not determine current GDB symbol file:", e, file=sys.stderr)
+            commands = generate_gdb_commands(args)
+            for command in commands:
+                gdb.execute(command)
+
+        def complete(self, text, word):
+            options = ["-c", "--core", "-s", "--symbol", "--sym", "-h", "--help"]
+            if word and word.startswith("-"):
+                return [opt for opt in options if opt.startswith(word)]
+            return gdb.COMPLETE_FILENAME
+
+    PhoenixCoreCommand()
+
+elif __name__ == "__main__":
+    args = parse_args(sys.argv[1:])
+    if "help" in args:
+        exit(0)
+    commands = generate_gdb_commands(args)
+    if commands:
+        print("\n\n# Generated GDB commands:\n")
+        for cmd in commands:
+            print(cmd)
+        print()
+    else:
+        print("No commands generated.", file=sys.stderr)
diff --git a/coredump_parser/parser.cpp b/coredump_parser/parser.cpp
new file mode 100644
index 0000000..61fe588
--- /dev/null
+++ b/coredump_parser/parser.cpp
@@ -0,0 +1,319 @@
+/*
+ * Phoenix-RTOS
+ *
+ * Coredump parser - text dump parser
+ *
+ * Copyright 2025 Phoenix Systems
+ *
+ * Author: Jakub Klimek
+ *
+ * This file is part of Phoenix-RTOS.
+ *
+ * %LICENSE%
+ */
+
+#include <algorithm>
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+#include <filesystem>
+#include <iostream>
+#include <optional>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include <elf.h>
+#include <endian.h>
+
+enum class ParseStatus : int {
+	Success = 0,
+	EndOfFileReached = -1,
+	B64Invalid = -2,
+	RleBroken = -3,
+	CrcInvalid = -4,
+	CoredumpCorrupted = -5,
+};
+
+const uint32_t CRC32POLY_LE = 0xedb88320;
+
+const size_t MAX_VARINT_COUNT = 0x40000000; /* 1 GB */
+
+typedef struct {
+	std::string process_name;
+	std::string exception;
+	uint32_t crc32;
+} additional_info;
+
+
+int b64_index(char c)
+{
+	if ('A' <= c && c <= 'Z')
+		return c - 'A';
+	if ('a' <= c && c <= 'z')
+		return c - 'a' + 26;
+	if ('0' <= c && c <= '9')
+		return c - '0' + 52;
+	if (c == '+')
+		return 62;
+	if (c == '/')
+		return 63;
+	return -1;
+}
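+
+/*
+ * Expected input framing (reconstructed from the parsing code below; the exact
+ * on-target format is defined by the Phoenix-RTOS coredump writer):
+ *
+ *   _COREDUMP_START_
+ *   <process name>:<exception description>;
+ *   <base64 of the RLE-compressed ELF core image, CRC32 appended as last 4 bytes>
+ *   _COREDUMP_END_
+ *
+ * In the RLE stream, the escape byte 0xFE is followed by a LEB128-style varint
+ * repeat count and the byte to repeat; all other bytes are copied verbatim.
+ */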
+
+ParseStatus read_b64(std::vector<uint8_t> &decoded)
+{
+	std::string line;
+	uint32_t buf = 0;
+	int bits = 0;
+
+	while (std::getline(std::cin, line)) {
+		if (line.find("_COREDUMP_END_") != std::string::npos) {
+			break;
+		}
+		std::istringstream iss(line);
+
+		char c;
+		while (iss >> std::ws >> c) {
+			if (c == '=')
+				break;
+			int val = b64_index(c);
+			if (val < 0) {
+				std::cerr << "Error: Invalid base64 character: " << c << std::endl;
+				return ParseStatus::B64Invalid;
+			}
+
+			buf = (buf << 6) | val;
+			bits += 6;
+
+			if (bits >= 8) {
+				bits -= 8;
+				decoded.push_back((buf >> bits) & 0xFF);
+			}
+		}
+	}
+
+	return ParseStatus::Success;
+}
+
+ParseStatus decode_rle(std::vector<uint8_t> &rle_encoded,
+		std::vector<uint8_t> &decoded)
+{
+	size_t len = rle_encoded.size();
+
+	for (size_t i = 0; i < len;) {
+		uint8_t byte = rle_encoded[i++];
+		if (byte == 0xFE) {
+			/* Decode varint */
+			size_t count = 0;
+			int shift = 0;
+			while (i < len) {
+				uint8_t b = rle_encoded[i++];
+				count |= (b & 0x7F) << shift;
+				if (!(b & 0x80))
+					break;
+				shift += 7;
+			}
+
+			if (count > MAX_VARINT_COUNT) {
+				std::cerr << "Error: Varint count exceeds maximum allowed value: " << count
+						<< " repeated bytes at position " << i << "/" << len
+						<< ". Continue parsing? (y/N): ";
+				char response;
+				std::cin.get(response);
+				if (response != 'y' && response != 'Y') {
+					return ParseStatus::RleBroken;
+				}
+			}
+			if (i < len) {
+				uint8_t val = rle_encoded[i++];
+				for (size_t j = 0; j < count; j++) {
+					decoded.push_back(val);
+				}
+			}
+			else {
+				std::cerr << "Error: Unexpected end of data during RLE decoding."
+						<< std::endl;
+				return ParseStatus::RleBroken;
+			}
+		}
+		else {
+			decoded.push_back(byte);
+		}
+	}
+	return ParseStatus::Success;
+}
+
+ParseStatus check_crc(std::vector<uint8_t> &data, uint32_t expected_crc32)
+{
+	uint32_t crc32 = -1;
+	for (size_t i = 0; i < data.size(); ++i) {
+		crc32 = (crc32 ^ (data[i] & 0xFF));
+		for (int j = 0; j < 8; j++) {
+			crc32 = (crc32 >> 1) ^ ((crc32 & 1) ? CRC32POLY_LE : 0);
+		}
+	}
+	crc32 = ~crc32;
+	if (crc32 != expected_crc32) {
+		std::cerr << "Error: CRC32 mismatch!" << std::endl;
+		std::cerr << "Calculated: " << std::hex << crc32 << std::endl;
+		std::cerr << "Found: " << std::hex << expected_crc32 << std::dec << std::endl;
+		std::cerr << "Do you want to continue? (y/N): ";
+		char response;
+		std::cin.get(response);
+		if (response != 'y' && response != 'Y') {
+			return ParseStatus::CrcInvalid;
+		}
+	}
+	return ParseStatus::Success;
+}
+
+ParseStatus read_decode(std::vector<uint8_t> &res, uint32_t &crc32)
+{
+	std::vector<uint8_t> rle_encoded;
+	ParseStatus ret = read_b64(rle_encoded);
+	if (ret != ParseStatus::Success) {
+		return ret;
+	}
+	ret = decode_rle(rle_encoded, res);
+	if (ret != ParseStatus::Success) {
+		return ret;
+	}
+	if (res.size() < sizeof(crc32)) {
+		std::cerr << "Error: Data too short!" << std::endl;
+		return ParseStatus::CoredumpCorrupted;
+	}
+
+	memcpy(&crc32, &res.data()[res.size() - sizeof(crc32)], sizeof(crc32));
+	res.resize(res.size() - sizeof(crc32));
+	if (res[EI_DATA] == ELFDATA2MSB) {
+		crc32 = be32toh(crc32);
+	}
+	ret = check_crc(res, crc32);
+	return ret;
+}
+
+ParseStatus watch_stdin(std::vector<uint8_t> &data, additional_info &info)
+{
+	std::string line;
+
+	while (std::getline(std::cin, line)) {
+		if (line.find("_COREDUMP_START_") != std::string::npos) {
+			data.clear();
+
+			if (!std::getline(std::cin, line, ':')) {
+				std::cerr << "Error: Missing first line with process and exception!"
+						<< std::endl;
+				return ParseStatus::CoredumpCorrupted;
+			}
+			line.erase(std::remove(line.begin(), line.end(), '\n'), line.end());
+			info.process_name = line;
+
+			if (!std::getline(std::cin, line, ';')) {
+				std::cerr << "Error: Missing first line with process and exception!"
+						<< std::endl;
+				return ParseStatus::CoredumpCorrupted;
+			}
+			line.erase(std::remove(line.begin(), line.end(), '\n'), line.end());
+			info.exception = line;
+
+			return read_decode(data, info.crc32);
+		}
+	}
+	return ParseStatus::EndOfFileReached;
+}
+
+ParseStatus parse_dump(std::vector<uint8_t> &mem_data, std::string output_file)
+{
+	FILE *ofs = fopen(output_file.c_str(), "wb");
+	if (!ofs) {
+		std::cerr << "Error: Unable to open output file." << std::endl;
+		return ParseStatus::CoredumpCorrupted;
+	}
+
+	fwrite(mem_data.data(), mem_data.size(), 1, ofs);
+
+	std::cerr << "Total bytes written to " << output_file << ": "
+			<< mem_data.size() << std::endl;
+
+	fclose(ofs);
+	return ParseStatus::Success;
+}
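+
+/*
+ * Output naming example (illustrative only; actual values depend on the dump):
+ * for process "/bin/test_program" with CRC32 0x499602D2 the first file created
+ * in the output directory (typically /tmp/phoenix-coredumps when invoked from
+ * the GDB script) would be "test_program.1234567890.0.core"; the trailing
+ * index grows until an unused name is found.
+ */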
+
+std::filesystem::path get_output_path(std::filesystem::path output_dir,
+		additional_info info)
+{
+	int i = 0;
+	std::string process = std::filesystem::path(info.process_name).filename().string();
+	std::filesystem::path output_file =
+			output_dir / (process + "." + std::to_string(info.crc32) + "." + std::to_string(i) + ".core");
+	while (std::filesystem::exists(output_file)) {
+		++i;
+		output_file =
+				output_dir / (process + "." + std::to_string(info.crc32) + "." + std::to_string(i) + ".core");
+	}
+	return output_file;
+}
+
+int main(int argc, char *argv[])
+{
+	if (argc < 2) {
+		std::cerr << "Error: Invalid number of arguments." << std::endl;
+		std::cerr << "Usage: " << argv[0] << " <output directory> [expected process name]" << std::endl;
+		return 1;
+	}
+	std::filesystem::path output_dir = argv[1];
+	if (!std::filesystem::exists(output_dir)) {
+		std::cerr << "Error: Output directory does not exist: " << output_dir
+				<< std::endl;
+		return 1;
+	}
+
+	std::string expected_process_name;
+	if (argc > 2) {
+		expected_process_name = argv[2];
+	}
+
+	std::optional<std::filesystem::path> first_output_file;
+
+	std::cerr << "Watching stdin for data..." << std::endl;
+	std::vector<uint8_t> mem_data;
+	additional_info info;
+	while (true) {
+		ParseStatus ret = watch_stdin(mem_data, info);
+		if (ret == ParseStatus::EndOfFileReached) {
+			std::cerr << "EOF reached." << std::endl;
+			break;
+		}
+
+		if (ret != ParseStatus::Success) {
+			std::cerr << "Failed to decode coredump" << std::endl;
+			continue;
+		}
+
+		std::cerr << "\n\nParsing coredump for process: " << info.process_name
+				<< " (Exception: " << info.exception << ")" << std::endl;
+		auto output_file = get_output_path(output_dir, info);
+		parse_dump(mem_data, output_file);
+		if (expected_process_name.empty() ||
+				std::filesystem::path(info.process_name).filename().string() ==
+						expected_process_name) {
+			std::cout << output_file.string() << std::endl;
+			return 0;
+		}
+		if (!first_output_file.has_value()) {
+			first_output_file = output_file;
+		}
+	}
+	if (first_output_file.has_value()) {
+		if (!expected_process_name.empty()) {
+			std::cerr << "No process matched the expected name: "
+					<< expected_process_name
+					<< ", using first found coredump file."
+					<< std::endl;
+		}
+		std::cout << first_output_file->string() << std::endl;
+		return 0;
+	}
+	else {
+		std::cerr << "No valid coredump found." << std::endl;
+		return 1;
+	}
+}
diff --git a/coredump_parser/readme.md b/coredump_parser/readme.md
new file mode 100644
index 0000000..b5976e5
--- /dev/null
+++ b/coredump_parser/readme.md
@@ -0,0 +1,71 @@
+# Coredump Parser
+
+## Debugging using coredump
+
+### Preparation
+
+Install a suitable GDB (e.g., `gdb-multiarch` from apt).
+
+Install the required Python packages: `sudo apt install python3-pyelftools`
+
+Run the Phoenix build for the host-generic-pc target:
+
+```
+TARGET=host-generic-pc ./phoenix-rtos-build/build.sh all
+```
+
+### GDB usage
+
+Add the loading script to `.gdbinit`:
+
+```bash
+source <phoenix project path>/_build/host-generic-pc/scripts/gdb-phoenix-core.py
+```
+
+Alternatively, pass it as a GDB parameter: `--command=<phoenix project path>/_build/host-generic-pc/scripts/gdb-phoenix-core.py`
+
+Inside GDB there will now be a `phoenix-core` command available, allowing you to load a text coredump, a core ELF and symbols:
+
+```bash
+usage: phoenix-core [-h] [-c CORE] [-s SYMBOL]
+
+options:
+  -h, --help            show this help message and exit
+  -c CORE, --core CORE  Path to the core file or to text containing dump (by default stdin will be used)
+  -s SYMBOL, --symbol SYMBOL, --sym SYMBOL
+                        Path to the symbol file (by default in gdb currently loaded symbol file will be used)
+```
+
+Example usages (with the script added to `.gdbinit`):
+
+```bash
+# gdb-multiarch _build/ia32-generic-qemu/prog/psh
+...
+
+(gdb) phoenix-core -c <path to core or dump file>
+
+
+(gdb) ph -c dump.txt -s _build/ia32-generic-qemu/prog/test_program
+```
+
+Example with an ELF core file (see the Tracing Guide for how to obtain the file from a QEMU image):
+
+```bash
+(gdb) phoenix-core -c test_program.153 -s _build/ia32-generic-qemu/prog/test_program
+```
+
+
+### GDB usage without builtin Python interpreter
+
+If your GDB installation doesn't include the Python interpreter, you can run the script directly from the command line and forward its stdout to GDB's input.
+
+It will print the commands that you can run inside your GDB, e.g.:
+
+```
+{ python3 <phoenix project path>/_build/host-generic-pc/scripts/gdb-phoenix-core.py -c <dump file> -s <symbol file>; cat; } | gdb-multiarch
+```
+
+
+## Manual Parser Usage (without Python script)
+
+To parse a text coredump into an ELF file manually, run the binary `_build/host-generic-pc/prog/coredump_parser <output directory>` with the text dump on standard input. An ELF core file will be created in the specified directory if the dump is valid.
+
+You can load this core file in GDB manually using the `--core` argument or the `core-file` command.
+
+Please note that `core-file` will not work for NOMMU targets, because the load addresses do not match the virtual addresses in the symbol ELF.
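+
+For example, a complete manual round trip could look like this (the log and core file names below are illustrative; the parser prints the actual path of the core file it creates on stdout):
+
+```bash
+# Parse a captured console log containing the text dump
+mkdir -p /tmp/phoenix-coredumps
+_build/host-generic-pc/prog/coredump_parser /tmp/phoenix-coredumps < console.log
+# The parser prints the generated core file path, e.g.:
+#   /tmp/phoenix-coredumps/test_program.1234567890.0.core
+# Load it together with the program's symbols (MMU targets only;
+# on NOMMU targets use the phoenix-core command instead)
+gdb-multiarch _build/ia32-generic-qemu/prog/test_program \
+    --core=/tmp/phoenix-coredumps/test_program.1234567890.0.core
+```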