From 22fc522f5a46b61464975ea6301241a0b5342b81 Mon Sep 17 00:00:00 2001 From: Kenneth Giusti Date: Tue, 16 May 2023 16:14:26 -0400 Subject: [PATCH 1/3] ISSUE-1083: panic handler for router crashes Part 1: very basic stack unwind + register dump to stderr on crash To be done: - unit test! - display mappings for link shared libraries (e.g. proton...) - document how to use the output for crash debug Adds requirement for libunwind library. --- .github/workflows/build.yaml | 2 +- CMakeLists.txt | 1 + Containerfile | 10 +- cmake/Findlibunwind.cmake | 31 +++ include/qpid/dispatch/threading.h | 1 + packaging/skupper-router.spec.rpkg | 3 + router/CMakeLists.txt | 2 + router/src/main.c | 4 + router/src/panic.c | 332 +++++++++++++++++++++++++++++ run.py.in | 1 + src/CMakeLists.txt | 6 + src/posix/threading.c | 17 +- tests/thread_test.c | 13 +- 13 files changed, 412 insertions(+), 11 deletions(-) create mode 100644 cmake/Findlibunwind.cmake create mode 100644 router/src/panic.c diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index b6edbc145..00871f31d 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -846,7 +846,7 @@ jobs: dnf config-manager --set-enabled powertools dnf install --setopt=tsflags=nodocs --setopt=install_weak_deps=False -y epel-release 'dnf-command(copr)' 'dnf-command(builddep)' dnf copr enable -y clime/rpkg-util - dnf install --setopt=tsflags=nodocs --setopt=install_weak_deps=False -y git rpkg + dnf install --setopt=tsflags=nodocs --setopt=install_weak_deps=False -y git rpkg libunwind-devel - uses: actions/checkout@v3 diff --git a/CMakeLists.txt b/CMakeLists.txt index 60b09e9c5..03136d5b4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -154,6 +154,7 @@ find_package(LibWebSockets 4.0.1 REQUIRED) ## find_library(dw_lib dw DOC "libdw used to symbolize QD_MEMORY_DEBUG backtraces") +find_package(libunwind) # google benchmark tests are disabled by default OPTION(BUILD_BENCHMARKS "Enable building and running 
benchmarks with Google Benchmark" OFF) diff --git a/Containerfile b/Containerfile index a440bd33c..a3e9c3ff5 100644 --- a/Containerfile +++ b/Containerfile @@ -19,12 +19,13 @@ FROM registry.access.redhat.com/ubi9/ubi-minimal:latest as builder -RUN microdnf -y --setopt=install_weak_deps=0 --setopt=tsflags=nodocs install \ +RUN rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm \ + && microdnf -y --setopt=install_weak_deps=0 --setopt=tsflags=nodocs install \ rpm-build \ gcc gcc-c++ make cmake \ cyrus-sasl-devel openssl-devel libuuid-devel \ python3-devel python3-pip \ - libnghttp2-devel \ + libnghttp2-devel libunwind-devel \ wget tar patch findutils git libasan libubsan libtsan \ && microdnf clean all -y @@ -43,11 +44,12 @@ RUN tar zxpf /qpid-proton-image.tar.gz --one-top-level=/image && tar zxpf /skupp FROM registry.access.redhat.com/ubi9/ubi-minimal:latest # gdb and sanitizers are part of final image as they can be used as debug options for Skupper -RUN microdnf -y --setopt=install_weak_deps=0 --setopt=tsflags=nodocs install \ +RUN rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm \ + && microdnf -y --setopt=install_weak_deps=0 --setopt=tsflags=nodocs install \ glibc \ cyrus-sasl-lib cyrus-sasl-plain cyrus-sasl-gssapi openssl \ python3 \ - libnghttp2 \ + libnghttp2 libunwind \ gdb libasan libubsan libtsan \ gettext hostname iputils \ shadow-utils \ diff --git a/cmake/Findlibunwind.cmake b/cmake/Findlibunwind.cmake new file mode 100644 index 000000000..a7e35f279 --- /dev/null +++ b/cmake/Findlibunwind.cmake @@ -0,0 +1,31 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# Sets LIBUNWIND_LIBRARY to libunwind if it exists +# Sets LIBUNWIND_INCLUDE_DIRS to directory containing libunwind.h header + +find_library(LIBUNWIND_LIBRARY unwind DOC "libunwind is used to dump the stack on crash") + +find_path(LIBUNWIND_INCLUDE_DIRS libunwind.h + HINTS "${CMAKE_INSTALL_PREFIX}/include" + PATHS "/usr/include") + +if (NOT (LIBUNWIND_INCLUDE_DIRS AND LIBUNWIND_LIBRARY)) + message(STATUS "libunwind library not found: stack dump on crash disabled") +endif() diff --git a/include/qpid/dispatch/threading.h b/include/qpid/dispatch/threading.h index 53381be1f..4f7f33da5 100644 --- a/include/qpid/dispatch/threading.h +++ b/include/qpid/dispatch/threading.h @@ -65,5 +65,6 @@ sys_thread_t *sys_thread(const char *thread_name, void *(*run_function) (void *) void sys_thread_free(sys_thread_t *thread); void sys_thread_join(sys_thread_t *thread); sys_thread_t *sys_thread_self(void); +const char *sys_thread_name(sys_thread_t *thread); // use self if thread == 0 #endif diff --git a/packaging/skupper-router.spec.rpkg b/packaging/skupper-router.spec.rpkg index 3e741e41a..d73a5f5b2 100644 --- a/packaging/skupper-router.spec.rpkg +++ b/packaging/skupper-router.spec.rpkg @@ -42,6 +42,7 @@ %global proton_minimum_version 0.34.0 %global libwebsockets_minimum_version 3.0.1 %global libnghttp2_minimum_version 1.33.0 +%global libunwind_minimum_version 1.3.1 Name: skupper-router Version: {{{ git_dir_version }}} @@ -57,6 +58,7 @@ Requires: libwebsockets >= %{libwebsockets_minimum_version} Requires: libnghttp2 >= %{libnghttp2_minimum_version} Requires: 
cyrus-sasl-plain Requires: cyrus-sasl-gssapi +Requires: libunwind >= %{libunwind_minimum_version} BuildRequires: gcc BuildRequires: gcc-c++ @@ -67,6 +69,7 @@ BuildRequires: python3-devel BuildRequires: python3-setuptools BuildRequires: libwebsockets-devel >= %{libwebsockets_minimum_version} BuildRequires: libnghttp2-devel >= %{libnghttp2_minimum_version} +BuildRequires: libunwind-devel >= %{libunwind_minimum_version} # man pages --help BuildRequires: asciidoc BuildRequires: python3-qpid-proton >= %{proton_minimum_version} diff --git a/router/CMakeLists.txt b/router/CMakeLists.txt index fb7642a40..f6046544f 100644 --- a/router/CMakeLists.txt +++ b/router/CMakeLists.txt @@ -30,9 +30,11 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR}) ## set(router_SOURCES src/main.c + src/panic.c ) add_executable(skrouterd ${router_SOURCES}) target_link_libraries(skrouterd skupper-router) +target_link_options(skrouterd PUBLIC LINKER:-Map=skrouterd.map) install(TARGETS skrouterd RUNTIME DESTINATION sbin) diff --git a/router/src/main.c b/router/src/main.c index 76c29590e..156f91de9 100644 --- a/router/src/main.c +++ b/router/src/main.c @@ -39,6 +39,9 @@ static qd_dispatch_t *dispatch = 0; static qd_log_source_t *log_source = 0; static const char* argv0 = 0; +// Install the panic handler for fatal signals. see panic.c +extern void panic_handler_init(void); + /** * Configures the handler function. Specify SIG_IGN to ignore incoming signals. */ @@ -91,6 +94,7 @@ static void check(int fd) { static void main_process(const char *config_path, const char *python_pkgdir, bool test_hooks, int fd) { + panic_handler_init(); dispatch = qd_dispatch(python_pkgdir, test_hooks); check(fd); log_source = qd_log_source("MAIN"); /* Logging is initialized by qd_dispatch. 
*/ diff --git a/router/src/panic.c b/router/src/panic.c new file mode 100644 index 000000000..7197517ac --- /dev/null +++ b/router/src/panic.c @@ -0,0 +1,332 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +// +// Code for unwinding the stack and printing debug information when a non-recoverable signal occurs. +// + +#define _GNU_SOURCE // to get gettid() + +#include "config.h" + +#include "qpid/dispatch/threading.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_LIBUNWIND +#define UNW_LOCAL_ONLY +#include +#endif + +static void panic_signal_handler(int signum, siginfo_t *siginfo, void *ucontext); + +// Define those signals which will be handled. +// +typedef struct { + int signal; + const char *name; +} panic_signal_info_t; + +// clang-format off +static const panic_signal_info_t panic_signals[] = { + { .signal = SIGABRT, .name = "SIGABRT" }, + { .signal = SIGBUS, .name = "SIGBUS" }, + { .signal = SIGFPE, .name = "SIGFPE" }, + { .signal = SIGILL, .name = "SIGILL" }, + { .signal = SIGSEGV, .name="SIGSEGV" }, + {0} +}; +// clang-format on + +/** + * Install the panic signal handler. 
This is done early in the router initialization so do not attempt to log or use the + * alloc pool, etc. This is not called during a signal handler so there is no need to avoid async signal unsafe calls. + */ +void panic_handler_init(void) +{ + if (getenv("SKUPPER_ROUTER_DISABLE_PANIC_HANDLER") == 0) { + struct sigaction sa = { + // use SA_RESETHAND since if the stack unwind fails the default signal handler (coredump) will be invoked + .sa_flags = SA_SIGINFO | SA_RESETHAND, + .sa_sigaction = panic_signal_handler, + }; + + sigemptyset(&sa.sa_mask); + + for (int i = 0; panic_signals[i].signal != 0; ++i) { + sigaction(panic_signals[i].signal, &sa, 0); + } + } +} + +// +// The remaining routines are invoked by a signal handler. They must not invoke any function that is not Async Signal +// Safe. See man signal(7) and man signal-safety(7). Ignore this sage advice at your own peril. +// + +#define BACKTRACE_LIMIT 64 +#define BUFFER_SIZE 256 +static unsigned char buffer[BUFFER_SIZE]; + +// expected to be defined by the linker +// +extern char __executable_start[]; +extern char __etext[]; +extern char *program_invocation_name; + +// async signal safe +// +static void print(const char *str) +{ + size_t ignore = write(STDERR_FILENO, str, strlen((char *) str)); + (void) ignore; + fsync(STDERR_FILENO); +} + +// print a register as a hex string +static void print_reg(uintptr_t reg) +{ + int i = sizeof(reg) * 2; + unsigned char *ptr = &buffer[BUFFER_SIZE - 1]; + + *ptr = 0; + while (i--) { + uint8_t nybble = reg & 0x0F; + + *--ptr = nybble > 9 ? 
(nybble - 10) + 'a' : nybble + '0'; + reg >>= 4; + } + print((char *) ptr); +} + +// print a base-ten unsigned integer +static void print_uint(uintptr_t num) +{ + if (num == 0) + print("0"); + else { + unsigned char *ptr = &buffer[BUFFER_SIZE - 1]; + + *ptr = 0; + while (num > 0) { + *--ptr = (num % 10) + '0'; + num /= 10; + } + print((char *) ptr); + } +} + +#ifdef HAVE_LIBUNWIND + +static void print_libunwind_error(int err) +{ + print("ERROR: libunwind failed: "); + print(unw_strerror(err)); + print("\n"); +} + +static void print_registers(unw_cursor_t *cursor) +{ +#ifdef UNW_TARGET_X86_64 + unw_word_t rax, rbx, rcx, rdx, rdi, rsi, rbp, rsp, r8, r9, r10, r11, r12, r13, r14, r15; + + unw_get_reg(cursor, UNW_X86_64_RAX, &rax); + unw_get_reg(cursor, UNW_X86_64_RBX, &rbx); + unw_get_reg(cursor, UNW_X86_64_RCX, &rcx); + unw_get_reg(cursor, UNW_X86_64_RDX, &rdx); + unw_get_reg(cursor, UNW_X86_64_RDI, &rdi); + unw_get_reg(cursor, UNW_X86_64_RSI, &rsi); + unw_get_reg(cursor, UNW_X86_64_RBP, &rbp); + unw_get_reg(cursor, UNW_X86_64_RSP, &rsp); + unw_get_reg(cursor, UNW_X86_64_R8, &r8); + unw_get_reg(cursor, UNW_X86_64_R9, &r9); + unw_get_reg(cursor, UNW_X86_64_R10, &r10); + unw_get_reg(cursor, UNW_X86_64_R11, &r11); + unw_get_reg(cursor, UNW_X86_64_R12, &r12); + unw_get_reg(cursor, UNW_X86_64_R13, &r13); + unw_get_reg(cursor, UNW_X86_64_R14, &r14); + unw_get_reg(cursor, UNW_X86_64_R15, &r15); + + print(" Registers:\n"); + print(" RAX: 0x"); + print_reg(rax); + print(" RDI: 0x"); + print_reg(rdi); + print(" R11: 0x"); + print_reg(r11); + print("\n"); + + print(" RBX: 0x"); + print_reg(rbx); + print(" RBP: 0x"); + print_reg(rbp); + print(" R12: 0x"); + print_reg(r12); + print("\n"); + + print(" RCX: 0x"); + print_reg(rcx); + print(" R8: 0x"); + print_reg(r8); + print(" R13: 0x"); + print_reg(r13); + print("\n"); + + print(" RDX: 0x"); + print_reg(rdx); + print(" R9: 0x"); + print_reg(r9); + print(" R14: 0x"); + print_reg(r14); + print("\n"); + + print(" RSI: 0x"); + 
print_reg(rsi); + print(" R10: 0x"); + print_reg(r10); + print(" R15: 0x"); + print_reg(r15); + print("\n\n"); +#endif // UNW_TARGET_X86_64 +} + +static void print_backtrace(unw_context_t *context) +{ + unw_cursor_t cursor; + + int err = unw_init_local(&cursor, context); + + if (err) { + print_libunwind_error(err); + return; + } + + print("\nBacktrace:\n"); + + unw_word_t ip = {0}; + unw_word_t sp = {0}; + + for (int i = 0; i < BACKTRACE_LIMIT; i++) { + int ret = unw_step(&cursor); + + if (ret < 0) { + print_libunwind_error(ret); + break; + } + + if (ret == 0) { + break; + } + + unw_get_reg(&cursor, UNW_REG_IP, &ip); + unw_get_reg(&cursor, UNW_REG_SP, &sp); + + ptrdiff_t offset = ((char *) ip) - __executable_start; + print("["); + print_uint((uintptr_t) i); + print("] IP: 0x"); + print_reg(ip); + print(" offset: 0x"); + print_reg(offset); + print(" SP: 0x"); + print_reg(sp); + print("\n"); + print_registers(&cursor); + } +} + +#endif // HAVE_LIBUNWIND + +// signal handler +// +static void panic_signal_handler(int signum, siginfo_t *siginfo, void *ucontext) +{ + (void) siginfo; + (void) ucontext; + + print("\n*** SKUPPER-ROUTER FATAL ERROR ***\n"); // or "guru meditation error" (google it) + print("Version: "); + print(QPID_DISPATCH_VERSION); + print("\n"); + + print("Signal: "); + print_uint(signum); + for (int i = 0; panic_signals[i].name; ++i) { + if (panic_signals[i].signal == signum) { + print(" "); + print(panic_signals[i].name); + break; + } + } + print("\n"); + + // Process Info + + print("Process ID: "); + print_uint((uintptr_t) getpid()); + print(" ("); + print(program_invocation_name); + print(")\n"); + +#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 29) + // Thread (note: is gettid() async-safe?) 
+ + print("Thread ID: "); + print_uint((uintptr_t) gettid()); + print(" ("); + print(sys_thread_name(0)); + print(")\n"); +#endif + + // Text segment mapping + + print("Text segment start: 0x"); + print_reg((uintptr_t) __executable_start); + print(" end: 0x"); + print_reg((uintptr_t) __etext); + print("\n"); + +#ifdef HAVE_LIBUNWIND + + unw_context_t context; + + int err = unw_getcontext(&context); + if (err) { + print_libunwind_error(err); + return; + } + + // Registers + + // int index = print_registers(&context); + + // Backtrace + print_backtrace(&context); +#else + print("!!! libunwind not present: backtrace unavailable !!!\n"); +#endif + print("*** END ***\n"); +} diff --git a/run.py.in b/run.py.in index edcc3062c..92f3652c8 100755 --- a/run.py.in +++ b/run.py.in @@ -93,6 +93,7 @@ env_vars = { 'ASAN_OPTIONS': "${RUNTIME_ASAN_ENV_OPTIONS}", 'LSAN_OPTIONS': "${RUNTIME_LSAN_ENV_OPTIONS}", 'UBSAN_OPTIONS': "${RUNTIME_UBSAN_ENV_OPTIONS}", + 'SKUPPER_ROUTER_DISABLE_PANIC_HANDLER': 'YES', } os.environ.update(env_vars) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 89765acd2..e8a4fb6b7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -174,6 +174,12 @@ endif() add_library(skupper-router OBJECT ${qpid_dispatch_SOURCES}) target_include_directories(skupper-router PRIVATE ${qpid_dispatch_INCLUDES}) + +if (LIBUNWIND_LIBRARY AND LIBUNWIND_INCLUDE_DIRS) + set(qpid_dispatch_LIBRARIES ${qpid_dispatch_LIBRARIES} ${LIBUNWIND_LIBRARY}) + target_compile_definitions(skupper-router PUBLIC "HAVE_LIBUNWIND") +endif() + target_link_libraries(skupper-router PRIVATE ${qpid_dispatch_LIBRARIES}) # check for various function availability diff --git a/src/posix/threading.c b/src/posix/threading.c index a3efee5d0..8db519c1c 100644 --- a/src/posix/threading.c +++ b/src/posix/threading.c @@ -136,6 +136,7 @@ struct sys_thread_t { pthread_t thread; void *(*f)(void *); void *arg; + char name[16]; }; // initialize the per-thread _self to a non-zero value. 
This dummy value will @@ -143,9 +144,8 @@ struct sys_thread_t { // of execution (which is not a pthread). Using a non-zero value provides a // way to distinguish a thread id from a zero (unset) value. // -static sys_thread_t _main_thread_id; -static __thread sys_thread_t *_self = &_main_thread_id; - +static sys_thread_t _main_thread_id = {.name = "main"}; +static __thread sys_thread_t *_self = &_main_thread_id; // bootstrap _self before calling thread's main function // @@ -163,16 +163,25 @@ sys_thread_t *sys_thread(const char *thread_name, void *(*run_function) (void *) sys_thread_t *thread = NEW(sys_thread_t); thread->f = run_function; thread->arg = arg; + strcpy(thread->name, thread_name); pthread_create(&(thread->thread), 0, _thread_init, (void*) thread); - pthread_setname_np(thread->thread, thread_name); + pthread_setname_np(thread->thread, thread->name); return thread; } +// note: called by signal handler, do not invoke async signal unsafe code in this function! See man signal(7) sys_thread_t *sys_thread_self(void) { return _self; } +// note: called by signal handler, do not invoke async signal unsafe code in this function! See man signal(7) +const char *sys_thread_name(sys_thread_t *thread) +{ + if (thread == 0) + thread = _self; + return thread->name; +} void sys_thread_free(sys_thread_t *thread) { diff --git a/tests/thread_test.c b/tests/thread_test.c index 3aa3ef139..d6b35fcf5 100644 --- a/tests/thread_test.c +++ b/tests/thread_test.c @@ -36,14 +36,18 @@ static sys_cond_t cond; static char *result; +#define THREAD_NAME_FMT "thrd %hu" -// for test_thread_id +// for test_thread_id. Note well: never set result to zero here or you'll lose the test failures! 
// void *thread_id_thread(void *arg) { intptr_t index = (intptr_t) arg; assert(index < thread_count); + char expected_name[16]; + snprintf(expected_name, 16, THREAD_NAME_FMT, (unsigned short) index); + sys_mutex_lock(&mutex); // check if self corresponds to my index in threads[] @@ -51,6 +55,8 @@ void *thread_id_thread(void *arg) result = "sys_thread_self returned zero!"; } else if (threads[index] != sys_thread_self()) { result = "sys_thread_self mismatch"; + } else if (strcmp(sys_thread_name(0), expected_name) != 0) { + result = "sys_thread_name mismatch"; } sys_mutex_unlock(&mutex); @@ -72,7 +78,7 @@ static char *test_thread_id(void *context) memset(threads, 0, sizeof(threads)); for (intptr_t i = 0; i < thread_count; ++i) { char thread_name[16]; - snprintf(thread_name, sizeof(thread_name), "tst_thrd %ld", i); + snprintf(thread_name, sizeof(thread_name), THREAD_NAME_FMT, (unsigned short) i); threads[i] = sys_thread(thread_name, thread_id_thread, (void *)i); } @@ -83,6 +89,9 @@ static char *test_thread_id(void *context) sys_thread_free(threads[i]); } + if (result) + return result; + // // test calling sys_thread_self() from the main context. This context // was not created by sys_thread(), however a dummy non-zero value is returned. 
From c283d48e9cf02b444d20d5d626eaea24b0162078 Mon Sep 17 00:00:00 2001 From: Kenneth Giusti Date: Fri, 19 May 2023 16:50:48 -0400 Subject: [PATCH 2/3] ISSUE-1083: add memory map offsets to stack dump --- router/src/panic.c | 175 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 138 insertions(+), 37 deletions(-) diff --git a/router/src/panic.c b/router/src/panic.c index 7197517ac..a5e88be6d 100644 --- a/router/src/panic.c +++ b/router/src/panic.c @@ -28,10 +28,15 @@ #include "qpid/dispatch/threading.h" #include +#include #include +#include +#include #include +#include #include #include +#include #include #include #include @@ -43,6 +48,10 @@ static void panic_signal_handler(int signum, siginfo_t *siginfo, void *ucontext); +// expected to be defined by the linker +// +extern char *program_invocation_name; + // Define those signals which will be handled. // typedef struct { @@ -61,6 +70,63 @@ static const panic_signal_info_t panic_signals[] = { }; // clang-format on +// Memory map including mapped file path +// +typedef struct { + uintptr_t base_address; + char path[PATH_MAX]; +} mem_map_t; + +#define MAX_MAP_ENTRIES 64 + +static mem_map_t mem_map[MAX_MAP_ENTRIES]; +static int mem_map_count = 0; + +// for sorting the map +// +static int map_compare(const void *arg1, const void *arg2) +{ + mem_map_t *entry1 = (mem_map_t *) arg1; + mem_map_t *entry2 = (mem_map_t *) arg2; + assert(entry1->base_address != entry2->base_address); + return entry1->base_address > entry2->base_address ? 1 : -1; +} + +static void lib_map_init(void) +{ + struct link_map *map = 0; + + void *handle = dlopen(NULL, RTLD_NOW); + if (handle) { + int rc = dlinfo(handle, RTLD_DI_LINKMAP, &map); + if (rc == 0) { + while (map) { + assert(mem_map_count < MAX_MAP_ENTIRIES); // update MAX_MAP_ENTRIES, too small! 
+ + mem_map_t *entry = &mem_map[mem_map_count++]; + + entry->base_address = (uintptr_t) map->l_addr; + + // KAG: I'm not particularly sure what's going on here, but if map->l_name has strlen() == 0 apparently + // it means this executable + if (map->l_name) { + if (strlen(map->l_name) == 0) { + snprintf(entry->path, PATH_MAX, "%s", program_invocation_name); + } else { + snprintf(entry->path, PATH_MAX, "%s", map->l_name); + } + } else { + snprintf(entry->path, PATH_MAX, "?? UNKNOWN ??"); + } + map = map->l_next; + } + } + dlclose(handle); + } + + qsort(mem_map, mem_map_count, sizeof(mem_map[0]), map_compare); +} + /** * Install the panic signal handler. This is done early in the router initialization so do not attempt to log or use the * alloc pool, etc. This is not called during a signal handler so there is no need to avoid async signal unsafe calls. @@ -68,6 +134,7 @@ static const panic_signal_info_t panic_signals[] = { void panic_handler_init(void) { if (getenv("SKUPPER_ROUTER_DISABLE_PANIC_HANDLER") == 0) { + lib_map_init(); struct sigaction sa = { // use SA_RESETHAND since if the stack unwind fails the default signal handler (coredump) will be invoked .sa_flags = SA_SIGINFO | SA_RESETHAND, @@ -91,12 +158,6 @@ void panic_handler_init(void) #define BUFFER_SIZE 256 static unsigned char buffer[BUFFER_SIZE]; -// expected to be defined by the linker -// -extern char __executable_start[]; -extern char __etext[]; -extern char *program_invocation_name; - // async signal safe // static void print(const char *str) @@ -107,6 +168,8 @@ static void print(const char *str) } // print a register as a hex string +// async signal safe +// static void print_reg(uintptr_t reg) { int i = sizeof(reg) * 2; @@ -123,6 +186,8 @@ static void print_reg(uintptr_t reg) } // print a base-ten unsigned integer +// async signal safe +// static void print_uint(uintptr_t num) { if (num == 0) @@ -141,6 +206,28 @@ static void print_uint(uintptr_t num) #ifdef HAVE_LIBUNWIND +// given an address find 
its mapping and offset +// async signal safe +// +static bool get_offset(uintptr_t address, uintptr_t *offset, const char **path) +{ + *offset = 0; + *path = 0; + + // map is sorted lowest address first + int index = mem_map_count; + while (index-- > 0) { + mem_map_t *entry = &mem_map[index]; + if (address >= entry->base_address) { + *offset = address - entry->base_address; + *path = entry->path; + return true; + } + } + + return false; +} + static void print_libunwind_error(int err) { print("ERROR: libunwind failed: "); @@ -209,8 +296,40 @@ static void print_registers(unw_cursor_t *cursor) print_reg(r10); print(" R15: 0x"); print_reg(r15); - print("\n\n"); + print("\n"); #endif // UNW_TARGET_X86_64 + + unw_word_t sp = {0}; + unw_get_reg(cursor, UNW_REG_SP, &sp); + + print(" SP: 0x"); + print_reg(sp); + print("\n\n"); +} + +static void print_stack_frame(int index, unw_cursor_t *cursor) +{ + unw_word_t ip = {0}; + + unw_get_reg(cursor, UNW_REG_IP, &ip); + + uintptr_t offset = 0; + const char *path = 0; + + print("["); + print_uint((uintptr_t) index); + print("] IP: 0x"); + print_reg(ip); + + if (get_offset((uintptr_t) ip, &offset, &path)) { + print(" ("); + print(path); + print(" + 0x"); + print_reg(offset); + print(")"); + } + print("\n"); + print_registers(cursor); } static void print_backtrace(unw_context_t *context) @@ -226,9 +345,6 @@ static void print_backtrace(unw_context_t *context) print("\nBacktrace:\n"); - unw_word_t ip = {0}; - unw_word_t sp = {0}; - for (int i = 0; i < BACKTRACE_LIMIT; i++) { int ret = unw_step(&cursor); @@ -241,20 +357,7 @@ static void print_backtrace(unw_context_t *context) break; } - unw_get_reg(&cursor, UNW_REG_IP, &ip); - unw_get_reg(&cursor, UNW_REG_SP, &sp); - - ptrdiff_t offset = ((char *) ip) - __executable_start; - print("["); - print_uint((uintptr_t) i); - print("] IP: 0x"); - print_reg(ip); - print(" offset: 0x"); - print_reg(offset); - print(" SP: 0x"); - print_reg(sp); - print("\n"); - print_registers(&cursor); + 
print_stack_frame(i, &cursor); } } @@ -303,13 +406,18 @@ static void panic_signal_handler(int signum, siginfo_t *siginfo, void *ucontext) // Text segment mapping - print("Text segment start: 0x"); - print_reg((uintptr_t) __executable_start); - print(" end: 0x"); - print_reg((uintptr_t) __etext); - print("\n"); +#ifndef HAVE_LIBUNWIND + print("Memory Map:\n"); + for (int i = 0; i < mem_map_count; ++i) { + print_reg(mem_map[i].base_address); + print(": "); + print(mem_map[i].path); + print("\n"); + } -#ifdef HAVE_LIBUNWIND + print("!!! libunwind not present: backtrace unavailable !!!\n"); + +#else // HAVE_LIBUNWIND unw_context_t context; @@ -319,14 +427,7 @@ static void panic_signal_handler(int signum, siginfo_t *siginfo, void *ucontext) return; } - // Registers - - // int index = print_registers(&context); - - // Backtrace print_backtrace(&context); -#else - print("!!! libunwind not present: backtrace unavailable !!!\n"); #endif print("*** END ***\n"); } From 8d1e04740ce0d144dedb7f550922c0ddfbf76053 Mon Sep 17 00:00:00 2001 From: Kenneth Giusti Date: Mon, 22 May 2023 09:51:09 -0400 Subject: [PATCH 3/3] fixup: Jiri's feedback and assert typo --- router/src/panic.c | 3 +-- run.py.in | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/router/src/panic.c b/router/src/panic.c index a5e88be6d..6612c92aa 100644 --- a/router/src/panic.c +++ b/router/src/panic.c @@ -101,7 +101,7 @@ static void lib_map_init(void) int rc = dlinfo(handle, RTLD_DI_LINKMAP, &map); if (rc == 0) { while (map) { - assert(mem_map_count < MAX_MAP_ENTIRIES); // update MAX_MAP_ENTRIES, too small! + assert(mem_map_count < MAX_MAP_ENTRIES); // update MAX_MAP_ENTRIES, too small! 
mem_map_t *entry = &mem_map[mem_map_count++]; @@ -136,7 +136,6 @@ void panic_handler_init(void) if (getenv("SKUPPER_ROUTER_DISABLE_PANIC_HANDLER") == 0) { lib_map_init(); struct sigaction sa = { - // use SA_RESETHAND since if the stack unwind fails the default signal handler (coredump) will be invoked .sa_flags = SA_SIGINFO | SA_RESETHAND, .sa_sigaction = panic_signal_handler, }; diff --git a/run.py.in b/run.py.in index 92f3652c8..edcc3062c 100755 --- a/run.py.in +++ b/run.py.in @@ -93,7 +93,6 @@ env_vars = { 'ASAN_OPTIONS': "${RUNTIME_ASAN_ENV_OPTIONS}", 'LSAN_OPTIONS': "${RUNTIME_LSAN_ENV_OPTIONS}", 'UBSAN_OPTIONS': "${RUNTIME_UBSAN_ENV_OPTIONS}", - 'SKUPPER_ROUTER_DISABLE_PANIC_HANDLER': 'YES', } os.environ.update(env_vars)