diff --git a/trace/.gitignore b/trace/.gitignore new file mode 100644 index 0000000..450e636 --- /dev/null +++ b/trace/.gitignore @@ -0,0 +1,4 @@ +proto +perfetto_trace_pb2.py +tmp +__pycache__ diff --git a/trace/collect_rtt_trace.py b/trace/collect_rtt_trace.py new file mode 100755 index 0000000..1b0af7e --- /dev/null +++ b/trace/collect_rtt_trace.py @@ -0,0 +1,173 @@ +#!/usr/bin/env python3 +# +# Phoenix-RTOS +# +# Trace-over-RTT collector - runs OpenOCD with a given config and collects +# data from its RTT channel sockets +# +# NOTE: Assumes the config makes the OpenOCD expose the channels as follows: +# RTT_PORT_BASE + 2 * K -> meta_channelK +# RTT_PORT_BASE + 2 * K + 1 -> event_channelK +# +# Copyright 2025 Phoenix Systems +# Author: Adam Greloch + +import os +import sys +import socket +import selectors +import errno +import time +import subprocess + +RTT_PORT_BASE = 18023 + + +class TraceOverRTTCollector: + cores = [] + + def connect_sockets(self, rtt_port_base): + retries = 5 + while True: + try: + meta_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + meta_sock.connect(("localhost", rtt_port_base)) + meta_sock.setblocking(False) + + events_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + events_sock.connect(("localhost", rtt_port_base + 1)) + events_sock.setblocking(False) + + rtt_port_base += 2 + self.cores.append({"meta_sock": meta_sock, "events_sock": events_sock}) + except (OSError) as e: + if e.errno == errno.ECONNREFUSED: + if len(self.cores) == 0: + if retries == 0: + print("Unable to connect to any RTT channel socket. 
Is OpenOCD configured correctly?") + raise + time.sleep(0.05) + retries -= 1 + continue + print(f"Connected to {2 * len(self.cores)} channels") + return + else: + raise + + def open_channel_files(self, output_dir): + os.makedirs(output_dir, exist_ok=True) + print(f"Saving traces to {os.path.realpath(output_dir)}") + + for (i, core) in enumerate(self.cores): + meta_file = open(os.path.join( + output_dir, f"channel_meta{i}"), "wb") + events_file = open(os.path.join( + output_dir, f"channel_event{i}"), "wb") + core["meta_file"] = meta_file + core["events_file"] = events_file + + def close_channel_files(self): + for core in self.cores: + core["meta_file"].close() + core["events_file"].close() + print("Files closed") + + wrote = dict() + total = 0 + + def read_from_socket(self, conn, mask, file): + BUF_SIZE = 1024 + while True: + try: + data = conn.recv(BUF_SIZE) + except (OSError) as e: + if e.errno == errno.EAGAIN: + break + else: + raise + else: + if not data: + break + file.write(data) + self.wrote[file.name] += len(data) + self.total += len(data) + + def init_stats(self): + for core in self.cores: + self.wrote[core["meta_file"].name] = 0 + self.wrote[core["events_file"].name] = 0 + + def register_sockets(self): + sel = selectors.DefaultSelector() + for core in self.cores: + sel.register(core["events_sock"], selectors.EVENT_READ, + (self.read_from_socket, core["events_file"])) + sel.register(core["meta_sock"], selectors.EVENT_READ, + (self.read_from_socket, core["meta_file"])) + return sel + + def poll(self, sel): + try: + print("Ready to gather events. 
Do ^C when the trace has finished") + + last = time.time() + last_total = self.total + rate_kbps = 0 + status_printed = False + + while True: + events_sock = sel.select() + for key, mask in events_sock: + (callback, file) = key.data + callback(key.fileobj, mask, file) + + now = time.time() + if now - last > 0.1: + rate_kbps = ((self.total - last_total) / + (now - last)) / 1024 + last = now + last_total = self.total + + if status_printed: + for _ in range(len(self.wrote) + 1): + sys.stdout.write("\x1b[1A\x1b[2K") + + print(f"Rate: {rate_kbps:.2f} KB/s") + for (filename, w) in self.wrote.items(): + print(f"{os.path.basename(filename)}: { + w / 1024:.2f} KB ") + status_printed = True + except KeyboardInterrupt: + print("") + + def run(self, ocd_config, output_dir): + p = subprocess.Popen(["openocd", "-f", ocd_config]) + print("OpenOCD started") + try: + self.connect_sockets(RTT_PORT_BASE) + self.open_channel_files(output_dir) + try: + self.init_stats() + sel = self.register_sockets() + self.poll(sel) + finally: + self.close_channel_files() + finally: + p.terminate() + print("OpenOCD stopped") + + +def main(): + if len(sys.argv) != 3: + print(f"usage: {sys.argv[0]} OPENOCD_CONFIG OUTPUT_DIR") + sys.exit(1) + + ocd_config = sys.argv[1] + output_dir = sys.argv[2] + + c = TraceOverRTTCollector() + c.run(ocd_config, output_dir) + + +if __name__ == "__main__": + main() diff --git a/trace/convert.sh b/trace/convert.sh new file mode 100755 index 0000000..20876d4 --- /dev/null +++ b/trace/convert.sh @@ -0,0 +1,130 @@ +#!/bin/bash +# +# Utility script for converting CTF trace to perfetto protobuf +# +# Requires: babeltrace2 python3-bt2 protobuf-compiler python3-protobuf +# +# Usage: ./convert.sh CTF_DIR_PATH METADATA_PATH OUTPUT [OPTIONS] +# Options: +# -t - run perfetto in browser with local trace processing acceleration (may +# be useful for large traces) +# +# Copyright 2025 Phoenix Systems +# Author: Adam Greloch + +if [ "$#" -eq 0 ]; then + echo "Usage: ./$(basename "$0") 
CTF_DIR_PATH METADATA_PATH OUTPUT [OPTIONS]" + echo "Options:" + echo " -t: run perfetto in browser with local trace processing acceleration (may be useful for large traces)" + echo "Example: ./$(basename "$0") my-ctf-trace ../../phoenix-rtos-kernel/perf/tsdl/metadata output.pftrace" + exit 1 +fi + +if ! command -v babeltrace2 >/dev/null 2>&1; then + echo "babeltrace2 not found" + exit 1 +fi + +if ! command -v protoc >/dev/null 2>&1; then + echo "protoc not found" + exit 1 +fi + +b_log() { + echo -e "\033[1;33m$1\033[0m" +} + +SCRIPT_DIR="$(dirname "$(realpath "$0")")" + +source "${SCRIPT_DIR}/trim_event_stream.subr" + +set -e + +CTF_DIR_PATH="${1?No ctf dir path given}" +METADATA_FILE_PATH="${2?No metadata path given}" +OUTPUT_PFTRACE="${3?No output given}" +OPT="${4}" + +TMP_DIR="${SCRIPT_DIR}/tmp" +TRACE_DIR="${TMP_DIR}/ctf-trace-$(date +%FT%T)" + +TRACE_PROCESSOR_URL="https://get.perfetto.dev/trace_processor" +TRACE_PROCESSOR_PATH="${TMP_DIR}/trace_processor" +PERFETTO_URL="https://ui.perfetto.dev/v50.1-2c4d2ffa7/" + +b_log "copying CTF data streams" + +mkdir -vp "${TRACE_DIR}" + +cp -v "${CTF_DIR_PATH}"/* "${TRACE_DIR}" + +echo -n "creating symlink to CTF metadata: " +ln -rvsf "${METADATA_FILE_PATH}" "${TRACE_DIR}" + +b_log "adding stream context" + +TMP_FILE="${TRACE_DIR}/tmp" + +for stream in "${TRACE_DIR}"/channel_*; do + filename="$(basename "${stream}")" + cpu="${filename//[!0-9]/}" + + echo "${filename} size: $(du -h "${stream}" | cut -f 1), cpu: ${cpu}" + + # shellcheck disable=2059 + { + printf "\x${cpu}" + cat "${stream}" + } >"${TMP_FILE}" + mv "${TMP_FILE}" "${stream}" +done + +b_log "trimming event streams" + +trim_event_stream "${TRACE_DIR}" + +CTF_TO_PROTO_DIR="${SCRIPT_DIR}/ctf_to_proto/" +PYTHON_SRC="${CTF_TO_PROTO_DIR}/src" + +CTF_TO_PROTO="${PYTHON_SRC}/ctf_to_proto.py" + +PROTO_SRC="${CTF_TO_PROTO_DIR}/proto" +PROTO_FILE_PATH="${PROTO_SRC}/perfetto_trace.proto" + 
+PROTO_URL="https://github.com/google/perfetto/raw/refs/heads/main/protos/perfetto/trace/perfetto_trace.proto" + +if [ ! -f "${PROTO_FILE_PATH}" ]; then + b_log "preparing ctf_to_proto.py" + mkdir -p "${PROTO_SRC}" + wget "${PROTO_URL}" -P "${PROTO_SRC}" + protoc --proto_path="${PROTO_SRC}" --python_out="${PYTHON_SRC}" "${PROTO_FILE_PATH}" +fi + +b_log "converting using ${CTF_TO_PROTO}" + +time "${CTF_TO_PROTO}" "${TRACE_DIR}" "${OUTPUT_PFTRACE}" + +echo "Resulting pftrace size: $(du -h "${OUTPUT_PFTRACE}" | cut -f 1)" + +if [ "${OPT}" == "-t" ]; then + b_log "running trace processor" + + if [ ! -f "${TRACE_PROCESSOR_PATH}" ]; then + echo "trace processor not found" + trace_processor_dir="$(dirname "${TRACE_PROCESSOR_PATH}")" + mkdir -p "${trace_processor_dir}" + (cd "${trace_processor_dir}" && curl -LO "${TRACE_PROCESSOR_URL}") + chmod +x "${TRACE_PROCESSOR_PATH}" + fi + + echo "Opening ${PERFETTO_URL} in browser" + if [[ $(type -P "google-chrome") ]]; then + google-chrome "${PERFETTO_URL}" 2>/dev/null & + else + xdg-open "${PERFETTO_URL}" 2>/dev/null & + fi + + echo "Press 'YES, use loaded trace' in the perfetto popup when asked" + + exec "${TRACE_PROCESSOR_PATH}" --httpd "${OUTPUT_PFTRACE}" 2>/dev/null +fi diff --git a/trace/ctf_to_proto/src/ctf_to_proto.py b/trace/ctf_to_proto/src/ctf_to_proto.py new file mode 100755 index 0000000..e4ec7cb --- /dev/null +++ b/trace/ctf_to_proto/src/ctf_to_proto.py @@ -0,0 +1,704 @@ +#!/usr/bin/env python3 + +# Phoenix-RTOS +# +# CTF to Perfetto trace protobuf converter +# +# Copyright 2025 Phoenix Systems +# Author: Adam Greloch +# + +import time +import bt2 +import sys +from enum import Enum + +import perfetto_trace_pb2 +from perfetto_trace_pb2 import TrackEvent, CounterDescriptor + + +prtos_syscalls = [ + "debug", + "sys_mmap", + "sys_munmap", + "sys_fork", + "vforksvc", + "exec", + "spawnSyspage", + "sys_exit", + "sys_waitpid", + "threadJoin", + "getpid", + "getppid", + "gettid", + "beginthreadex", + "endthread", + "nsleep", 
+ "phMutexCreate", + "phMutexLock", + "mutexTry", + "mutexUnlock", + "phCondCreate", + "phCondWait", + "condSignal", + "condBroadcast", + "resourceDestroy", + "interrupt", + "portCreate", + "portDestroy", + "portRegister", + "msgSend", + "msgRecv", + "msgRespond", + "lookup", + "gettime", + "settime", + "keepidle", + "mmdump", + "platformctl", + "wdgreload", + "threadsinfo", + "meminfo", + "perf_start", + "perf_read", + "perf_finish", + "syspageprog", + "va2pa", + "signalHandle", + "signalPost", + "signalMask", + "signalSuspend", + "priority", + "sys_read", + "sys_write", + "sys_open", + "sys_close", + "sys_link", + "sys_unlink", + "sys_fcntl", + "sys_ftruncate", + "sys_lseek", + "sys_dup", + "sys_dup2", + "sys_pipe", + "sys_mkfifo", + "sys_chmod", + "sys_fstat", + "sys_fsync", + "sys_accept", + "sys_accept4", + "sys_bind", + "sys_connect", + "sys_gethostname", + "sys_getpeername", + "sys_getsockname", + "sys_getsockopt", + "sys_listen", + "sys_recvfrom", + "sys_sendto", + "sys_recvmsg", + "sys_sendmsg", + "sys_socket", + "sys_socketpair", + "sys_shutdown", + "sys_sethostname", + "sys_setsockopt", + "sys_ioctl", + "sys_futimens", + "sys_poll", + "sys_tkill", + "sys_setpgid", + "sys_getpgid", + "sys_setpgrp", + "sys_getpgrp", + "sys_setsid", + "sys_spawn", + "release", + "sbi_putchar", + "sbi_getchar", + "sigreturn", + "sys_mprotect", + "sys_statvfs", + "sys_uname", + "perf_stop", +] + + +class SyntheticEvents(Enum): + INTERRUPT = "interrupt" + IN_LOCK_SET = "lockSet" + LOCKED = "locked" + SYSCALL = "syscall" + SCHED = "sched" + RUNNABLE = "runnable" + + +prtos_synthetic_events = { + SyntheticEvents.INTERRUPT: ("interrupt_enter", "interrupt_exit"), + SyntheticEvents.IN_LOCK_SET: ("lock_set_enter", "lock_set_exit"), + SyntheticEvents.LOCKED: ("lock_set_acquired", "lock_clear"), + SyntheticEvents.SYSCALL: ("syscall_enter", "syscall_exit"), + SyntheticEvents.SCHED: ("sched_enter", "sched_exit"), + SyntheticEvents.RUNNABLE: ("thread_waking", "thread_scheduling"), +} + + 
+def lower(x): + if isinstance(x, str) or isinstance(x, int) or isinstance(x, float): + return x + if isinstance(x, dict) or isinstance(x, bt2._StructureFieldConst): + return {lower(k): lower(v) for k, v in x.items()} + if isinstance(x, bt2._BoolValueConst) \ + or isinstance(x, bt2._BoolFieldConst): + return bool(x) + if isinstance(x, bt2._EnumerationFieldConst): + return repr(x) + if isinstance(x, bt2._IntegerValueConst) \ + or isinstance(x, bt2._IntegerFieldConst): + return int(x) + if isinstance(x, bt2._RealValueConst) \ + or isinstance(x, bt2._RealFieldConst): + return float(x) + if isinstance(x, bt2._StringValueConst) \ + or isinstance(x, bt2._StringFieldConst): + return str(x) + raise ValueError("Unexpected value from trace", x) + + +def put(str): + sys.stdout.write(str) + + +def eprint(*args, **kwargs): + print(*args, file=sys.stderr, **kwargs) + + +BATCH_SIZE = 100000 + +REAL_PROC_ID_BASE = 0 +CPU_PROC_ID = 100000 + +PACKET_SEQ = 1111222223 + +MERGE_PRIORITIES = True + +UNKNOWN_TID = 999999999 +KERNEL_TID = -1 + +uid = 42 # should be non-zero + + +def next_uid(): + global uid + + res = uid + uid += 1 + return res + + +class Emitter: + base_time_us = None + + synthetic_begin = {} + synthetic_end = {} + + initial_metadata_emitted = False + + tid_to_events_track_uid = dict() + tid_to_sched_track_uid = dict() + tid_to_prio_track_uid = dict() + + pid_to_uid = dict() + pid_to_prio_uid = dict() + + prev_cpu_event = dict() + prev_running_thread_event = dict() + + prev_cpu = dict() + + ongoing_events: dict[list[dict]] = dict() + + dest = None + + cpus_uid = None + cpu_uids = dict() + cpu_flow_ids = dict() + + kernel_uid = None + kernel_cpu_uids = dict() + + priorities_uid = None + + lock_names = dict() + + last_flush = None + events_total = 0 + + tid_curr_prio = dict() + + warn_unknown_threads = False + + def __init__(self): + for synthetic, (begin, end) in prtos_synthetic_events.items(): + self.synthetic_begin[begin] = [] + self.synthetic_end[end] = [] + + for 
synthetic, (begin, end) in prtos_synthetic_events.items(): + self.synthetic_begin[begin].append(synthetic) + self.synthetic_end[end].append(synthetic) + + @staticmethod + def tid_or_kernel(event): + if "tid" in event.payload_field: + return lower(event.payload_field["tid"]) + else: + return KERNEL_TID + + def event_us(self, msg): + us = msg.default_clock_snapshot.value + + if self.base_time_us is None: + # do assertions once - the clock config doesn't change in our case + assert msg.default_clock_snapshot.clock_class.name == "monotonic" + assert msg.default_clock_snapshot.clock_class.frequency == 1e6 + self.base_time_us = us + + return (us - self.base_time_us) * 1000 + + current_trace = perfetto_trace_pb2.Trace() + + def flush_current_trace(self): + event_count = len(self.current_trace.packet) + + self.dest.write(self.current_trace.SerializeToString()) + self.current_trace = perfetto_trace_pb2.Trace() + + now = time.time() + self.events_total += event_count + delta = now - self.last_flush + eprint(f"emitted {self.events_total} events" + f" ({event_count / delta:.2f} events/s)") + self.last_flush = now + + def print_trace_packets(self, packets): + for packet in packets: + self.current_trace.packet.append(packet) + + if (len(self.current_trace.packet) >= BATCH_SIZE): + self.flush_current_trace() + + def add_new_thread(self, **kwargs): + kwargs = {**{'ts': 0}, **kwargs} + + tid = kwargs['tid'] + pid = kwargs['pid'] + prio = kwargs['prio'] + + # may be tempting to use tid as track_uid, but in case of synthetic + # tracks this could create unnecessary mess + uid = next_uid() + sched_uid = next_uid() + events_uid = next_uid() + prio_uid = next_uid() + + self.tid_to_events_track_uid[tid] = events_uid + self.tid_to_sched_track_uid[tid] = sched_uid + self.tid_to_prio_track_uid[tid] = prio_uid + + self.ongoing_events[tid] = dict() + + packets = [] + + if pid not in self.pid_to_uid: + name = lower(kwargs['name']) + packet = perfetto_trace_pb2.TracePacket() + process_uid = 
next_uid() + packet.track_descriptor.uuid = process_uid + packet.track_descriptor.process.pid = pid + packet.track_descriptor.process.process_name = \ + f"'{name}'" + packets.append(packet) + self.pid_to_uid[pid] = process_uid + + if not MERGE_PRIORITIES: + pid_prio_uid = next_uid() + packet = perfetto_trace_pb2.TracePacket() + packet.track_descriptor.uuid = pid_prio_uid + packet.track_descriptor.parent_uuid = self.priorities_uid + packet.track_descriptor.name = f"'{name}' {pid}" + packets.append(packet) + self.pid_to_prio_uid[pid] = pid_prio_uid + + eprint(f"add process: '{name}' {pid=}") + + root_packet = perfetto_trace_pb2.TracePacket() + root_packet.track_descriptor.uuid = uid + root_packet.track_descriptor.thread.pid = pid + root_packet.track_descriptor.thread.tid = tid + + sched_packet = perfetto_trace_pb2.TracePacket() + sched_packet.track_descriptor.uuid = sched_uid + sched_packet.track_descriptor.parent_uuid = uid + sched_packet.track_descriptor.name = "sched" + + events_packet = perfetto_trace_pb2.TracePacket() + events_packet.track_descriptor.uuid = events_uid + events_packet.track_descriptor.parent_uuid = uid + events_packet.track_descriptor.name = "events" + + prio_packet = perfetto_trace_pb2.TracePacket() + prio_packet.track_descriptor.uuid = prio_uid + prio_packet.track_descriptor.parent_uuid = \ + uid if MERGE_PRIORITIES else self.pid_to_prio_uid[pid] + prio_packet.track_descriptor.name = "prio" + prio_packet.track_descriptor.counter.unit_name = "prio" + + packets += [root_packet, sched_packet, events_packet, prio_packet] + + self.tid_curr_prio[tid] = prio + + self.print_trace_packets(packets) + + eprint(f"add thread: {tid=} {pid=} {prio=}") + + def add_ongoing_event(self, tid, packet): + event_name = packet.track_event.name + + if event_name not in self.ongoing_events[tid]: + self.ongoing_events[tid][event_name] = [] + + self.ongoing_events[tid][event_name].append(packet) + + def pop_ongoing_event(self, tid, packet): + key = packet.track_event.name 
+ + if key not in self.ongoing_events[tid] \ + or not self.ongoing_events[tid][key]: + return None + + return self.ongoing_events[tid][key].pop() + + def end_ongoing_events(self, tid, ts): + for packets in self.ongoing_events[tid].values(): + for packet in packets: + packet.track_event.type = TrackEvent.Type.TYPE_SLICE_END + packet.timestamp = ts + self.print_trace_packets([packet]) + + self.ongoing_events[tid].clear() + + def emit_initial_metadata(self, event): + # emit "CPUs" track - its subtracks denote CPUs and show + # which kernel thread is currently scheduled on which CPU + + packets = [] + + self.cpus_uid = next_uid() + cpus_packet = perfetto_trace_pb2.TracePacket() + cpus_packet.track_descriptor.uuid = self.cpus_uid + cpus_packet.track_descriptor.name = "CPUs" + packets.append(cpus_packet) + + self.kernel_uid = next_uid() + kernel_packet = perfetto_trace_pb2.TracePacket() + kernel_packet.track_descriptor.uuid = self.kernel_uid + kernel_packet.track_descriptor.name = "KERNEL" + packets.append(kernel_packet) + + if not MERGE_PRIORITIES: + self.priorities_uid = next_uid() + priorities_packet = perfetto_trace_pb2.TracePacket() + priorities_packet.track_descriptor.uuid = self.priorities_uid + priorities_packet.track_descriptor.name = "Priorities" + packets.append(priorities_packet) + + self.print_trace_packets(packets) + + # Create dummy thread for stray threads + self.threads[UNKNOWN_TID] = {'pid': 999999999, 'name': 'UNKNOWN', 'prio': + 999, 'ts': 0} + + # Initialize kernel thread by hand without adding it to self.threads + # The kernel has its own special (not thread-like) track + self.ongoing_events[KERNEL_TID] = dict() + + def emit_kernel_cpu_if_new(self, cpu): + if cpu not in self.kernel_cpu_uids: + self.kernel_cpu_uids[cpu] = next_uid() + + kernel_cpu_packet = perfetto_trace_pb2.TracePacket() + kernel_cpu_packet.track_descriptor.uuid = self.kernel_cpu_uids[cpu] + kernel_cpu_packet.track_descriptor.parent_uuid = self.kernel_uid + 
kernel_cpu_packet.track_descriptor.name = f"CPU {cpu}" + + self.print_trace_packets([kernel_cpu_packet]) + + def emit_virtual_cpu_if_new(self, cpu): + if cpu not in self.cpu_uids: + self.cpu_uids[cpu] = next_uid() + self.cpu_flow_ids[cpu] = next_uid() + + cpu_packet = perfetto_trace_pb2.TracePacket() + cpu_packet.track_descriptor.uuid = self.cpu_uids[cpu] + cpu_packet.track_descriptor.parent_uuid = self.cpus_uid + cpu_packet.track_descriptor.name = f"CPU {cpu}" + + self.print_trace_packets([cpu_packet]) + + def update_cpu_virtual_thread(self, msg: bt2._EventMessageConst, cpu): + tid = self.tid_or_kernel(msg.event) + + tname = lower(f"{self.get_thread(tid)['name']} {tid}") + + self.emit_virtual_cpu_if_new(cpu) + + if cpu not in self.prev_cpu_event or \ + self.prev_cpu_event[cpu].track_event.name != tname: + + packets = [] + + if cpu in self.prev_cpu_event: + prev_packet = self.prev_cpu_event[cpu] + prev_packet.timestamp = self.event_us(msg) + prev_packet.track_event.type = TrackEvent.Type.TYPE_SLICE_END + packets.append(prev_packet) + + packet = perfetto_trace_pb2.TracePacket() + packet.timestamp = self.event_us(msg) + packet.track_event.type = TrackEvent.Type.TYPE_SLICE_BEGIN + packet.track_event.name = tname + packet.track_event.track_uuid = self.cpu_uids[cpu] + packet.trusted_packet_sequence_id = PACKET_SEQ + packets.append(packet) + + self.prev_cpu_event[cpu] = packet + + self.print_trace_packets(packets) + + self.update_running_thread(msg, cpu) + + def update_running_thread(self, msg: bt2._EventMessageConst, cpu): + tid = self.tid_or_kernel(msg.event) + + packets = [] + + if cpu in self.prev_running_thread_event: + packet = self.prev_running_thread_event[cpu] + + packet.timestamp = self.event_us(msg) + packet.track_event.type = TrackEvent.Type.TYPE_SLICE_END + + packets.append(packet) + + packet = perfetto_trace_pb2.TracePacket() + + packet.timestamp = self.event_us(msg) + 1 + packet.track_event.type = TrackEvent.Type.TYPE_SLICE_BEGIN + packet.track_event.name 
= f"running:cpu{cpu}" + packet.track_event.track_uuid = self.tid_to_sched_track_uid[tid] + packet.track_event.flow_ids.append(self.cpu_flow_ids[cpu]) + packet.trusted_packet_sequence_id = PACKET_SEQ + + packets.append(packet) + + self.prev_running_thread_event[cpu] = packet + + self.print_trace_packets(packets) + + def get_lock_name(self, msg): + lock_id = int(msg.event.payload_field["lid"]) + if lock_id in self.lock_names: + return self.lock_names[lock_id] + else: + return f"0x{lock_id:x}" + + def emit_prio_change( + self, + tid: int, + prio: int, + ts: int + ): + packet = perfetto_trace_pb2.TracePacket() + packet.timestamp = ts + packet.track_event.type = TrackEvent.Type.TYPE_COUNTER + packet.track_event.counter_value = prio + packet.track_event.track_uuid = self.tid_to_prio_track_uid[tid] + packet.trusted_packet_sequence_id = PACKET_SEQ + return packet + + first_event = True + + tid_emitted = set() + threads = dict() + + def get_thread(self, tid): + if tid not in self.threads: + tid = UNKNOWN_TID + self.warn_unknown_threads = True + return self.threads[tid] + + def emit_event( + self, + msg: bt2._EventMessageConst, + name: str | SyntheticEvents, + phase: TrackEvent.Type, + ): + if not self.initial_metadata_emitted: + self.emit_initial_metadata(msg.event) + self.initial_metadata_emitted = True + + event = msg.event + args = dict(event.payload_field) + tid = self.tid_or_kernel(event) + ts = self.event_us(msg) + track_uuid = None + cpu = event['cpu'] + update_cpu = False + flow_id = None + shift_by_ns = False + + if name == "thread_create": + self.threads[tid] = {'pid': args['pid'], 'name': args['name'], 'prio': + args['prio'], 'ts': ts} + return # meta event + + if tid != KERNEL_TID and tid not in self.tid_emitted: + t = self.get_thread(tid) + self.add_new_thread( + tid=tid, pid=t['pid'], name=t['name'], + prio=t['prio'], ts=ts) + event = self.emit_prio_change( + tid, self.tid_curr_prio[tid], ts) + self.print_trace_packets([event]) + self.tid_emitted.add(tid) + + 
event_name = name.value if type(name) is SyntheticEvents else name + + match name: + case SyntheticEvents.SYSCALL: + n = args["n"] + event_name = "syscall:" + prtos_syscalls[n] + case SyntheticEvents.INTERRUPT: + irq = args["irq"] + event_name = f"irq:{irq}" + case SyntheticEvents.IN_LOCK_SET: + lock_name = self.get_lock_name(msg) + event_name = "lockSet:" + lock_name + case SyntheticEvents.LOCKED: + lock_name = self.get_lock_name(msg) + event_name = "locked:" + lock_name + flow_id = int(args["lid"]) + if phase == TrackEvent.Type.TYPE_SLICE_BEGIN: + # WORKAROUND: perfetto doesn't like IN_LOCK_SET END + # having the same ts as LOCKED BEGIN, resulting in + # LOCKED event not showing up on the timeline + shift_by_ns = True + case SyntheticEvents.RUNNABLE: + track_uuid = self.tid_to_sched_track_uid[tid] + if phase == TrackEvent.Type.TYPE_SLICE_END: + update_cpu = True + case "lock_name": + lock_id = int(args["lid"]) + lock_name = str(args["name"]) + self.lock_names[lock_id] = lock_name + return # meta event + case "thread_priority": + self.print_trace_packets( + [self.emit_prio_change(tid, args['priority'], ts)]) + return # meta event + case "thread_end": + self.end_ongoing_events(tid, ts) + case str(n) if "lock_" in n: + lock_name = self.get_lock_name(msg) + event_name += "(" + lock_name + ")" + + if tid == KERNEL_TID: + self.emit_kernel_cpu_if_new(cpu) + track_uuid = self.kernel_cpu_uids[cpu] + + if not track_uuid: + track_uuid = self.tid_to_events_track_uid[tid] + + packet = perfetto_trace_pb2.TracePacket() + packet.timestamp = ts + (1 if shift_by_ns else 0) + packet.track_event.type = phase + packet.track_event.name = event_name + packet.track_event.track_uuid = track_uuid + packet.trusted_packet_sequence_id = PACKET_SEQ + + if flow_id: + packet.track_event.flow_ids.append(flow_id) + + if phase == TrackEvent.Type.TYPE_SLICE_BEGIN: + self.add_ongoing_event(tid, packet) + + skip = False + + if phase == TrackEvent.Type.TYPE_SLICE_END: + if not 
self.pop_ongoing_event(tid, packet): + # Not all synthetic END events will have a corresponding BEGIN, + # e.g. threads may be scheduled (thread_scheduling) without + # being woken up (thread_waking). In such case, don't emit END + # event as not to confuse parsers + skip = True + + if not skip: + self.print_trace_packets([packet]) + + if update_cpu: + self.update_cpu_virtual_thread(msg, cpu) + + def emit_events(self, msg): + for begin in self.synthetic_begin.get(msg.event.name, []): + self.emit_event(msg, begin, TrackEvent.Type.TYPE_SLICE_BEGIN) + return + + for end in self.synthetic_end.get(msg.event.name, []): + self.emit_event(msg, end, TrackEvent.Type.TYPE_SLICE_END) + return + + self.emit_event(msg, msg.event.name, TrackEvent.Type.TYPE_INSTANT) + + def convert(self, path, output_path): + eprint("converting CTF to perfetto") + + start = time.time() + + self.dest = open(output_path, "wb") + + msg_it = bt2.TraceCollectionMessageIterator(path) + + self.last_flush = time.time() + + for msg in msg_it: + if hasattr(msg, "event"): + self.emit_events(msg) + + self.flush_current_trace() + + self.dest.close() + + stop = time.time() + eprint(f"finished in {stop - start:.2f} s") + + if self.warn_unknown_threads: + eprint( + "WARN: there were threads missing metadata - they will be marked as UNKNOWN") + + +def main(): + if len(sys.argv) < 3: + sys.stderr.write( + "usage: " + sys.argv[0] + " [ctf path] [output path]\n") + sys.exit(1) + + ctf_path = sys.argv[1] + output_path = sys.argv[2] + + e = Emitter() + + e.convert(ctf_path, output_path) + + +if __name__ == "__main__": + main() diff --git a/trace/get_rootfs_offset.sh b/trace/get_rootfs_offset.sh new file mode 100755 index 0000000..6b0eddf --- /dev/null +++ b/trace/get_rootfs_offset.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# +# Script for calculating rootfs partition start offset from disk image. 
Useful +# when mounting the QEMU disk image +# +# Example: +# DISK_PATH="../../_boot/ia32-generic-qemu/hd0.disk" +# OFS="$(./get_rootfs_offset.sh "${DISK_PATH}")" +# # The offset can be then passed to `mount` like so: +# sudo mount -o loop,offset="${OFS}" "${DISK_PATH}" /mnt +# +# Copyright 2025 Phoenix Systems +# Author: Adam Greloch + +set -e + +disk_path="${1?}" +disk_name="$(basename "${disk_path}")" + +fdisk_output="$(fdisk -l "${disk_path}")" + +start_ofs="$(echo "${fdisk_output}" | grep "${disk_name}1" | awk \{'print $3'\})" +sector_size="$(echo "${fdisk_output}" | grep "Sector size" | awk \{'print $7'\})" + +echo $((start_ofs * sector_size)) diff --git a/trace/ia32_convert.sh b/trace/ia32_convert.sh new file mode 100755 index 0000000..31cfced --- /dev/null +++ b/trace/ia32_convert.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# +# Utility script for converting CTF trace to perfetto protobuf from ia32-generic-qemu image +# +# Usage: ./ia32_convert.sh DISK_IMG_PATH ROOTFS_CTF_DIR_PATH METADATA_PATH OUTPUT [OPTIONS] +# Options: +# -t - run perfetto in browser with local trace processing acceleration (may +# be useful for large traces) +# +# Copyright 2025 Phoenix Systems +# Author: Adam Greloch + +set -e + +if [ "$#" -eq 0 ]; then + echo "Usage: ./$(basename "$0") DISK_IMG_PATH ROOTFS_CTF_DIR_PATH METADATA_PATH OUTPUT [OPTIONS]" + echo "Options:" + echo " -t: run perfetto in browser with local trace processing acceleration (may be useful for large traces)" + echo "Example: ./$(basename "$0") ../../_boot/ia32-generic-qemu/hd0.disk /trace ../../phoenix-rtos-kernel/perf/tsdl/metadata output.pftrace" + exit 1 +fi + +b_log() { + echo -e "\033[1;33m$1\033[0m" +} + +DISK_PATH="${1?no disk path given}" +CTF_DIR_PATH="${2?No ctf dir path given}" +METADATA_FILE_PATH="${3?No metadata path given}" +OUTPUT_PFTRACE="${4?No output given}" +OPT="${5}" + +b_log "gathering trace from disk image" + +loop_dev="$(udisksctl loop-setup -f "${DISK_PATH}" -o $((4096 * 512)) | awk 'NF{ print $NF }' | sed 's/\.//g')" 
+mounted_rootfs_path="$(udisksctl mount -b "${loop_dev}" | awk 'NF{ print $NF }' | sed 's/\.//g')" + +echo "${DISK_PATH} mounted to ${mounted_rootfs_path} (loop_dev=${loop_dev})" + +function cleanup { + udisksctl unmount -b "${loop_dev}" + udisksctl loop-delete -b "${loop_dev}" + echo "unmounted ${mounted_rootfs_path}, deleted loop_dev=${loop_dev}" +} + +trap cleanup EXIT + +./convert.sh "${mounted_rootfs_path}/${CTF_DIR_PATH}" "${METADATA_FILE_PATH}" "${OUTPUT_PFTRACE}" "${OPT}" diff --git a/trace/trim_event_stream.subr b/trace/trim_event_stream.subr new file mode 100755 index 0000000..d054383 --- /dev/null +++ b/trace/trim_event_stream.subr @@ -0,0 +1,129 @@ +#!/bin/bash +# +# CTF event trace file trimmer. Sometimes the recorded trace contains partial +# events at front and/or end, confusing babeltrace completely. This script +# trims the file until babeltrace stops complaining +# +# Copyright 2025 Phoenix Systems +# Author: Adam Greloch + +work_dir=$(mktemp -d -t "ctf_trim_XXXX") +echo "created ${work_dir}" + +if [[ ! "${work_dir}" || ! 
-d "${work_dir}" ]]; then + echo "failed to create work dir" + exit 1 +fi + +function cleanup { + rm -rf "${work_dir}" + echo "deleted ${work_dir}" +} + +trap cleanup EXIT + + +# trim_event_stream(trace_dir_to_fix) +trim_event_stream() { + MAX_TRIM_ITERS=50 # should be more than max CTF packet size + MAX_START_TRIM_ITERS=15 + + trace_dir_to_fix=${1?no input path} + + trace_dir="${work_dir}/trace" + mkdir -p "${trace_dir}" + + cp "${trace_dir_to_fix}/metadata" "${trace_dir}" + + tmp_file="${work_dir}/tmp" + + ofs=0 + iter=0 + trim_iter=0 + + trim_at_start() { + event_channel_name="${1?no event channel name}" + + event_stream="${trace_dir}/${event_channel_name}" + tmp_event_stream="${work_dir}/${event_channel_name}_tmp" + + # trim at the beginning: remove two bytes, as first one is a context byte + # added by convert.sh and the second one is the real flawed byte + tail -c +3 "${event_stream}" >"${tmp_event_stream}" + mv "${tmp_event_stream}" "${event_stream}" + + # reinsert context byte + cpu="${event_channel_name//[!0-9]/}" + # shellcheck disable=2059 + { + printf "\x${cpu}" + cat "${event_stream}" + } >"${tmp_file}" + mv "${tmp_file}" "${event_stream}" + + ofs=$((ofs + 1)) + } + + fix_event_channel() { + event_channel_name="${1?no event channel name}" + + event_stream="${trace_dir}/${event_channel_name}" + tmp_event_stream="${work_dir}/${event_channel_name}_tmp" + + cp "${trace_dir_to_fix}/${event_channel_name}" "${trace_dir}" + + cat "${event_stream}" >"${tmp_event_stream}" + mv "${tmp_event_stream}" "${event_stream}" + + while :; do + if [ "${trim_iter}" = "${MAX_START_TRIM_ITERS}" ]; then + echo "trace is still bad after ${MAX_START_TRIM_ITERS} iterations, aborting" + echo "last babeltrace2 complaint:" + babeltrace2 "${trace_dir}" + exit 1 + fi + + if [ "${iter}" = "${MAX_TRIM_ITERS}" ]; then + # babeltrace may be lucky and parse some random bytes as one package + # despite wrong start offset, so its output will be nonempty.
It + # won't succeed any further, so it will always fail on next packages and + # trimming the end of the trace won't help. We can trim the start + # again, as it is most likely incorrect + trim_at_start "${event_channel_name}" + iter=0 + trim_iter=$((trim_iter + 1)) + fi + + if [ "$(du -b "${event_stream}" | cut -f1)" = 0 ]; then + echo "empty file after ${iter} iterations, aborting" + exit 1 + fi + + if res=$(babeltrace2 "${trace_dir}" 2>/dev/null); then + if [ "${iter}" = "0" ]; then + echo "${event_channel_name} OK" + else + echo "${event_channel_name} FIXED: start=${ofs} len=$(du -b "${event_stream}" | cut -f1)" + fi + cp "${event_stream}" "${trace_dir_to_fix}/${event_channel_name}" + return + fi + + if [ -n "${res}" ]; then + # trim at the end + head -c -1 "${event_stream}" >"${tmp_event_stream}" + mv "${tmp_event_stream}" "${event_stream}" + else + trim_at_start "${event_channel_name}" + fi + + iter=$((iter + 1)) + + echo -e "\e[1A\e[K${event_channel_name} trimming: start=${ofs} len=$(du -b "${event_stream}" | cut -f1)" + done + } + + for channel_path in "${trace_dir_to_fix}"/channel_event*; do + fix_event_channel "$(basename "${channel_path}")" + done +}