From 5c9e6a81bf6c02f4129b9124f3ecaf120a21a73e Mon Sep 17 00:00:00 2001 From: gernest Date: Sun, 8 Oct 2023 23:30:52 +0300 Subject: [PATCH] Add support for tracing function calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #150 **NOTE** There is no actionable insight provided on the issue. I believe I have no business implementing this (Probably someone is already working on this internally). I got too excited when I came across this project and had a lot of idle time so I decided to take this challenge. There is a lot of shortcomings on this PR ( again, sorry to bother you guys), however I think I figured out the fundamentals of having the feature introduced. I will go into more details on this. What is missing ? ---- - Tests : there are no unit tests included because I wasn't sure which direction you had in mind regarding this feature. I however made sure to make minimal changes to ensure someone can review the changes in one sitting. - `JSON` and `YAML` output: Only table output is implemented showing function duration and function name - Top down rendering like in https://justine.lol/ftrace/ : Reading segments logs returns them in an opposite order from `ftrace`. Sorting can fix this, in order to minimize changes this commit doesn't sort the call stack Output looks like ``` ... # omitted text for brevity 936ns .sync_atomic.CompareAndSwapUint64 3.415µs .internal_poll.__fdMutex_.rwunlock 4.344µs .internal_poll.__FD_.writeUnlock 5.166µs .internal_poll.__FD_.Write.func1 23.386µs .internal_poll.__FD_.Write 24.455µs .os.__File_.Write 25.934µs .bufio.__Writer_.Flush 27.46µs .main.main.func1 48.527µs .main.main 199ns .runtime_internal_atomic.Load 160ns .runtime_internal_atomic.Load 901ns .runtime.runExitHooks ``` - Limiting: Generated trace can be long, there is no way to limit rendered output - Sampling : No way to sample the traced functions - Fine tune which functions to trace: Currently we trace everything including system calls which may be redundant considering we already have api for tracing system calls Adding support for this is straightforward so I had to de prioritize. How it works ? --- wazero has experimental api for hooking function calls https://pkg.go.dev/github.com/tetratelabs/wazero@v1.5.0/experimental#FunctionListenerFactory We take advantage of this to provide custom implementation of this api and store the collected data in our segment registry. For now same segment (0) is used for system calls and function calls , Initially I had separate segment for function calls segment(1) but I had to make a lot of changes to glue things together. I settled with the same segment but it should be possible to switch to separate segments. I am more than happy to own the issue if I get feedback on which direction you had in mind . But also I won't mind if this PR is closed. Usage --- ``` $ timecraft run testdata/go/get.wasm https://eo3qh2ncelpc9q0.m.pipedream.net 79642983-87b9-492c-b24e-b78397d28362 Hello, World! ``` ``` $ timecraft trace f 79642983-87b9-492c-b24e-b78397d28362 ... # truncated for brevity 3.303µs .internal_poll.__FD_.writeUnlock 3.896µs .internal_poll.__FD_.Write.func1 17.796µs .internal_poll.__FD_.Write 18.578µs .os.__File_.Write 20.593µs .bufio.__Writer_.Flush 21.898µs .main.main.func1 38.941µs .main.main 164ns .runtime_internal_atomic.Load 144ns .runtime_internal_atomic.Load 734ns .runtime.runExitHooks ``` This project has the highest quality go code base on github (apart from the go standard library). I came across this on saturday and found my way to implement this feature on sunday. Thanks for the project. Signed-off-by: gernest --- describe.go | 2 +- internal/timecraft/process.go | 13 ++++ internal/timecraft/runtime.go | 17 +++-- internal/timemachine/funccall/format.go | 54 ++++++++++++++++ internal/timemachine/funccall/log.go | 86 +++++++++++++++++++++++++ internal/timemachine/funccall/read.go | 56 ++++++++++++++++ profile.go | 2 +- replay.go | 2 +- run.go | 2 +- trace.go | 27 ++++++-- 10 files changed, 247 insertions(+), 14 deletions(-) create mode 100644 internal/timemachine/funccall/format.go create mode 100644 internal/timemachine/funccall/log.go create mode 100644 internal/timemachine/funccall/read.go diff --git a/describe.go b/describe.go index a92ba62c..c6087e20 100644 --- a/describe.go +++ b/describe.go @@ -184,7 +184,7 @@ func describeModule(ctx context.Context, reg *timemachine.Registry, id string, c return nil, err } - runtime, err := timecraft.NewRuntime(ctx, config) + runtime, ctx, err := timecraft.NewRuntime(ctx, config) if err != nil { return nil, err } diff --git a/internal/timecraft/process.go b/internal/timecraft/process.go index 7b35c9a5..0df09b22 100644 --- a/internal/timecraft/process.go +++ b/internal/timecraft/process.go @@ -21,12 +21,14 @@ import ( "github.com/stealthrocket/timecraft/internal/object" "github.com/stealthrocket/timecraft/internal/sandbox" "github.com/stealthrocket/timecraft/internal/timemachine" + "github.com/stealthrocket/timecraft/internal/timemachine/funccall" "github.com/stealthrocket/timecraft/internal/timemachine/wasicall" "github.com/stealthrocket/wasi-go" "github.com/stealthrocket/wasi-go/imports" "github.com/stealthrocket/wasi-go/imports/wasi_snapshot_preview1" "github.com/stealthrocket/wazergo" "github.com/tetratelabs/wazero" + "github.com/tetratelabs/wazero/experimental" ) // ProcessManager runs WebAssembly modules. @@ -316,6 +318,17 @@ func (pm *ProcessManager) Start(moduleSpec ModuleSpec, logSpec *LogSpec, parentI panic(err) // caught/handled by wazero } }) + + funcListenerFactory := pm.ctx.Value(experimental.FunctionListenerFactoryKey{}).(*funccall.Factory) + funcListenerFactory.Enable(func(id int, ts time.Time, data []byte) { + b.Reset(logSpec.StartTime) + b.SetTimestamp(ts) + b.SetFunctionID(id) + b.SetFunctionCall(data) + if err := recordWriter.WriteRecord(&b); err != nil { + panic(err) // caught/handled by wazero + } + }) } else { processID = uuid.New() } diff --git a/internal/timecraft/runtime.go b/internal/timecraft/runtime.go index 94f39cd3..afb2a7f3 100644 --- a/internal/timecraft/runtime.go +++ b/internal/timecraft/runtime.go @@ -4,12 +4,14 @@ import ( "context" "fmt" + "github.com/stealthrocket/timecraft/internal/timemachine/funccall" "github.com/tetratelabs/wazero" + "github.com/tetratelabs/wazero/experimental" ) // NewRuntime constructs a wazero.Runtime that's configured according // to the provided timecraft Config. -func NewRuntime(ctx context.Context, config *Config) (wazero.Runtime, error) { +func NewRuntime(ctx context.Context, config *Config) (wazero.Runtime, context.Context, error) { runtimeConfig := wazero.NewRuntimeConfig() var cache wazero.CompilationCache @@ -18,17 +20,24 @@ func NewRuntime(ctx context.Context, config *Config) (wazero.Runtime, error) { // user but still go ahead with the runtime instantiation. path, err := cachePath.Resolve() if err != nil { - return nil, fmt.Errorf("failed to resolve timecraft cache location: %w", err) + return nil, nil, fmt.Errorf("failed to resolve timecraft cache location: %w", err) } else { cache, err = createCacheDirectory(path) if err != nil { - return nil, fmt.Errorf("failed to create timecraft cache directory: %w", err) + return nil, nil, fmt.Errorf("failed to create timecraft cache directory: %w", err) } else { runtimeConfig = runtimeConfig.WithCompilationCache(cache) } } } + // Initialize wazero with a disabled FunctionListenerFactory implementation. We + // will later decide to enable this when processing module. + // + // We have to return this context and pass it down stream so we can have access + // to *funccall.Factory initialized with this runtime. + ctx = context.WithValue(ctx, experimental.FunctionListenerFactoryKey{}, &funccall.Factory{}) + runtime := wazero.NewRuntimeWithConfig(ctx, runtimeConfig) if cache != nil { runtime = &runtimeWithCompilationCache{ @@ -36,7 +45,7 @@ func NewRuntime(ctx context.Context, config *Config) (wazero.Runtime, error) { cache: cache, } } - return runtime, nil + return runtime, ctx, nil } type runtimeWithCompilationCache struct { diff --git a/internal/timemachine/funccall/format.go b/internal/timemachine/funccall/format.go new file mode 100644 index 00000000..88679973 --- /dev/null +++ b/internal/timemachine/funccall/format.go @@ -0,0 +1,54 @@ +package funccall + +import ( + "errors" + "fmt" + "io" + "strings" + "text/tabwriter" + + "github.com/stealthrocket/timecraft/internal/stream" + "github.com/stealthrocket/timecraft/internal/timemachine" +) + +func Format(o io.Writer, reader stream.Reader[timemachine.Record]) error { + tr := TraceReader{Records: reader} + traces := make([]Trace, 1<<10) + w := tabwriter.NewWriter(o, 0, 0, 1, ' ', tabwriter.AlignRight) + fmt.Fprintln(w, "duration\t symbol ") + fmt.Fprintln(w, "---\t --- ") + var s strings.Builder + write := func(n int) { + chunk := traces[:n] + for x := range chunk { + t := &chunk[x] + fmt.Fprintf(w, "%v\t %s\n", t.Duration, formatSymbol(&s, t)) + } + w.Flush() + } + + for { + n, err := tr.Read(traces) + if err != nil { + if errors.Is(err, io.EOF) { + write(n) + return nil + } + return err + } + write(n) + } +} + +func formatSymbol(s *strings.Builder, t *Trace) string { + if t.Depth == 0 { + return t.Name + } + s.Reset() + s.Grow(t.Depth) + for i := 0; i < t.Depth; i++ { + s.WriteByte(' ') + } + s.WriteString(t.Name) + return s.String() +} diff --git a/internal/timemachine/funccall/log.go b/internal/timemachine/funccall/log.go new file mode 100644 index 00000000..08163275 --- /dev/null +++ b/internal/timemachine/funccall/log.go @@ -0,0 +1,86 @@ +package funccall + +import ( + "context" + "encoding/binary" + "time" + + "github.com/tetratelabs/wazero/api" + "github.com/tetratelabs/wazero/experimental" +) + +// This number is arbitrary. For now we use the same segment as system calls , +// any number larger than the largest system call will suffice. +const ID = 100 + +type Factory struct { + enabled bool + start stack[time.Time] + write func(id int, ts time.Time, data []byte) +} + +func (f *Factory) Enable(w func(id int, ts time.Time, data []byte)) { + f.enabled = true + f.write = w +} + +var _ experimental.FunctionListenerFactory = (*Factory)(nil) + +func (f *Factory) NewFunctionListener(fnd api.FunctionDefinition) experimental.FunctionListener { + return &listener{ + f: f, + } +} + +type listener struct { + f *Factory + buffer []byte +} + +var _ experimental.FunctionListener = (*listener)(nil) + +func (l *listener) Before(ctx context.Context, + mod api.Module, def api.FunctionDefinition, params []uint64, _ experimental.StackIterator) { + if !l.f.enabled { + return + } + l.f.start.push(time.Now()) +} + +func (l *listener) After(ctx context.Context, mod api.Module, def api.FunctionDefinition, results []uint64) { + if !l.f.enabled { + return + } + start := l.f.start.pop().(time.Time) + l.buffer = l.buffer[:0] + + // 4 bytes - depth ( for indentation) + // 8 bytes - duration + // followed by the function name + l.buffer = binary.BigEndian.AppendUint32(l.buffer, uint32(l.f.start.size())) + l.buffer = binary.BigEndian.AppendUint64(l.buffer, uint64(time.Since(start))) + l.buffer = append(l.buffer, []byte(def.DebugName())...) + l.f.write(ID, start, l.buffer) +} + +func (l *listener) Abort(ctx context.Context, mod api.Module, def api.FunctionDefinition, _ error) { + if !l.f.enabled { + return + } + l.f.start.pop() +} + +type stack[T any] struct { + slice []any +} + +func (s *stack[T]) push(v T) { s.slice = append(s.slice, v) } + +func (s *stack[T]) pop() any { + i := len(s.slice) - 1 + v := s.slice[i] + s.slice = s.slice[:i] + return v +} + +func (s *stack[T]) size() int { return len(s.slice) } diff --git a/internal/timemachine/funccall/read.go b/internal/timemachine/funccall/read.go new file mode 100644 index 00000000..8f78d241 --- /dev/null +++ b/internal/timemachine/funccall/read.go @@ -0,0 +1,56 @@ +package funccall + +import ( + "encoding/binary" + "errors" + "io" + "slices" + "time" + + "github.com/stealthrocket/timecraft/internal/stream" + "github.com/stealthrocket/timecraft/internal/timemachine" +) + +type Trace struct { + Start time.Time + Duration time.Duration + Depth int + Name string +} + +type TraceReader struct { + Records stream.Reader[timemachine.Record] + records []timemachine.Record +} + +func (r *TraceReader) Read(traces []Trace) (n int, err error) { + r.records = slices.Grow(r.records, len(traces))[:len(traces)] + for { + rn, err := stream.ReadFull(r.Records, r.records) + for i := 0; i < rn; i++ { + record := &r.records[i] + if record.FunctionID == ID { + // read depth + data := record.FunctionCall + depth := binary.BigEndian.Uint32(data) + data = data[4:] + //read duration + duration := binary.BigEndian.Uint64(data) + name := data[8:] + traces[n] = Trace{ + Start: record.Time, + Duration: time.Duration(duration), + Depth: int(depth), + Name: string(name), + } + n++ + } + } + if n > 0 || err != nil { + if errors.Is(err, io.ErrUnexpectedEOF) { + err = io.EOF + } + return n, err + } + } +} diff --git a/profile.go b/profile.go index ddd43ea1..0957792d 100644 --- a/profile.go +++ b/profile.go @@ -99,7 +99,7 @@ func profile(ctx context.Context, args []string) error { if err != nil { return err } - runtime, err := timecraft.NewRuntime(ctx, config) + runtime, ctx, err := timecraft.NewRuntime(ctx, config) if err != nil { return err } diff --git a/replay.go b/replay.go index 13bcf4fb..b710b575 100644 --- a/replay.go +++ b/replay.go @@ -48,7 +48,7 @@ func replay(ctx context.Context, args []string) error { if err != nil { return err } - runtime, err := timecraft.NewRuntime(ctx, config) + runtime, ctx, err := timecraft.NewRuntime(ctx, config) if err != nil { return err } diff --git a/run.go b/run.go index 794d2ffb..e4add00e 100644 --- a/run.go +++ b/run.go @@ -94,7 +94,7 @@ func run(ctx context.Context, args []string) error { if err != nil { return err } - runtime, err := timecraft.NewRuntime(ctx, config) + runtime, ctx, err := timecraft.NewRuntime(ctx, config) if err != nil { return err } diff --git a/trace.go b/trace.go index 935c6a76..bb49b0c0 100644 --- a/trace.go +++ b/trace.go @@ -16,6 +16,7 @@ import ( "github.com/stealthrocket/timecraft/internal/stream" "github.com/stealthrocket/timecraft/internal/timecraft" "github.com/stealthrocket/timecraft/internal/timemachine" + "github.com/stealthrocket/timecraft/internal/timemachine/funccall" ) const traceUsage = ` @@ -33,7 +34,7 @@ Options: type layer struct { typ string alt []string - trace func(io.Writer, outputFormat, string, stream.Reader[tracing.Event]) error + trace func(io.Writer, outputFormat, string, stream.Reader[timemachine.Record]) error } var layers = [...]layer{ @@ -47,6 +48,11 @@ var layers = [...]layer{ alt: []string{"req", "reqs", "requests"}, trace: traceRequest, }, + { + typ: "functions", + alt: []string{"f", "func", "funcs"}, + trace: traceFunctions, + }, } func findLayer(typ string) (*layer, error) { @@ -81,6 +87,7 @@ func trace(ctx context.Context, args []string) error { $ timecraft trace network ... $ timecraft trace request ... + $ timecraft trace function ... `) return exitCode(2) } @@ -129,12 +136,11 @@ func trace(ctx context.Context, args []string) error { format = "%+v" } - return layer.trace(os.Stdout, output, format, &tracing.EventReader{ - Records: timemachine.NewLogRecordReader(logReader), - }) + return layer.trace(os.Stdout, output, format, timemachine.NewLogRecordReader(logReader)) } -func traceNetwork(w io.Writer, output outputFormat, format string, events stream.Reader[tracing.Event]) error { +func traceNetwork(w io.Writer, output outputFormat, format string, records stream.Reader[timemachine.Record]) error { + events := &tracing.EventReader{Records: records} var writer stream.WriteCloser[tracing.Event] switch output { case "json": @@ -152,7 +158,8 @@ func traceNetwork(w io.Writer, output outputFormat, format string, events stream return err } -func traceRequest(w io.Writer, output outputFormat, format string, events stream.Reader[tracing.Event]) error { +func traceRequest(w io.Writer, output outputFormat, format string, records stream.Reader[timemachine.Record]) error { + events := &tracing.EventReader{Records: records} var writer stream.WriteCloser[tracing.Exchange] switch output { case "json": @@ -177,3 +184,11 @@ func traceRequest(w io.Writer, output outputFormat, format string, events stream }) return err } + +func traceFunctions(w io.Writer, output outputFormat, format string, records stream.Reader[timemachine.Record]) error { + switch output { + case "json", "yaml": + return fmt.Errorf("%s output is not implemented for function tracing", output) + } + return funccall.Format(w, records) +}