Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit baa60cf

Browse files
committedJul 1, 2024
feature: allow configuration for Go x/trace.FlightRecorder
Signed-off-by: Sandor Szücs <sandor.szuecs@zalando.de>
1 parent 00480c8 commit baa60cf

File tree

9 files changed

+253
-37
lines changed

9 files changed

+253
-37
lines changed
 

‎config/config.go

+4-1
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ type Config struct {
8484
BlockProfileRate int `yaml:"block-profile-rate"`
8585
MutexProfileFraction int `yaml:"mutex-profile-fraction"`
8686
MemProfileRate int `yaml:"memory-profile-rate"`
87+
FlightRecorderTargetURL string `yaml:"flight-recorder-target-url"`
8788
DebugGcMetrics bool `yaml:"debug-gc-metrics"`
8889
RuntimeMetrics bool `yaml:"runtime-metrics"`
8990
ServeRouteMetrics bool `yaml:"serve-route-metrics"`
@@ -369,6 +370,7 @@ func NewConfig() *Config {
369370

370371
// logging, metrics, tracing:
371372
flag.BoolVar(&cfg.EnablePrometheusMetrics, "enable-prometheus-metrics", false, "*Deprecated*: use metrics-flavour. Switch to Prometheus metrics format to expose metrics")
373+
flag.BoolVar(&cfg.EnablePrometheusStartLabel, "enable-prometheus-start-label", false, "adds start label to each prometheus counter with the value of counter creation timestamp as unix nanoseconds")
372374
flag.StringVar(&cfg.OpenTracing, "opentracing", "noop", "list of arguments for opentracing (space separated), first argument is the tracer implementation")
373375
flag.StringVar(&cfg.OpenTracingInitialSpan, "opentracing-initial-span", "ingress", "set the name of the initial, pre-routing, tracing span")
374376
flag.StringVar(&cfg.OpenTracingExcludedProxyTags, "opentracing-excluded-proxy-tags", "", "set tags that should be excluded from spans created for proxy operation. must be a comma-separated list of strings.")
@@ -382,7 +384,7 @@ func NewConfig() *Config {
382384
flag.IntVar(&cfg.BlockProfileRate, "block-profile-rate", 0, "block profile sample rate, see runtime.SetBlockProfileRate")
383385
flag.IntVar(&cfg.MutexProfileFraction, "mutex-profile-fraction", 0, "mutex profile fraction rate, see runtime.SetMutexProfileFraction")
384386
flag.IntVar(&cfg.MemProfileRate, "memory-profile-rate", 0, "memory profile rate, see runtime.SetMemProfileRate, keeps default 512 kB")
385-
flag.BoolVar(&cfg.EnablePrometheusStartLabel, "enable-prometheus-start-label", false, "adds start label to each prometheus counter with the value of counter creation timestamp as unix nanoseconds")
387+
flag.StringVar(&cfg.FlightRecorderTargetURL, "flight-recorder-target-url", "", "sets the flight recorder target URL that is used to write out the trace to.")
386388
flag.BoolVar(&cfg.DebugGcMetrics, "debug-gc-metrics", false, "enables reporting of the Go garbage collector statistics exported in debug.GCStats")
387389
flag.BoolVar(&cfg.RuntimeMetrics, "runtime-metrics", true, "enables reporting of the Go runtime statistics exported in runtime and specifically runtime.MemStats")
388390
flag.BoolVar(&cfg.ServeRouteMetrics, "serve-route-metrics", false, "enables reporting total serve time metrics for each route")
@@ -755,6 +757,7 @@ func (c *Config) ToOptions() skipper.Options {
755757
EnableProfile: c.EnableProfile,
756758
BlockProfileRate: c.BlockProfileRate,
757759
MutexProfileFraction: c.MutexProfileFraction,
760+
FlightRecorderTargetURL: c.FlightRecorderTargetURL,
758761
EnableDebugGcMetrics: c.DebugGcMetrics,
759762
EnableRuntimeMetrics: c.RuntimeMetrics,
760763
EnableServeRouteMetrics: c.ServeRouteMetrics,

‎filters/builtin/builtin.go

+1
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@ func Filters() []filters.Spec {
191191
diag.NewNormalResponseLatency(),
192192
diag.NewHistogramRequestLatency(),
193193
diag.NewHistogramResponseLatency(),
194+
diag.NewTrace(),
194195
tee.NewTee(),
195196
tee.NewTeeDeprecated(),
196197
tee.NewTeeNoFollow(),

‎filters/diag/trace.go

+52
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
package diag
2+
3+
import (
4+
"time"
5+
6+
log "github.com/sirupsen/logrus"
7+
"github.com/zalando/skipper/filters"
8+
)
9+
10+
func getDurationArg(a interface{}) (time.Duration, error) {
11+
if s, ok := a.(string); ok {
12+
return time.ParseDuration(s)
13+
}
14+
return 0, filters.ErrInvalidFilterParameters
15+
}
16+
17+
type traceSpec struct{}
18+
19+
type trace struct {
20+
d time.Duration
21+
}
22+
23+
// NewTrace creates a filter specification for the trace() filter
24+
func NewTrace() filters.Spec {
25+
return &traceSpec{}
26+
}
27+
28+
func (*traceSpec) Name() string {
29+
return filters.TraceName
30+
}
31+
32+
func (ts *traceSpec) CreateFilter(args []interface{}) (filters.Filter, error) {
33+
if len(args) != 1 {
34+
return nil, filters.ErrInvalidFilterParameters
35+
}
36+
37+
d, err := getDurationArg(args[0])
38+
if err != nil {
39+
log.Warnf("d failed on creation of trace(): %v", err)
40+
return nil, filters.ErrInvalidFilterParameters
41+
}
42+
43+
return &trace{
44+
d: d,
45+
}, nil
46+
}
47+
48+
func (tr *trace) Request(ctx filters.FilterContext) {
49+
ctx.StateBag()[filters.TraceName] = tr.d
50+
}
51+
52+
func (*trace) Response(filters.FilterContext) {}

‎filters/filters.go

+1
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,7 @@ const (
272272
NormalResponseLatencyName = "normalResponseLatency"
273273
HistogramRequestLatencyName = "histogramRequestLatency"
274274
HistogramResponseLatencyName = "histogramResponseLatency"
275+
TraceName = "trace"
275276
LogBodyName = "logBody"
276277
LogHeaderName = "logHeader"
277278
TeeName = "tee"

‎go.mod

+1-2
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ require (
4949
github.com/yuin/gopher-lua v1.1.1
5050
go4.org/netipx v0.0.0-20220925034521-797b0c90d8ab
5151
golang.org/x/crypto v0.24.0
52-
golang.org/x/exp v0.0.0-20230905200255-921286631fa9
52+
golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8
5353
golang.org/x/net v0.26.0
5454
golang.org/x/oauth2 v0.21.0
5555
golang.org/x/sync v0.7.0
@@ -170,7 +170,6 @@ require (
170170
golang.org/x/sys v0.21.0 // indirect
171171
golang.org/x/text v0.16.0 // indirect
172172
golang.org/x/tools v0.22.0 // indirect
173-
golang.org/x/exp v0.0.0-20240314144324-c7f7c6466f7f // indirect
174173
gonum.org/v1/gonum v0.8.2 // indirect
175174
google.golang.org/genproto/googleapis/api v0.0.0-20240318140521-94a12d6c2237 // indirect
176175
google.golang.org/genproto/googleapis/rpc v0.0.0-20240318140521-94a12d6c2237 // indirect

‎go.sum

+2-4
Original file line numberDiff line numberDiff line change
@@ -528,10 +528,8 @@ golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL
528528
golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
529529
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
530530
golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY=
531-
golang.org/x/exp v0.0.0-20230905200255-921286631fa9 h1:GoHiUyI/Tp2nVkLI2mCxVkOjsbSXD66ic0XW0js0R9g=
532-
golang.org/x/exp v0.0.0-20230905200255-921286631fa9/go.mod h1:S2oDrQGGwySpoQPVqRShND87VCbxmc6bL1Yd2oYrm6k=
533-
golang.org/x/exp v0.0.0-20240314144324-c7f7c6466f7f h1:3CW0unweImhOzd5FmYuRsD4Y4oQFKZIjAnKbjV4WIrw=
534-
golang.org/x/exp v0.0.0-20240314144324-c7f7c6466f7f/go.mod h1:CxmFvTBINI24O/j8iY7H1xHzx2i4OsyguNBmN/uPtqc=
531+
golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 h1:yixxcjnhBmY0nkL253HFVIm0JsFHwrHdT3Yh6szTnfY=
532+
golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8/go.mod h1:jj3sYF3dwk5D+ghuXyeI3r5MFf+NT2An6/9dOA95KSI=
535533
golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs=
536534
golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
537535
golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=

‎proxy/flightrecorder_test.go

+76
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
package proxy_test
2+
3+
import (
4+
"bytes"
5+
"fmt"
6+
"io"
7+
"net/http"
8+
"net/http/httptest"
9+
"testing"
10+
11+
"github.com/zalando/skipper/eskip"
12+
"github.com/zalando/skipper/filters"
13+
"github.com/zalando/skipper/filters/diag"
14+
"github.com/zalando/skipper/proxy"
15+
"github.com/zalando/skipper/proxy/proxytest"
16+
xtrace "golang.org/x/exp/trace"
17+
)
18+
19+
func TestFlightRecorder(t *testing.T) {
20+
service := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
21+
if r.Method != "PUT" {
22+
w.WriteHeader(http.StatusMethodNotAllowed)
23+
w.Write([]byte(http.StatusText(http.StatusMethodNotAllowed)))
24+
return
25+
}
26+
27+
var buf bytes.Buffer
28+
n, err := io.Copy(&buf, r.Body)
29+
if err != nil {
30+
t.Fatalf("Failed to copy data: %v", err)
31+
}
32+
if n < 100 {
33+
t.Fatalf("Failed to write enough data: %d bytes", n)
34+
}
35+
w.WriteHeader(http.StatusCreated)
36+
w.Write([]byte(http.StatusText(http.StatusCreated)))
37+
38+
}))
39+
defer service.Close()
40+
41+
backend := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {}))
42+
defer backend.Close()
43+
44+
flightRecorder := xtrace.NewFlightRecorder()
45+
flightRecorder.Start()
46+
47+
spec := diag.NewTrace()
48+
fr := make(filters.Registry)
49+
fr.Register(spec)
50+
51+
doc := fmt.Sprintf(`r: * -> trace("20µs") -> "%s"`, backend.URL)
52+
rr := eskip.MustParse(doc)
53+
54+
pr := proxytest.WithParams(fr, proxy.Params{
55+
FlightRecorder: flightRecorder,
56+
FlightRecorderTargetURL: service.URL,
57+
}, rr...)
58+
defer pr.Close()
59+
60+
rsp, err := pr.Client().Get(pr.URL)
61+
if err != nil {
62+
t.Fatalf("Failed to GET %q: %v", pr.URL, err)
63+
}
64+
defer rsp.Body.Close()
65+
_, err = io.ReadAll(rsp.Body)
66+
if err != nil {
67+
t.Fatalf("Failed to read body: %v", err)
68+
}
69+
70+
switch rsp.StatusCode {
71+
case 200, 201, 204:
72+
// ok
73+
default:
74+
t.Fatalf("Failed to get status OK: %d", rsp.StatusCode)
75+
}
76+
}

‎proxy/proxy.go

+70-30
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ import (
1818
"runtime"
1919
"strconv"
2020
"strings"
21-
"sync"
2221
"time"
2322
"unicode/utf8"
2423

@@ -362,6 +361,13 @@ type Params struct {
362361

363362
// PassiveHealthCheck defines the parameters for the healthy endpoints checker.
364363
PassiveHealthCheck *PassiveHealthCheck
364+
365+
// FlightRecorder is a started instance of https://pkg.go.dev/golang.org/x/exp/trace#FlightRecorder
366+
FlightRecorder *trace.FlightRecorder
367+
368+
// FlightRecorderTargetURL is the target to write the trace
369+
// to. Supported targets are http URL and file URL.
370+
FlightRecorderTargetURL string
365371
}
366372

367373
type (
@@ -457,8 +463,7 @@ type Proxy struct {
457463
hostname string
458464
onPanicSometimes rate.Sometimes
459465
flightRecorder *trace.FlightRecorder
460-
traceOnce sync.Once
461-
tooLong time.Duration
466+
flightRecorderURL *url.URL
462467
}
463468

464469
// proxyError is used to wrap errors during proxying and to indicate
@@ -850,13 +855,15 @@ func WithParams(p Params) *Proxy {
850855
maxUnhealthyEndpointsRatio: p.PassiveHealthCheck.MaxUnhealthyEndpointsRatio,
851856
}
852857
}
853-
// TODO(sszuecs): expose an option to start it
854-
fr := trace.NewFlightRecorder()
855-
//fr.SetPeriod(d)
856-
//fr.SetSize(bytes int)
857-
err := fr.Start()
858-
if err != nil {
859-
println("Failed to start FlightRecorder:", err.Error())
858+
859+
var frURL *url.URL
860+
if p.FlightRecorder != nil {
861+
var err error
862+
frURL, err = url.Parse(p.FlightRecorderTargetURL)
863+
if err != nil {
864+
p.FlightRecorder.Stop()
865+
p.FlightRecorder = nil
866+
}
860867
}
861868

862869
return &Proxy{
@@ -887,32 +894,62 @@ func WithParams(p Params) *Proxy {
887894
clientTLS: tr.TLSClientConfig,
888895
hostname: hostname,
889896
onPanicSometimes: rate.Sometimes{First: 3, Interval: 1 * time.Minute},
890-
flightRecorder: fr,
891-
traceOnce: sync.Once{},
892-
tooLong: 250 * time.Millisecond,
897+
flightRecorder: p.FlightRecorder,
898+
flightRecorderURL: frURL,
893899
}
894900
}
895901

896902
func (p *Proxy) writeTraceIfTooSlow(ctx *context) {
897-
p.log.Infof("write trace if too slow: %s > %s", time.Since(ctx.startServe), p.tooLong)
898-
if time.Since(ctx.startServe) > p.tooLong {
899-
p.log.Info("too slow")
900-
// Do it only once for simplicitly, but you can take more than one.
901-
p.traceOnce.Do(func() {
902-
p.log.Info("write trace because we were too slow")
903-
// Grab the snapshot.
904-
var b bytes.Buffer
905-
_, err := p.flightRecorder.WriteTo(&b)
906-
if err != nil {
907-
p.log.Errorf("Failed to write flightrecorder data: %v", err)
903+
if p.flightRecorder == nil || p.flightRecorderURL == nil {
904+
return
905+
}
906+
907+
var d time.Duration
908+
if e, ok := ctx.StateBag()[filters.TraceName]; ok {
909+
d = e.(time.Duration)
910+
}
911+
if d < 1*time.Microsecond {
912+
return
913+
}
914+
915+
p.log.Infof("write trace if too slow: %s > %s", time.Since(ctx.startServe), d)
916+
if time.Since(ctx.startServe) > d {
917+
var b bytes.Buffer
918+
_, err := p.flightRecorder.WriteTo(&b)
919+
if err != nil {
920+
p.log.Errorf("Failed to write flightrecorder data: %v", err)
921+
return
922+
}
923+
924+
switch p.flightRecorderURL.Scheme {
925+
case "file":
926+
if err := os.WriteFile(p.flightRecorderURL.Path, b.Bytes(), 0o644); err != nil {
927+
p.log.Errorf("Failed to write file trace.out: %v", err)
908928
return
929+
} else {
930+
p.log.Infof("FlightRecorder wrote %d bytes to trace file %q", b.Len(), p.flightRecorderURL.Path)
909931
}
910-
// Write it to a file.
911-
if err := os.WriteFile("trace.out", b.Bytes(), 0o755); err != nil {
912-
p.log.Errorf("Failed to write trace.out: %v", err)
913-
return
932+
case "http", "https":
933+
req, err := http.NewRequest("PUT", p.flightRecorderURL.String(), &b)
934+
if err != nil {
935+
p.log.Errorf("Failed to create request to %q to send a trace: %v", p.flightRecorderURL.String(), err)
914936
}
915-
})
937+
938+
rsp, err := p.roundTripper.RoundTrip(req)
939+
if err != nil {
940+
p.log.Errorf("Failed to write trace to %q: %v", p.flightRecorderURL.String(), err)
941+
} else {
942+
rsp.Body.Close()
943+
}
944+
switch rsp.StatusCode {
945+
case 200, 201, 204:
946+
p.log.Infof("Successful send of a trace to %q", p.flightRecorderURL.String())
947+
default:
948+
p.log.Errorf("Failed to get successful response from %s: (%d) %s", p.flightRecorderURL.String(), rsp.StatusCode, rsp.Status)
949+
}
950+
default:
951+
p.log.Errorf("Failed to write trace, unknown FlightRecorderURL scheme %q", p.flightRecorderURL.Scheme)
952+
}
916953
}
917954
}
918955

@@ -1686,7 +1723,10 @@ func (p *Proxy) ServeHTTP(w http.ResponseWriter, r *http.Request) {
16861723
func (p *Proxy) Close() error {
16871724
close(p.quit)
16881725
p.registry.Close()
1689-
p.flightRecorder.Stop()
1726+
if p.flightRecorder != nil {
1727+
p.flightRecorder.Stop()
1728+
}
1729+
16901730
return nil
16911731
}
16921732

0 commit comments

Comments
 (0)
Please sign in to comment.