Skip to content

Commit 22c3e06

Browse files
committed
feature: flightrecorder to enable Go trace
feature: allow configuration for Go x/trace.FlightRecorder pass default period to define what means skipper to be slow Signed-off-by: Sandor Szücs <[email protected]>
1 parent bd87e31 commit 22c3e06

File tree

6 files changed

+289
-13
lines changed

6 files changed

+289
-13
lines changed

config/config.go

+4-1
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ type Config struct {
8484
BlockProfileRate int `yaml:"block-profile-rate"`
8585
MutexProfileFraction int `yaml:"mutex-profile-fraction"`
8686
MemProfileRate int `yaml:"memory-profile-rate"`
87+
FlightRecorderTargetURL string `yaml:"flight-recorder-target-url"`
8788
DebugGcMetrics bool `yaml:"debug-gc-metrics"`
8889
RuntimeMetrics bool `yaml:"runtime-metrics"`
8990
ServeRouteMetrics bool `yaml:"serve-route-metrics"`
@@ -369,6 +370,7 @@ func NewConfig() *Config {
369370

370371
// logging, metrics, tracing:
371372
flag.BoolVar(&cfg.EnablePrometheusMetrics, "enable-prometheus-metrics", false, "*Deprecated*: use metrics-flavour. Switch to Prometheus metrics format to expose metrics")
373+
flag.BoolVar(&cfg.EnablePrometheusStartLabel, "enable-prometheus-start-label", false, "adds start label to each prometheus counter with the value of counter creation timestamp as unix nanoseconds")
372374
flag.StringVar(&cfg.OpenTracing, "opentracing", "noop", "list of arguments for opentracing (space separated), first argument is the tracer implementation")
373375
flag.StringVar(&cfg.OpenTracingInitialSpan, "opentracing-initial-span", "ingress", "set the name of the initial, pre-routing, tracing span")
374376
flag.StringVar(&cfg.OpenTracingExcludedProxyTags, "opentracing-excluded-proxy-tags", "", "set tags that should be excluded from spans created for proxy operation. must be a comma-separated list of strings.")
@@ -382,7 +384,7 @@ func NewConfig() *Config {
382384
flag.IntVar(&cfg.BlockProfileRate, "block-profile-rate", 0, "block profile sample rate, see runtime.SetBlockProfileRate")
383385
flag.IntVar(&cfg.MutexProfileFraction, "mutex-profile-fraction", 0, "mutex profile fraction rate, see runtime.SetMutexProfileFraction")
384386
flag.IntVar(&cfg.MemProfileRate, "memory-profile-rate", 0, "memory profile rate, see runtime.SetMemProfileRate, keeps default 512 kB")
385-
flag.BoolVar(&cfg.EnablePrometheusStartLabel, "enable-prometheus-start-label", false, "adds start label to each prometheus counter with the value of counter creation timestamp as unix nanoseconds")
387+
flag.StringVar(&cfg.FlightRecorderTargetURL, "flight-recorder-target-url", "", "sets the flight recorder target URL that is used to write out the trace to.")
386388
flag.BoolVar(&cfg.DebugGcMetrics, "debug-gc-metrics", false, "enables reporting of the Go garbage collector statistics exported in debug.GCStats")
387389
flag.BoolVar(&cfg.RuntimeMetrics, "runtime-metrics", true, "enables reporting of the Go runtime statistics exported in runtime and specifically runtime.MemStats")
388390
flag.BoolVar(&cfg.ServeRouteMetrics, "serve-route-metrics", false, "enables reporting total serve time metrics for each route")
@@ -755,6 +757,7 @@ func (c *Config) ToOptions() skipper.Options {
755757
EnableProfile: c.EnableProfile,
756758
BlockProfileRate: c.BlockProfileRate,
757759
MutexProfileFraction: c.MutexProfileFraction,
760+
FlightRecorderTargetURL: c.FlightRecorderTargetURL,
758761
EnableDebugGcMetrics: c.DebugGcMetrics,
759762
EnableRuntimeMetrics: c.RuntimeMetrics,
760763
EnableServeRouteMetrics: c.ServeRouteMetrics,

go.mod

+6-4
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ require (
4949
github.com/yuin/gopher-lua v1.1.1
5050
go4.org/netipx v0.0.0-20220925034521-797b0c90d8ab
5151
golang.org/x/crypto v0.27.0
52-
golang.org/x/exp v0.0.0-20230905200255-921286631fa9
52+
golang.org/x/exp v0.0.0-20240909161429-701f63a606c0
5353
golang.org/x/net v0.29.0
5454
golang.org/x/oauth2 v0.23.0
5555
golang.org/x/sync v0.8.0
@@ -169,10 +169,10 @@ require (
169169
go.opentelemetry.io/proto/otlp v1.3.1 // indirect
170170
go.uber.org/atomic v1.9.0 // indirect
171171
go.uber.org/automaxprocs v1.5.3 // indirect
172-
golang.org/x/mod v0.19.0 // indirect
172+
golang.org/x/mod v0.21.0 // indirect
173173
golang.org/x/sys v0.25.0 // indirect
174174
golang.org/x/text v0.18.0 // indirect
175-
golang.org/x/tools v0.23.0 // indirect
175+
golang.org/x/tools v0.25.0 // indirect
176176
gonum.org/v1/gonum v0.8.2 // indirect
177177
google.golang.org/genproto/googleapis/api v0.0.0-20240701130421-f6361c86f094 // indirect
178178
google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094 // indirect
@@ -182,4 +182,6 @@ require (
182182
sigs.k8s.io/yaml v1.4.0 // indirect
183183
)
184184

185-
go 1.22
185+
go 1.22.0
186+
187+
toolchain go1.23.0

go.sum

+6-6
Original file line numberDiff line numberDiff line change
@@ -538,8 +538,8 @@ golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL
538538
golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
539539
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
540540
golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY=
541-
golang.org/x/exp v0.0.0-20230905200255-921286631fa9 h1:GoHiUyI/Tp2nVkLI2mCxVkOjsbSXD66ic0XW0js0R9g=
542-
golang.org/x/exp v0.0.0-20230905200255-921286631fa9/go.mod h1:S2oDrQGGwySpoQPVqRShND87VCbxmc6bL1Yd2oYrm6k=
541+
golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 h1:e66Fs6Z+fZTbFBAxKfP3PALWBtpfqks2bwGcexMxgtk=
542+
golang.org/x/exp v0.0.0-20240909161429-701f63a606c0/go.mod h1:2TbTHSBQa924w8M6Xs1QcRcFwyucIwBGpK1p2f1YFFY=
543543
golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs=
544544
golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
545545
golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
@@ -552,8 +552,8 @@ golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=
552552
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
553553
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
554554
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
555-
golang.org/x/mod v0.19.0 h1:fEdghXQSo20giMthA7cd28ZC+jts4amQ3YMXiP5oMQ8=
556-
golang.org/x/mod v0.19.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
555+
golang.org/x/mod v0.21.0 h1:vvrHzRwRfVKSiLrG+d4FMl/Qi4ukBCE6kZlTUkDYRT0=
556+
golang.org/x/mod v0.21.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY=
557557
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
558558
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
559559
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -638,8 +638,8 @@ golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roY
638638
golang.org/x/tools v0.0.0-20200908211811-12e1bf57a112/go.mod h1:Cj7w3i3Rnn0Xh82ur9kSqwfTHTeVxaDqrfMjpcNT6bE=
639639
golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
640640
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
641-
golang.org/x/tools v0.23.0 h1:SGsXPZ+2l4JsgaCKkx+FQ9YZ5XEtA1GZYuoDjenLjvg=
642-
golang.org/x/tools v0.23.0/go.mod h1:pnu6ufv6vQkll6szChhK3C3L/ruaIv5eBeztNG8wtsI=
641+
golang.org/x/tools v0.25.0 h1:oFU9pkj/iJgs+0DT+VMHrx+oBKs/LJMV+Uvg78sl+fE=
642+
golang.org/x/tools v0.25.0/go.mod h1:/vtpO8WL1N9cQC3FN5zPqb//fRXskFHbLKk4OW1Q7rg=
643643
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
644644
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
645645
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=

proxy/flightrecorder_test.go

+76
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
package proxy_test
2+
3+
import (
4+
"bytes"
5+
"fmt"
6+
"io"
7+
"net/http"
8+
"net/http/httptest"
9+
"testing"
10+
11+
"github.com/zalando/skipper/eskip"
12+
"github.com/zalando/skipper/filters"
13+
"github.com/zalando/skipper/filters/diag"
14+
"github.com/zalando/skipper/proxy"
15+
"github.com/zalando/skipper/proxy/proxytest"
16+
xtrace "golang.org/x/exp/trace"
17+
)
18+
19+
func TestFlightRecorder(t *testing.T) {
20+
service := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
21+
if r.Method != "PUT" {
22+
w.WriteHeader(http.StatusMethodNotAllowed)
23+
w.Write([]byte(http.StatusText(http.StatusMethodNotAllowed)))
24+
return
25+
}
26+
27+
var buf bytes.Buffer
28+
n, err := io.Copy(&buf, r.Body)
29+
if err != nil {
30+
t.Fatalf("Failed to copy data: %v", err)
31+
}
32+
if n < 100 {
33+
t.Fatalf("Failed to write enough data: %d bytes", n)
34+
}
35+
w.WriteHeader(http.StatusCreated)
36+
w.Write([]byte(http.StatusText(http.StatusCreated)))
37+
38+
}))
39+
defer service.Close()
40+
41+
backend := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {}))
42+
defer backend.Close()
43+
44+
flightRecorder := xtrace.NewFlightRecorder()
45+
flightRecorder.Start()
46+
47+
spec := diag.NewTrace()
48+
fr := make(filters.Registry)
49+
fr.Register(spec)
50+
51+
doc := fmt.Sprintf(`r: * -> trace("20µs") -> "%s"`, backend.URL)
52+
rr := eskip.MustParse(doc)
53+
54+
pr := proxytest.WithParams(fr, proxy.Params{
55+
FlightRecorder: flightRecorder,
56+
FlightRecorderTargetURL: service.URL,
57+
}, rr...)
58+
defer pr.Close()
59+
60+
rsp, err := pr.Client().Get(pr.URL)
61+
if err != nil {
62+
t.Fatalf("Failed to GET %q: %v", pr.URL, err)
63+
}
64+
defer rsp.Body.Close()
65+
_, err = io.ReadAll(rsp.Body)
66+
if err != nil {
67+
t.Fatalf("Failed to read body: %v", err)
68+
}
69+
70+
switch rsp.StatusCode {
71+
case 200, 201, 204:
72+
// ok
73+
default:
74+
t.Fatalf("Failed to get status OK: %d", rsp.StatusCode)
75+
}
76+
}

proxy/proxy.go

+147-2
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"unicode/utf8"
2323

2424
"golang.org/x/exp/maps"
25+
"golang.org/x/exp/trace"
2526
"golang.org/x/time/rate"
2627

2728
ot "github.com/opentracing/opentracing-go"
@@ -360,6 +361,18 @@ type Params struct {
360361

361362
// PassiveHealthCheck defines the parameters for the healthy endpoints checker.
362363
PassiveHealthCheck *PassiveHealthCheck
364+
365+
// FlightRecorder is a started instance of https://pkg.go.dev/golang.org/x/exp/trace#FlightRecorder
366+
FlightRecorder *trace.FlightRecorder
367+
368+
// FlightRecorderTargetURL is the target to write the trace
369+
// to. Supported targets are http URL and file URL.
370+
FlightRecorderTargetURL string
371+
372+
// FlightRecorderPeriod is the time.Duration that is used for
373+
// a slow skipper. If skipper is detected to be slow it tries
374+
// to write out a trace as configured by the FlightRecorderTargetURL.
375+
FlightRecorderPeriod time.Duration
363376
}
364377

365378
type (
@@ -454,6 +467,10 @@ type Proxy struct {
454467
clientTLS *tls.Config
455468
hostname string
456469
onPanicSometimes rate.Sometimes
470+
flightRecorder *trace.FlightRecorder
471+
flightRecorderURL *url.URL
472+
flightRecorderPeriod time.Duration
473+
flightRecorderCH chan struct{}
457474
}
458475

459476
// proxyError is used to wrap errors during proxying and to indicate
@@ -845,6 +862,48 @@ func WithParams(p Params) *Proxy {
845862
maxUnhealthyEndpointsRatio: p.PassiveHealthCheck.MaxUnhealthyEndpointsRatio,
846863
}
847864
}
865+
866+
log := &logging.DefaultLog{}
867+
868+
var (
869+
frURL *url.URL
870+
// buffered channel size 10k to allow unblocked requests
871+
frChannel = make(chan struct{}, 10240)
872+
)
873+
if p.FlightRecorder != nil {
874+
var err error
875+
frURL, err = url.Parse(p.FlightRecorderTargetURL)
876+
if err != nil {
877+
p.FlightRecorder.Stop()
878+
p.FlightRecorder = nil
879+
} else {
880+
go func() {
881+
d := 7 * 24 * time.Hour
882+
last := time.Now()
883+
884+
for {
885+
select {
886+
case <-frChannel:
887+
// range through all notifications until 1ms there is no notification
888+
d = time.Millisecond
889+
continue
890+
case <-quit:
891+
p.FlightRecorder.Stop()
892+
return
893+
case <-time.After(d):
894+
if time.Since(last) >= time.Hour {
895+
writeTrace(p.FlightRecorder, frURL, log, tr)
896+
}
897+
last = time.Now()
898+
899+
// reset d
900+
d = 7 * 24 * time.Hour
901+
}
902+
}
903+
}()
904+
}
905+
}
906+
848907
return &Proxy{
849908
routing: p.Routing,
850909
registry: p.EndpointRegistry,
@@ -864,7 +923,7 @@ func WithParams(p Params) *Proxy {
864923
maxLoops: p.MaxLoopbacks,
865924
breakers: p.CircuitBreakers,
866925
limiters: p.RateLimiters,
867-
log: &logging.DefaultLog{},
926+
log: log,
868927
defaultHTTPStatus: defaultHTTPStatus,
869928
tracing: newProxyTracing(p.OpenTracing),
870929
accessLogDisabled: p.AccessLogDisabled,
@@ -873,6 +932,89 @@ func WithParams(p Params) *Proxy {
873932
clientTLS: tr.TLSClientConfig,
874933
hostname: hostname,
875934
onPanicSometimes: rate.Sometimes{First: 3, Interval: 1 * time.Minute},
935+
flightRecorder: p.FlightRecorder,
936+
flightRecorderURL: frURL,
937+
flightRecorderPeriod: p.FlightRecorderPeriod,
938+
flightRecorderCH: frChannel,
939+
}
940+
}
941+
942+
func (p *Proxy) writeTraceIfTooSlow(ctx *context, span ot.Span) {
943+
took := time.Since(ctx.startServe)
944+
span.SetTag("proxy.took", took)
945+
946+
d := p.flightRecorderPeriod
947+
if d < 1*time.Millisecond && d > took {
948+
return
949+
}
950+
951+
// signal too slow
952+
p.flightRecorderCH <- struct{}{}
953+
}
954+
955+
func writeTraceTo(log logging.Logger, flightRecorder *trace.FlightRecorder, w io.Writer) (int, error) {
956+
n, err := flightRecorder.WriteTo(w)
957+
if err != nil {
958+
switch err {
959+
case trace.ErrSnapshotActive:
960+
return 0, fmt.Errorf("flightRecorder already in progress")
961+
default:
962+
return 0, fmt.Errorf("failed to write FlightRecorder data: %w", err)
963+
}
964+
} else {
965+
log.Infof("FlightRecorder wrote %d bytes", n)
966+
}
967+
968+
return n, err
969+
}
970+
971+
func writeTrace(flightRecorder *trace.FlightRecorder, flightRecorderURL *url.URL, log logging.Logger, roundTripper http.RoundTripper) {
972+
if flightRecorder == nil || flightRecorderURL == nil {
973+
return
974+
}
975+
976+
log.Info("write trace")
977+
978+
switch flightRecorderURL.Scheme {
979+
case "file":
980+
fd, err := os.Open(flightRecorderURL.Path)
981+
if err != nil {
982+
log.Errorf("Failed to write file %q: %v", err, flightRecorderURL.Path)
983+
return
984+
}
985+
986+
_, err = writeTraceTo(log, flightRecorder, fd)
987+
if err != nil {
988+
log.Errorf("Failed to write trace file %q: %v", flightRecorderURL.Path, err)
989+
}
990+
991+
case "http", "https":
992+
var b bytes.Buffer
993+
_, err := writeTraceTo(log, flightRecorder, &b)
994+
if err != nil {
995+
log.Errorf("Failed to write trace into in-memory buffer: %v", err)
996+
return
997+
}
998+
999+
req, err := http.NewRequest("PUT", flightRecorderURL.String(), &b)
1000+
if err != nil {
1001+
log.Errorf("Failed to create request to %q to send a trace: %v", flightRecorderURL.String(), err)
1002+
}
1003+
1004+
rsp, err := roundTripper.RoundTrip(req)
1005+
if err != nil {
1006+
log.Errorf("Failed to write trace to %q: %v", flightRecorderURL.String(), err)
1007+
} else {
1008+
rsp.Body.Close()
1009+
}
1010+
switch rsp.StatusCode {
1011+
case 200, 201, 204:
1012+
log.Infof("Successful send of a trace to %q", flightRecorderURL.String())
1013+
default:
1014+
log.Errorf("Failed to get successful response from %s: (%d) %s", flightRecorderURL.String(), rsp.StatusCode, rsp.Status)
1015+
}
1016+
default:
1017+
log.Errorf("Failed to write trace, unknown FlightRecorderURL scheme %q", flightRecorderURL.Scheme)
8761018
}
8771019
}
8781020

@@ -1003,7 +1145,8 @@ func (p *Proxy) makeBackendRequest(ctx *context, requestContext stdlibcontext.Co
10031145
proxySpanOpts := []ot.StartSpanOption{ot.Tags{
10041146
SpanKindTag: SpanKindClient,
10051147
}}
1006-
if parentSpan := ot.SpanFromContext(req.Context()); parentSpan != nil {
1148+
parentSpan := ot.SpanFromContext(req.Context())
1149+
if parentSpan != nil {
10071150
proxySpanOpts = append(proxySpanOpts, ot.ChildOf(parentSpan.Context()))
10081151
}
10091152
ctx.proxySpan = p.tracing.tracer.StartSpan(spanName, proxySpanOpts...)
@@ -1024,6 +1167,8 @@ func (p *Proxy) makeBackendRequest(ctx *context, requestContext stdlibcontext.Co
10241167
ctx.proxySpan.LogKV("http_roundtrip", StartEvent)
10251168
req = injectClientTrace(req, ctx.proxySpan)
10261169

1170+
p.writeTraceIfTooSlow(ctx, parentSpan)
1171+
10271172
response, err := roundTripper.RoundTrip(req)
10281173
if endpointMetrics != nil {
10291174
endpointMetrics.IncRequests(routing.IncRequestsOptions{FailedRoundTrip: err != nil})

0 commit comments

Comments
 (0)