Skip to content

Commit 083bc5e

Browse files
committed
feature: allow configuration for Go x/trace.FlightRecorder
Signed-off-by: Sandor Szücs <[email protected]>
1 parent 1502297 commit 083bc5e

File tree

5 files changed

+240
-145
lines changed

5 files changed

+240
-145
lines changed

config/config.go

+59-44
Original file line numberDiff line numberDiff line change
@@ -69,50 +69,55 @@ type Config struct {
6969
CompressEncodings *listFlag `yaml:"compress-encodings"`
7070

7171
// logging, metrics, profiling, tracing:
72-
EnablePrometheusMetrics bool `yaml:"enable-prometheus-metrics"`
73-
OpenTracing string `yaml:"opentracing"`
74-
OpenTracingInitialSpan string `yaml:"opentracing-initial-span"`
75-
OpenTracingExcludedProxyTags string `yaml:"opentracing-excluded-proxy-tags"`
76-
OpenTracingDisableFilterSpans bool `yaml:"opentracing-disable-filter-spans"`
77-
OpentracingLogFilterLifecycleEvents bool `yaml:"opentracing-log-filter-lifecycle-events"`
78-
OpentracingLogStreamEvents bool `yaml:"opentracing-log-stream-events"`
79-
OpentracingBackendNameTag bool `yaml:"opentracing-backend-name-tag"`
80-
MetricsListener string `yaml:"metrics-listener"`
81-
MetricsPrefix string `yaml:"metrics-prefix"`
82-
EnableProfile bool `yaml:"enable-profile"`
83-
BlockProfileRate int `yaml:"block-profile-rate"`
84-
MutexProfileFraction int `yaml:"mutex-profile-fraction"`
85-
MemProfileRate int `yaml:"memory-profile-rate"`
86-
DebugGcMetrics bool `yaml:"debug-gc-metrics"`
87-
RuntimeMetrics bool `yaml:"runtime-metrics"`
88-
ServeRouteMetrics bool `yaml:"serve-route-metrics"`
89-
ServeRouteCounter bool `yaml:"serve-route-counter"`
90-
ServeHostMetrics bool `yaml:"serve-host-metrics"`
91-
ServeHostCounter bool `yaml:"serve-host-counter"`
92-
ServeMethodMetric bool `yaml:"serve-method-metric"`
93-
ServeStatusCodeMetric bool `yaml:"serve-status-code-metric"`
94-
BackendHostMetrics bool `yaml:"backend-host-metrics"`
95-
AllFiltersMetrics bool `yaml:"all-filters-metrics"`
96-
CombinedResponseMetrics bool `yaml:"combined-response-metrics"`
97-
RouteResponseMetrics bool `yaml:"route-response-metrics"`
98-
RouteBackendErrorCounters bool `yaml:"route-backend-error-counters"`
99-
RouteStreamErrorCounters bool `yaml:"route-stream-error-counters"`
100-
RouteBackendMetrics bool `yaml:"route-backend-metrics"`
101-
RouteCreationMetrics bool `yaml:"route-creation-metrics"`
102-
MetricsUseExpDecaySample bool `yaml:"metrics-exp-decay-sample"`
103-
HistogramMetricBucketsString string `yaml:"histogram-metric-buckets"`
104-
HistogramMetricBuckets []float64 `yaml:"-"`
105-
DisableMetricsCompat bool `yaml:"disable-metrics-compat"`
106-
ApplicationLog string `yaml:"application-log"`
107-
ApplicationLogLevel log.Level `yaml:"-"`
108-
ApplicationLogLevelString string `yaml:"application-log-level"`
109-
ApplicationLogPrefix string `yaml:"application-log-prefix"`
110-
ApplicationLogJSONEnabled bool `yaml:"application-log-json-enabled"`
111-
AccessLog string `yaml:"access-log"`
112-
AccessLogDisabled bool `yaml:"access-log-disabled"`
113-
AccessLogJSONEnabled bool `yaml:"access-log-json-enabled"`
114-
AccessLogStripQuery bool `yaml:"access-log-strip-query"`
115-
SuppressRouteUpdateLogs bool `yaml:"suppress-route-update-logs"`
72+
EnablePrometheusMetrics bool `yaml:"enable-prometheus-metrics"`
73+
OpenTracing string `yaml:"opentracing"`
74+
OpenTracingInitialSpan string `yaml:"opentracing-initial-span"`
75+
OpenTracingExcludedProxyTags string `yaml:"opentracing-excluded-proxy-tags"`
76+
OpenTracingDisableFilterSpans bool `yaml:"opentracing-disable-filter-spans"`
77+
OpentracingLogFilterLifecycleEvents bool `yaml:"opentracing-log-filter-lifecycle-events"`
78+
OpentracingLogStreamEvents bool `yaml:"opentracing-log-stream-events"`
79+
OpentracingBackendNameTag bool `yaml:"opentracing-backend-name-tag"`
80+
MetricsListener string `yaml:"metrics-listener"`
81+
MetricsPrefix string `yaml:"metrics-prefix"`
82+
EnableProfile bool `yaml:"enable-profile"`
83+
BlockProfileRate int `yaml:"block-profile-rate"`
84+
MutexProfileFraction int `yaml:"mutex-profile-fraction"`
85+
MemProfileRate int `yaml:"memory-profile-rate"`
86+
EnableFlightRecorder bool `yaml:"enable-flight-recorder"`
87+
FlightRecorderSize int `yaml:"flight-recorder-size"`
88+
FlightRecorderPeriod time.Duration `yaml:"flight-recorder-period"`
89+
FlightRecorderProxyTookTooLong time.Duration `yaml:"flight-recorder-proxy-took-too-long"`
90+
FlightRecorderTargetURL string `yaml:"flight-recorder-target-url"`
91+
DebugGcMetrics bool `yaml:"debug-gc-metrics"`
92+
RuntimeMetrics bool `yaml:"runtime-metrics"`
93+
ServeRouteMetrics bool `yaml:"serve-route-metrics"`
94+
ServeRouteCounter bool `yaml:"serve-route-counter"`
95+
ServeHostMetrics bool `yaml:"serve-host-metrics"`
96+
ServeHostCounter bool `yaml:"serve-host-counter"`
97+
ServeMethodMetric bool `yaml:"serve-method-metric"`
98+
ServeStatusCodeMetric bool `yaml:"serve-status-code-metric"`
99+
BackendHostMetrics bool `yaml:"backend-host-metrics"`
100+
AllFiltersMetrics bool `yaml:"all-filters-metrics"`
101+
CombinedResponseMetrics bool `yaml:"combined-response-metrics"`
102+
RouteResponseMetrics bool `yaml:"route-response-metrics"`
103+
RouteBackendErrorCounters bool `yaml:"route-backend-error-counters"`
104+
RouteStreamErrorCounters bool `yaml:"route-stream-error-counters"`
105+
RouteBackendMetrics bool `yaml:"route-backend-metrics"`
106+
RouteCreationMetrics bool `yaml:"route-creation-metrics"`
107+
MetricsUseExpDecaySample bool `yaml:"metrics-exp-decay-sample"`
108+
HistogramMetricBucketsString string `yaml:"histogram-metric-buckets"`
109+
HistogramMetricBuckets []float64 `yaml:"-"`
110+
DisableMetricsCompat bool `yaml:"disable-metrics-compat"`
111+
ApplicationLog string `yaml:"application-log"`
112+
ApplicationLogLevel log.Level `yaml:"-"`
113+
ApplicationLogLevelString string `yaml:"application-log-level"`
114+
ApplicationLogPrefix string `yaml:"application-log-prefix"`
115+
ApplicationLogJSONEnabled bool `yaml:"application-log-json-enabled"`
116+
AccessLog string `yaml:"access-log"`
117+
AccessLogDisabled bool `yaml:"access-log-disabled"`
118+
AccessLogJSONEnabled bool `yaml:"access-log-json-enabled"`
119+
AccessLogStripQuery bool `yaml:"access-log-strip-query"`
120+
SuppressRouteUpdateLogs bool `yaml:"suppress-route-update-logs"`
116121

117122
// route sources:
118123
EtcdUrls string `yaml:"etcd-urls"`
@@ -378,6 +383,11 @@ func NewConfig() *Config {
378383
flag.IntVar(&cfg.BlockProfileRate, "block-profile-rate", 0, "block profile sample rate, see runtime.SetBlockProfileRate")
379384
flag.IntVar(&cfg.MutexProfileFraction, "mutex-profile-fraction", 0, "mutex profile fraction rate, see runtime.SetMutexProfileFraction")
380385
flag.IntVar(&cfg.MemProfileRate, "memory-profile-rate", 0, "memory profile rate, see runtime.SetMemProfileRate, keeps default 512 kB")
386+
flag.BoolVar(&cfg.EnableFlightRecorder, "enable-flight-recorder", false, "enable flightrecorder Go tracer")
387+
flag.IntVar(&cfg.FlightRecorderSize, "flight-recorder-size", 0, "max flight-recorder trace data size")
388+
flag.DurationVar(&cfg.FlightRecorderPeriod, "flight-recorder-period", 0, "sets the approximate time duration that the flight recorder's circular buffer represents.")
389+
flag.DurationVar(&cfg.FlightRecorderProxyTookTooLong, "flight-recorder-proxy-took-too-long", 0, "sets the threshold, if proxy took longer than that the flight recorder will write out a trace.")
390+
flag.StringVar(&cfg.FlightRecorderTargetURL, "flight-recorder-target-url", "", "sets the flight recorder target URL that is used to write out the trace to.")
381391
flag.BoolVar(&cfg.DebugGcMetrics, "debug-gc-metrics", false, "enables reporting of the Go garbage collector statistics exported in debug.GCStats")
382392
flag.BoolVar(&cfg.RuntimeMetrics, "runtime-metrics", true, "enables reporting of the Go runtime statistics exported in runtime and specifically runtime.MemStats")
383393
flag.BoolVar(&cfg.ServeRouteMetrics, "serve-route-metrics", false, "enables reporting total serve time metrics for each route")
@@ -745,6 +755,11 @@ func (c *Config) ToOptions() skipper.Options {
745755
EnableProfile: c.EnableProfile,
746756
BlockProfileRate: c.BlockProfileRate,
747757
MutexProfileFraction: c.MutexProfileFraction,
758+
EnableFlightRecorder: c.EnableFlightRecorder,
759+
FlightRecorderSize: c.FlightRecorderSize,
760+
FlightRecorderPeriod: c.FlightRecorderPeriod,
761+
FlightRecorderProxyTookTooLong: c.FlightRecorderProxyTookTooLong,
762+
FlightRecorderTargetURL: c.FlightRecorderTargetURL,
748763
EnableDebugGcMetrics: c.DebugGcMetrics,
749764
EnableRuntimeMetrics: c.RuntimeMetrics,
750765
EnableServeRouteMetrics: c.ServeRouteMetrics,

filters/builtin/builtin.go

+1
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@ func Filters() []filters.Spec {
189189
diag.NewNormalResponseLatency(),
190190
diag.NewHistogramRequestLatency(),
191191
diag.NewHistogramResponseLatency(),
192+
diag.NewTrace(),
192193
tee.NewTee(),
193194
tee.NewTeeDeprecated(),
194195
tee.NewTeeNoFollow(),

filters/filters.go

+1
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,7 @@ const (
264264
NormalResponseLatencyName = "normalResponseLatency"
265265
HistogramRequestLatencyName = "histogramRequestLatency"
266266
HistogramResponseLatencyName = "histogramResponseLatency"
267+
TraceName = "trace"
267268
LogBodyName = "logBody"
268269
LogHeaderName = "logHeader"
269270
TeeName = "tee"

proxy/proxy.go

+116-76
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ import (
1818
"runtime"
1919
"strconv"
2020
"strings"
21-
"sync"
2221
"time"
2322
"unicode/utf8"
2423

@@ -318,6 +317,16 @@ type Params struct {
318317

319318
// PassiveHealthCheck defines the parameters for the healthy endpoints checker.
320319
PassiveHealthCheck *PassiveHealthCheck
320+
321+
// FlightRecorder is a started instance of https://pkg.go.dev/golang.org/x/exp/trace#FlightRecorder
322+
FlightRecorder *trace.FlightRecorder
323+
324+
// FlightRecorderTargetURL is the target to write the trace
325+
// to. Supported targets are http, https and file URLs.
326+
FlightRecorderTargetURL string
327+
328+
// FlightRecorderProxyTookTooLong defines the threshold when to write out a trace
329+
FlightRecorderProxyTookTooLong time.Duration
321330
}
322331

323332
type (
@@ -387,34 +396,34 @@ type PriorityRoute interface {
387396
// Proxy instances implement Skipper proxying functionality. For
388397
// initializing, see the WithParams constructor and Params.
389398
type Proxy struct {
390-
experimentalUpgrade bool
391-
experimentalUpgradeAudit bool
392-
accessLogDisabled bool
393-
maxLoops int
394-
defaultHTTPStatus int
395-
routing *routing.Routing
396-
registry *routing.EndpointRegistry
397-
fadein *fadeIn
398-
heathlyEndpoints *healthyEndpoints
399-
roundTripper http.RoundTripper
400-
priorityRoutes []PriorityRoute
401-
flags Flags
402-
metrics metrics.Metrics
403-
quit chan struct{}
404-
flushInterval time.Duration
405-
breakers *circuit.Registry
406-
limiters *ratelimit.Registry
407-
log logging.Logger
408-
tracing *proxyTracing
409-
upgradeAuditLogOut io.Writer
410-
upgradeAuditLogErr io.Writer
411-
auditLogHook chan struct{}
412-
clientTLS *tls.Config
413-
hostname string
414-
onPanicSometimes rate.Sometimes
415-
flightRecorder *trace.FlightRecorder
416-
traceOnce sync.Once
417-
tooLong time.Duration
399+
experimentalUpgrade bool
400+
experimentalUpgradeAudit bool
401+
accessLogDisabled bool
402+
maxLoops int
403+
defaultHTTPStatus int
404+
routing *routing.Routing
405+
registry *routing.EndpointRegistry
406+
fadein *fadeIn
407+
heathlyEndpoints *healthyEndpoints
408+
roundTripper http.RoundTripper
409+
priorityRoutes []PriorityRoute
410+
flags Flags
411+
metrics metrics.Metrics
412+
quit chan struct{}
413+
flushInterval time.Duration
414+
breakers *circuit.Registry
415+
limiters *ratelimit.Registry
416+
log logging.Logger
417+
tracing *proxyTracing
418+
upgradeAuditLogOut io.Writer
419+
upgradeAuditLogErr io.Writer
420+
auditLogHook chan struct{}
421+
clientTLS *tls.Config
422+
hostname string
423+
onPanicSometimes rate.Sometimes
424+
flightRecorder *trace.FlightRecorder
425+
flightRecorderURL *url.URL
426+
flightRecorderProxyTookTooLong time.Duration
418427
}
419428

420429
// proxyError is used to wrap errors during proxying and to indicate
@@ -801,13 +810,15 @@ func WithParams(p Params) *Proxy {
801810
endpointRegistry: p.EndpointRegistry,
802811
}
803812
}
804-
// TODO(sszuecs): expose an option to start it
805-
fr := trace.NewFlightRecorder()
806-
//fr.SetPeriod(d)
807-
//fr.SetSize(bytes int)
808-
err := fr.Start()
809-
if err != nil {
810-
println("Failed to start FlightRecorder:", err.Error())
813+
814+
var frURL *url.URL
815+
if p.FlightRecorder != nil {
816+
var err error
817+
frURL, err = url.Parse(p.FlightRecorderTargetURL)
818+
if err != nil {
819+
p.FlightRecorder.Stop()
820+
p.FlightRecorder = nil
821+
}
811822
}
812823

813824
return &Proxy{
@@ -817,53 +828,82 @@ func WithParams(p Params) *Proxy {
817828
rnd: rand.New(loadbalancer.NewLockedSource()),
818829
endpointRegistry: p.EndpointRegistry,
819830
},
820-
heathlyEndpoints: healthyEndpointsChooser,
821-
roundTripper: p.CustomHttpRoundTripperWrap(tr),
822-
priorityRoutes: p.PriorityRoutes,
823-
flags: p.Flags,
824-
metrics: m,
825-
quit: quit,
826-
flushInterval: p.FlushInterval,
827-
experimentalUpgrade: p.ExperimentalUpgrade,
828-
experimentalUpgradeAudit: p.ExperimentalUpgradeAudit,
829-
maxLoops: p.MaxLoopbacks,
830-
breakers: p.CircuitBreakers,
831-
limiters: p.RateLimiters,
832-
log: &logging.DefaultLog{},
833-
defaultHTTPStatus: defaultHTTPStatus,
834-
tracing: newProxyTracing(p.OpenTracing),
835-
accessLogDisabled: p.AccessLogDisabled,
836-
upgradeAuditLogOut: os.Stdout,
837-
upgradeAuditLogErr: os.Stderr,
838-
clientTLS: tr.TLSClientConfig,
839-
hostname: hostname,
840-
onPanicSometimes: rate.Sometimes{First: 3, Interval: 1 * time.Minute},
841-
flightRecorder: fr,
842-
traceOnce: sync.Once{},
843-
tooLong: 250 * time.Millisecond,
831+
heathlyEndpoints: healthyEndpointsChooser,
832+
roundTripper: p.CustomHttpRoundTripperWrap(tr),
833+
priorityRoutes: p.PriorityRoutes,
834+
flags: p.Flags,
835+
metrics: m,
836+
quit: quit,
837+
flushInterval: p.FlushInterval,
838+
experimentalUpgrade: p.ExperimentalUpgrade,
839+
experimentalUpgradeAudit: p.ExperimentalUpgradeAudit,
840+
maxLoops: p.MaxLoopbacks,
841+
breakers: p.CircuitBreakers,
842+
limiters: p.RateLimiters,
843+
log: &logging.DefaultLog{},
844+
defaultHTTPStatus: defaultHTTPStatus,
845+
tracing: newProxyTracing(p.OpenTracing),
846+
accessLogDisabled: p.AccessLogDisabled,
847+
upgradeAuditLogOut: os.Stdout,
848+
upgradeAuditLogErr: os.Stderr,
849+
clientTLS: tr.TLSClientConfig,
850+
hostname: hostname,
851+
onPanicSometimes: rate.Sometimes{First: 3, Interval: 1 * time.Minute},
852+
flightRecorder: p.FlightRecorder,
853+
flightRecorderURL: frURL,
854+
flightRecorderProxyTookTooLong: p.FlightRecorderProxyTookTooLong,
844855
}
845856
}
846857

847858
func (p *Proxy) writeTraceIfTooSlow(ctx *context) {
848-
p.log.Infof("write trace if too slow: %s > %s", time.Since(ctx.startServe), p.tooLong)
849-
if time.Since(ctx.startServe) > p.tooLong {
850-
p.log.Info("too slow")
851-
// Do it only once for simplicitly, but you can take more than one.
852-
p.traceOnce.Do(func() {
853-
p.log.Info("write trace because we were too slow")
854-
// Grab the snapshot.
855-
var b bytes.Buffer
856-
_, err := p.flightRecorder.WriteTo(&b)
857-
if err != nil {
858-
p.log.Errorf("Failed to write flightrecorder data: %v", err)
859+
if p.flightRecorder == nil || p.flightRecorderURL == nil {
860+
return
861+
}
862+
863+
d := p.flightRecorderProxyTookTooLong
864+
if e, ok := ctx.StateBag()[filters.TraceName]; ok {
865+
d = e.(time.Duration)
866+
}
867+
if d < 1*time.Microsecond {
868+
return
869+
}
870+
871+
p.log.Infof("write trace if too slow: %s > %s", time.Since(ctx.startServe), d)
872+
if time.Since(ctx.startServe) > d {
873+
var b bytes.Buffer
874+
_, err := p.flightRecorder.WriteTo(&b)
875+
if err != nil {
876+
p.log.Errorf("Failed to write flightrecorder data: %v", err)
877+
return
878+
}
879+
880+
switch p.flightRecorderURL.Scheme {
881+
case "file":
882+
if err := os.WriteFile(p.flightRecorderURL.Path, b.Bytes(), 0o644); err != nil {
883+
p.log.Errorf("Failed to write file trace.out: %v", err)
859884
return
885+
} else {
886+
p.log.Infof("FlightRecorder wrote %d bytes to trace file %q", b.Len(), p.flightRecorderURL.Path)
860887
}
861-
// Write it to a file.
862-
if err := os.WriteFile("trace.out", b.Bytes(), 0o755); err != nil {
863-
p.log.Errorf("Failed to write trace.out: %v", err)
864-
return
888+
case "http", "https":
889+
req, err := http.NewRequest("PUT", p.flightRecorderURL.String(), &b)
890+
if err != nil {
891+
p.log.Errorf("Failed to create request to %q to send a trace: %v", p.flightRecorderURL.String(), err)
865892
}
866-
})
893+
894+
rsp, err := http.DefaultClient.Do(req)
895+
if err != nil {
896+
p.log.Errorf("Failed to write trace to %q: %v", p.flightRecorderURL.String(), err)
897+
}
898+
switch rsp.StatusCode {
899+
case 200, 201, 204:
900+
p.log.Infof("Successful send of a trace to %q", p.flightRecorderURL.String())
901+
default:
902+
p.log.Errorf("Failed to get successful response from %s: (%d) %s", p.flightRecorderURL.String(), rsp.StatusCode, rsp.Status)
903+
}
904+
default:
905+
p.log.Errorf("Failed to write trace, unknown FlightRecorderURL %q", p.flightRecorderURL.Scheme)
906+
}
867907
}
868908
}
869909

0 commit comments

Comments
 (0)