Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions ddtrace/tracer/civisibility_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,8 @@ func (w *ciVisibilityTraceWriter) flush() {
requestCompressedType = telemetry.CompressedRequestCompressedType
}
telemetry.EndpointPayloadRequests(telemetry.TestCycleEndpointType, requestCompressedType)

for attempt := 0; attempt <= w.config.sendRetries; attempt++ {
sendRetries := w.config.internalConfig.SendRetries()
for attempt := 0; attempt <= sendRetries; attempt++ {
stats := p.stats()
size, count = stats.size, stats.itemCount
log.Debug("ciVisibilityTraceWriter: sending payload: size: %d events: %d\n", size, count)
Expand All @@ -122,7 +122,7 @@ func (w *ciVisibilityTraceWriter) flush() {
log.Debug("ciVisibilityTraceWriter: sent events after %d attempts", attempt+1)
return
}
log.Error("ciVisibilityTraceWriter: failure sending events (attempt %d of %d): %v", attempt+1, w.config.sendRetries+1, err.Error())
log.Error("ciVisibilityTraceWriter: failure sending events (attempt %d of %d): %v", attempt+1, sendRetries+1, err.Error())
p.reset()
time.Sleep(w.config.retryInterval)
}
Expand Down
3 changes: 2 additions & 1 deletion ddtrace/tracer/civisibility_writer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"testing"
"time"

internalconfig "github.com/DataDog/dd-trace-go/v2/internal/config"
"github.com/stretchr/testify/assert"
"github.com/tinylib/msgp/msgp"
)
Expand Down Expand Up @@ -90,7 +91,7 @@ func TestCiVisibilityTraceWriterFlushRetries(t *testing.T) {
}
c, err := newTestConfig(func(c *config) {
c.transport = p
c.sendRetries = test.configRetries
c.internalConfig.SetSendRetries(test.configRetries, internalconfig.OriginCode)
c.retryInterval = test.retryInterval
})
assert.NoError(err)
Expand Down
6 changes: 1 addition & 5 deletions ddtrace/tracer/option.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,10 +162,6 @@ type config struct {
// output instead of using the agent. This is used in Lambda environments.
logToStdout bool

// sendRetries is the number of times a trace or CI Visibility payload send is retried upon
// failure.
sendRetries int

// retryInterval is the interval between agent connection retries. It has no effect if sendRetries is not set
retryInterval time.Duration

Expand Down Expand Up @@ -1026,7 +1022,7 @@ func WithLambdaMode(enabled bool) StartOption {
// most `retries` times.
func WithSendRetries(retries int) StartOption {
return func(c *config) {
c.sendRetries = retries
c.internalConfig.SetSendRetries(retries, telemetry.OriginCode)
}
}

Expand Down
4 changes: 2 additions & 2 deletions ddtrace/tracer/option_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -920,7 +920,7 @@ func TestTracerOptionsDefaults(t *testing.T) {
t.Run("trace-retries", func(t *testing.T) {
c, err := newTestConfig()
assert.NoError(t, err)
assert.Equal(t, 0, c.sendRetries)
assert.Equal(t, 0, c.internalConfig.SendRetries())
assert.Equal(t, time.Millisecond, c.retryInterval)
})
}
Expand All @@ -929,7 +929,7 @@ func TestTraceRetry(t *testing.T) {
t.Run("sendRetries", func(t *testing.T) {
c, err := newTestConfig(WithSendRetries(10))
assert.NoError(t, err)
assert.Equal(t, 10, c.sendRetries)
assert.Equal(t, 10, c.internalConfig.SendRetries())
})
t.Run("retryInterval", func(t *testing.T) {
c, err := newTestConfig(WithRetryInterval(10))
Expand Down
2 changes: 1 addition & 1 deletion ddtrace/tracer/telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ func startTelemetry(c *config) telemetry.Client {
{Name: "stats_computation_enabled", Value: c.canComputeStats()},
{Name: "dogstatsd_port", Value: c.agent.StatsdPort},
{Name: "lambda_mode", Value: c.logToStdout},
{Name: "send_retries", Value: c.sendRetries},
{Name: "send_retries", Value: c.internalConfig.SendRetries()},
{Name: "retry_interval", Value: c.retryInterval},
{Name: "trace_startup_logs_enabled", Value: c.logStartup},
{Name: "service", Value: c.serviceName},
Expand Down
5 changes: 3 additions & 2 deletions ddtrace/tracer/writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,8 @@ func (h *agentTraceWriter) flush() {

stats := p.stats()
var err error
for attempt := 0; attempt <= h.config.sendRetries; attempt++ {
sendRetries := h.config.internalConfig.SendRetries()
for attempt := 0; attempt <= sendRetries; attempt++ {
log.Debug("Attempt to send payload: size: %d traces: %d\n", stats.size, stats.itemCount)
var rc io.ReadCloser
rc, err = h.config.transport.send(p)
Expand All @@ -146,7 +147,7 @@ func (h *agentTraceWriter) flush() {
}

if attempt+1%5 == 0 {
log.Error("failure sending traces (attempt %d of %d): %v", attempt+1, h.config.sendRetries+1, err.Error())
log.Error("failure sending traces (attempt %d of %d): %v", attempt+1, sendRetries+1, err.Error())
}
p.reset()
time.Sleep(h.config.retryInterval)
Expand Down
38 changes: 26 additions & 12 deletions ddtrace/tracer/writer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ import (
"testing"
"time"

internalconfig "github.com/DataDog/dd-trace-go/v2/internal/config"
"github.com/DataDog/dd-trace-go/v2/internal/log"
"github.com/DataDog/dd-trace-go/v2/internal/processtags"
"github.com/DataDog/dd-trace-go/v2/internal/statsdtest"

"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -372,6 +374,9 @@ func (t *failingTransport) send(p payload) (io.ReadCloser, error) {
}

func TestTraceWriterFlushRetries(t *testing.T) {
// Reload process tags to ensure consistent state (previous tests may have disabled them)
processtags.Reload()
Comment on lines +377 to +378
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For changes to this test, see: https://dd.slack.com/archives/C03D7LNP0TD/p1765313037158839

by newly importing internalconfig "github.com/DataDog/dd-trace-go/v2/internal/config", I believe that triggered a dependency tree that caused processtags init() function to run earlier than it did before. As a result, processtags.enabled evaluated to true (by default), and process tags were appended onto span metadata, causing the payload size to increase from 185 bytes to 308.


testcases := []struct {
configRetries int
retryInterval time.Duration
Expand All @@ -395,18 +400,11 @@ func TestTraceWriterFlushRetries(t *testing.T) {
{configRetries: 2, retryInterval: 2 * time.Millisecond, failCount: 2, tracesSent: true, expAttempts: 3},
}

sentCounts := map[string]int64{
"datadog.tracer.decode_error": 1,
"datadog.tracer.flush_bytes": 185,
"datadog.tracer.flush_traces": 1,
"datadog.tracer.queue.enqueued.traces": 1,
}
droppedCounts := map[string]int64{
"datadog.tracer.queue.enqueued.traces": 1,
"datadog.tracer.traces_dropped": 1,
}

ss := []*Span{makeSpan(0)}

// Capture expected payload size from first test run to be OS-agnostic and forward-compatible
var expectedPayloadSize int64

for _, test := range testcases {
name := fmt.Sprintf("%d-%d-%t-%d", test.configRetries, test.failCount, test.tracesSent, test.expAttempts)
t.Run(name, func(t *testing.T) {
Expand All @@ -417,14 +415,20 @@ func TestTraceWriterFlushRetries(t *testing.T) {
}
c, err := newTestConfig(func(c *config) {
c.transport = p
c.sendRetries = test.configRetries
c.internalConfig.SetSendRetries(test.configRetries, internalconfig.OriginCode)
c.retryInterval = test.retryInterval
})
assert.Nil(err)
var statsd statsdtest.TestStatsdClient

h := newAgentTraceWriter(c, newPrioritySampler(), &statsd)
h.add(ss)

// Capture payload size from first test run for dynamic assertions
if expectedPayloadSize == 0 {
expectedPayloadSize = int64(h.payload.size())
}

start := time.Now()
h.flush()
h.wg.Wait()
Expand All @@ -435,8 +439,18 @@ func TestTraceWriterFlushRetries(t *testing.T) {

assert.Equal(1, len(statsd.TimingCalls()))
if test.tracesSent {
sentCounts := map[string]int64{
"datadog.tracer.decode_error": 1,
"datadog.tracer.flush_bytes": expectedPayloadSize,
"datadog.tracer.flush_traces": 1,
"datadog.tracer.queue.enqueued.traces": 1,
}
assert.Equal(sentCounts, statsd.Counts())
} else {
droppedCounts := map[string]int64{
"datadog.tracer.queue.enqueued.traces": 1,
"datadog.tracer.traces_dropped": 1,
}
assert.Equal(droppedCounts, statsd.Counts())
}
if test.configRetries > 0 && test.failCount > 1 {
Expand Down
17 changes: 17 additions & 0 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ type Config struct {
ciVisibilityAgentless bool
logDirectory string
traceRateLimitPerSecond float64
// sendRetries is the number of times a trace or CI Visibility payload send is retried upon
// failure.
sendRetries int
}

// loadConfig initializes and returns a new config by reading from all configured sources.
Expand Down Expand Up @@ -98,6 +101,7 @@ func loadConfig() *Config {
cfg.ciVisibilityAgentless = provider.getBool("DD_CIVISIBILITY_AGENTLESS_ENABLED", false)
cfg.logDirectory = provider.getString("DD_TRACE_LOG_DIRECTORY", "")
cfg.traceRateLimitPerSecond = provider.getFloat("DD_TRACE_RATE_LIMIT", 0.0)
cfg.sendRetries = provider.getInt("DD_TRACE_SEND_RETRIES", 0)

return cfg
}
Expand Down Expand Up @@ -134,3 +138,16 @@ func (c *Config) SetDebug(enabled bool, origin telemetry.Origin) {
c.debug = enabled
telemetry.RegisterAppConfig("DD_TRACE_DEBUG", enabled, origin)
}

func (c *Config) SendRetries() int {
c.mu.RLock()
defer c.mu.RUnlock()
return c.sendRetries
}

func (c *Config) SetSendRetries(retries int, origin telemetry.Origin) {
c.mu.Lock()
defer c.mu.Unlock()
c.sendRetries = retries
telemetry.RegisterAppConfig("DD_TRACE_SEND_RETRIES", retries, origin)
}
1 change: 1 addition & 0 deletions internal/env/supported_configurations.gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions internal/env/supported_configurations.json
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,9 @@
"DD_TRACE_SARAMA_ANALYTICS_ENABLED": [
"A"
],
"DD_TRACE_SEND_RETRIES": [
"A"
],
"DD_TRACE_SOURCE_HOSTNAME": [
"A"
],
Expand Down
Loading