Skip to content

Commit 2b2be1e

Browse files
Add pending traces report in tracer flares (#8053)
* adding tracerflare.addtext * adding class to handle tracerdump rebasing * updating flare to implement reporter and store only root spans * updating PR comments * rebasing * changing getSpans header * Adding DumpElement, DumpDrain, and DumpSupplier to extract PendingTraces * addressing PR comments * limiting number of tracers in tracer flare and sorting by oldest first * removing unused log * updating comparator * saving changes * initial implementation of test * updating test * feat(core): Use prepare for flare to signal dump element * feat(core): Update test prevent the span to be written early * fix(core): Fix tests * fix(core): Reduce scope * fix(core): Revert drain limit * feat(core): Refactor action elements * adding support for json encoding of traces * cleanup * renaming file * making TraceDumpJsonExporter a Writer * nit changes * updating test to match changes * addressing PR comments * final unit tests changes --------- Co-authored-by: Bruce Bujon <[email protected]>
1 parent e455ca7 commit 2b2be1e

File tree

6 files changed

+247
-14
lines changed

6 files changed

+247
-14
lines changed

dd-trace-core/build.gradle

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ excludedClassesCoverage += [
2020
'datadog.trace.common.writer.RemoteMapper.NoopRemoteMapper',
2121
'datadog.trace.core.monitor.DDAgentStatsDConnection',
2222
'datadog.trace.core.monitor.LoggingStatsDClient',
23-
'datadog.trace.core.PendingTraceBuffer.DelayingPendingTraceBuffer.FlushElement',
23+
'datadog.trace.core.PendingTraceBuffer.DelayingPendingTraceBuffer.CommandElement',
2424
'datadog.trace.core.StatusLogger',
2525
// covered with CI Visibility smoke tests
2626
'datadog.trace.core.StreamingTraceCollector',
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
package datadog.trace.common.writer;
2+
3+
import com.squareup.moshi.JsonAdapter;
4+
import com.squareup.moshi.Moshi;
5+
import com.squareup.moshi.Types;
6+
import datadog.trace.api.flare.TracerFlare;
7+
import datadog.trace.core.DDSpan;
8+
import java.io.IOException;
9+
import java.util.Collection;
10+
import java.util.List;
11+
import java.util.zip.ZipOutputStream;
12+
13+
public class TraceDumpJsonExporter implements Writer {
14+
15+
private static final JsonAdapter<Collection<DDSpan>> TRACE_ADAPTER =
16+
new Moshi.Builder()
17+
.add(DDSpanJsonAdapter.buildFactory(false))
18+
.build()
19+
.adapter(Types.newParameterizedType(Collection.class, DDSpan.class));
20+
private StringBuilder dumpText;
21+
private ZipOutputStream zip;
22+
23+
public TraceDumpJsonExporter(ZipOutputStream zip) {
24+
this.zip = zip;
25+
dumpText = new StringBuilder();
26+
}
27+
28+
public void write(final Collection<DDSpan> trace) {
29+
dumpText.append(TRACE_ADAPTER.toJson(trace));
30+
dumpText.append('\n');
31+
}
32+
33+
@Override
34+
public void write(List<DDSpan> trace) {
35+
// Do nothing
36+
}
37+
38+
@Override
39+
public void start() {
40+
// do nothing
41+
}
42+
43+
@Override
44+
public boolean flush() {
45+
try {
46+
TracerFlare.addText(zip, "pending_traces.txt", dumpText.toString());
47+
} catch (IOException e) {
48+
// do nothing
49+
}
50+
return true;
51+
}
52+
53+
@Override
54+
public void close() {
55+
// do nothing
56+
}
57+
58+
@Override
59+
public void incrementDropCounts(int spanCount) {}
60+
}

dd-trace-core/src/main/java/datadog/trace/core/PendingTrace.java

+5
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import datadog.trace.core.CoreTracer.ConfigSnapshot;
88
import datadog.trace.core.monitor.HealthMetrics;
99
import java.util.ArrayList;
10+
import java.util.Collection;
1011
import java.util.List;
1112
import java.util.concurrent.ConcurrentLinkedDeque;
1213
import java.util.concurrent.TimeUnit;
@@ -448,4 +449,8 @@ public static long getDurationNano(CoreSpan<?> span) {
448449
PendingTrace trace = (PendingTrace) traceCollector;
449450
return trace.getLastWriteTime() - span.getStartTime();
450451
}
452+
453+
Collection<DDSpan> getSpans() {
454+
return spans;
455+
}
451456
}

dd-trace-core/src/main/java/datadog/trace/core/PendingTraceBuffer.java

+100-6
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,22 @@
33
import static datadog.trace.util.AgentThreadFactory.AgentThread.TRACE_MONITOR;
44
import static datadog.trace.util.AgentThreadFactory.THREAD_JOIN_TIMOUT_MS;
55
import static datadog.trace.util.AgentThreadFactory.newAgentThread;
6+
import static java.util.Comparator.comparingLong;
67

78
import datadog.communication.ddagent.SharedCommunicationObjects;
89
import datadog.trace.api.Config;
10+
import datadog.trace.api.flare.TracerFlare;
911
import datadog.trace.api.time.TimeSource;
12+
import datadog.trace.common.writer.TraceDumpJsonExporter;
1013
import datadog.trace.core.monitor.HealthMetrics;
14+
import java.io.IOException;
15+
import java.util.ArrayList;
16+
import java.util.Comparator;
17+
import java.util.List;
1118
import java.util.concurrent.TimeUnit;
1219
import java.util.concurrent.atomic.AtomicInteger;
20+
import java.util.function.Predicate;
21+
import java.util.zip.ZipOutputStream;
1322
import org.jctools.queues.MessagePassingQueue;
1423
import org.jctools.queues.MpscBlockingConsumerArrayQueue;
1524
import org.slf4j.Logger;
@@ -47,13 +56,16 @@ private static class DelayingPendingTraceBuffer extends PendingTraceBuffer {
4756
private static final long FORCE_SEND_DELAY_MS = TimeUnit.SECONDS.toMillis(5);
4857
private static final long SEND_DELAY_NS = TimeUnit.MILLISECONDS.toNanos(500);
4958
private static final long SLEEP_TIME_MS = 100;
59+
private static final CommandElement FLUSH_ELEMENT = new CommandElement();
60+
private static final CommandElement DUMP_ELEMENT = new CommandElement();
5061

5162
private final MpscBlockingConsumerArrayQueue<Element> queue;
5263
private final Thread worker;
5364
private final TimeSource timeSource;
5465

5566
private volatile boolean closed = false;
5667
private final AtomicInteger flushCounter = new AtomicInteger(0);
68+
private final AtomicInteger dumpCounter = new AtomicInteger(0);
5769

5870
private final LongRunningTracesTracker runningTracesTracker;
5971

@@ -78,6 +90,7 @@ public void enqueue(Element pendingTrace) {
7890

7991
@Override
8092
public void start() {
93+
TracerFlare.addReporter(new TracerDump(this));
8194
worker.start();
8295
}
8396

@@ -108,10 +121,10 @@ public void flush() {
108121
if (worker.isAlive()) {
109122
int count = flushCounter.get();
110123
int loop = 1;
111-
boolean signaled = queue.offer(FlushElement.FLUSH_ELEMENT);
124+
boolean signaled = queue.offer(FLUSH_ELEMENT);
112125
while (!closed && !signaled) {
113126
yieldOrSleep(loop++);
114-
signaled = queue.offer(FlushElement.FLUSH_ELEMENT);
127+
signaled = queue.offer(FLUSH_ELEMENT);
115128
}
116129
int newCount = flushCounter.get();
117130
while (!closed && count >= newCount) {
@@ -130,9 +143,44 @@ public void accept(Element pendingTrace) {
130143
}
131144
}
132145

133-
private static final class FlushElement implements Element {
134-
static FlushElement FLUSH_ELEMENT = new FlushElement();
146+
private static final class DumpDrain
147+
implements MessagePassingQueue.Consumer<Element>, MessagePassingQueue.Supplier<Element> {
148+
private static final DumpDrain DUMP_DRAIN = new DumpDrain();
149+
private static final int MAX_DUMPED_TRACES = 50;
135150

151+
private static final Comparator<Element> TRACE_BY_START_TIME =
152+
comparingLong(trace -> trace.getRootSpan().getStartTime());
153+
private static final Predicate<Element> NOT_PENDING_TRACE =
154+
element -> !(element instanceof PendingTrace);
155+
156+
private volatile List<Element> data = new ArrayList<>();
157+
private int index = 0;
158+
159+
@Override
160+
public void accept(Element pendingTrace) {
161+
data.add(pendingTrace);
162+
}
163+
164+
@Override
165+
public Element get() {
166+
if (index < data.size()) {
167+
return data.get(index++);
168+
}
169+
return null; // Should never reach here or else queue may break according to
170+
// MessagePassingQueue docs
171+
}
172+
173+
public List<Element> collectTraces() {
174+
List<Element> traces = data;
175+
data = new ArrayList<>();
176+
traces.removeIf(NOT_PENDING_TRACE);
177+
// Storing oldest traces first
178+
traces.sort(TRACE_BY_START_TIME);
179+
return traces;
180+
}
181+
}
182+
183+
private static final class CommandElement implements Element {
136184
@Override
137185
public long oldestFinishedTime() {
138186
return 0;
@@ -180,13 +228,21 @@ public void run() {
180228
pendingTrace = queue.take(); // block until available;
181229
}
182230

183-
if (pendingTrace instanceof FlushElement) {
231+
if (pendingTrace == FLUSH_ELEMENT) {
184232
// Since this is an MPSC queue, the drain needs to be called on the consumer thread
185233
queue.drain(WriteDrain.WRITE_DRAIN);
186234
flushCounter.incrementAndGet();
187235
continue;
188236
}
189237

238+
if (pendingTrace == DUMP_ELEMENT) {
239+
queue.fill(
240+
DumpDrain.DUMP_DRAIN,
241+
queue.drain(DumpDrain.DUMP_DRAIN, DumpDrain.MAX_DUMPED_TRACES));
242+
dumpCounter.incrementAndGet();
243+
continue;
244+
}
245+
190246
// The element is no longer in the queue
191247
pendingTrace.setEnqueued(false);
192248

@@ -208,7 +264,7 @@ public void run() {
208264
// Trace has been unmodified long enough, go ahead and write whatever is finished.
209265
pendingTrace.write();
210266
} else {
211-
// Trace is too new. Requeue it and sleep to avoid a hot loop.
267+
// Trace is too new. Requeue it and sleep to avoid a hot loop.
212268
enqueue(pendingTrace);
213269
Thread.sleep(SLEEP_TIME_MS);
214270
}
@@ -277,4 +333,42 @@ public static PendingTraceBuffer discarding() {
277333
public abstract void flush();
278334

279335
public abstract void enqueue(Element pendingTrace);
336+
337+
private static class TracerDump implements TracerFlare.Reporter {
338+
private final DelayingPendingTraceBuffer buffer;
339+
340+
private TracerDump(DelayingPendingTraceBuffer buffer) {
341+
this.buffer = buffer;
342+
}
343+
344+
@Override
345+
public void prepareForFlare() {
346+
if (buffer.worker.isAlive()) {
347+
int count = buffer.dumpCounter.get();
348+
int loop = 1;
349+
boolean signaled = buffer.queue.offer(DelayingPendingTraceBuffer.DUMP_ELEMENT);
350+
while (!buffer.closed && !signaled) {
351+
buffer.yieldOrSleep(loop++);
352+
signaled = buffer.queue.offer(DelayingPendingTraceBuffer.DUMP_ELEMENT);
353+
}
354+
int newCount = buffer.dumpCounter.get();
355+
while (!buffer.closed && count >= newCount) {
356+
buffer.yieldOrSleep(loop++);
357+
newCount = buffer.dumpCounter.get();
358+
}
359+
}
360+
}
361+
362+
@Override
363+
public void addReportToFlare(ZipOutputStream zip) throws IOException {
364+
TraceDumpJsonExporter writer = new TraceDumpJsonExporter(zip);
365+
for (Element e : DelayingPendingTraceBuffer.DumpDrain.DUMP_DRAIN.collectTraces()) {
366+
if (e instanceof PendingTrace) {
367+
PendingTrace trace = (PendingTrace) e;
368+
writer.write(trace.getSpans());
369+
}
370+
}
371+
writer.flush();
372+
}
373+
}
280374
}

dd-trace-core/src/test/groovy/datadog/trace/core/LongRunningTracesTrackerTest.groovy

+1-1
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ class LongRunningTracesTrackerTest extends DDSpecification {
168168

169169
PendingTrace newTraceToTrack() {
170170
PendingTrace trace = factory.create(DDTraceId.ONE)
171-
PendingTraceBufferTest::newSpanOf(trace, PrioritySampling.SAMPLER_KEEP)
171+
PendingTraceBufferTest::newSpanOf(trace, PrioritySampling.SAMPLER_KEEP, 0)
172172
return trace
173173
}
174174

0 commit comments

Comments
 (0)