Skip to content

Commit 7e7ba79

Browse files
committed
PoC of bursty profiles
(Similar to Go execution tracing approach)
1 parent a8b33d5 commit 7e7ba79

File tree

3 files changed

+131
-10
lines changed
  • dd-java-agent/agent-profiling
    • profiling-controller-jfr/src/main/resources/jfr
    • profiling-controller-openjdk/src/main/java/com/datadog/profiling/controller/openjdk
  • dd-trace-api/src/main/java/datadog/trace/api/config

3 files changed

+131
-10
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
jdk.ThreadSleep#enabled=true
2+
jdk.ThreadSleep#stackTrace=true
3+
jdk.ThreadSleep#threshold=0 ms
4+
jdk.ThreadPark#enabled=true
5+
jdk.ThreadPark#stackTrace=true
6+
jdk.ThreadPark#threshold=0 ms
7+
jdk.JavaMonitorEnter#enabled=true
8+
jdk.JavaMonitorEnter#stackTrace=true
9+
jdk.JavaMonitorEnter#threshold=0 ms
10+
jdk.JavaMonitorWait#enabled=true
11+
jdk.JavaMonitorWait#stackTrace=true
12+
jdk.JavaMonitorWait#threshold=0 ms
13+
jdk.FileForce#enabled=true
14+
jdk.FileForce#stackTrace=true
15+
jdk.FileForce#threshold=0 ms
16+
jdk.FileRead#enabled=true
17+
jdk.FileRead#stackTrace=true
18+
jdk.FileRead#threshold=0 ms
19+
jdk.FileWrite#enabled=true
20+
jdk.FileWrite#stackTrace=true
21+
jdk.FileWrite#threshold=0 ms
22+
jdk.SocketRead#enabled=true
23+
jdk.SocketRead#stackTrace=true
24+
jdk.SocketRead#threshold=0 ms
25+
jdk.SocketWrite#enabled=true
26+
jdk.SocketWrite#stackTrace=true
27+
jdk.SocketWrite#threshold=0 ms
28+
jdk.VirtualThreadPinned#enabled=true
29+
jdk.VirtualThreadPinned#stackTrace=true
30+
jdk.VirtualThreadPinned#threshold=0 ms
31+
32+
datadog.Timeline#enabled=true
33+
datadog.Timeline#threshold=0 ms
34+
datadog.QueueTime#enabled=true
35+
datadog.QueueTime#threshold=0 ms

Diff for: dd-java-agent/agent-profiling/profiling-controller-openjdk/src/main/java/com/datadog/profiling/controller/openjdk/OpenJdkController.java

+89-10
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,7 @@
1818
import static com.datadog.profiling.controller.ProfilingSupport.*;
1919
import static com.datadog.profiling.controller.ProfilingSupport.isObjectCountParallelized;
2020
import static datadog.trace.api.Platform.isJavaVersionAtLeast;
21-
import static datadog.trace.api.config.ProfilingConfig.PROFILING_HEAP_HISTOGRAM_ENABLED;
22-
import static datadog.trace.api.config.ProfilingConfig.PROFILING_HEAP_HISTOGRAM_ENABLED_DEFAULT;
23-
import static datadog.trace.api.config.ProfilingConfig.PROFILING_HEAP_HISTOGRAM_MODE;
24-
import static datadog.trace.api.config.ProfilingConfig.PROFILING_HEAP_HISTOGRAM_MODE_DEFAULT;
25-
import static datadog.trace.api.config.ProfilingConfig.PROFILING_QUEUEING_TIME_ENABLED;
26-
import static datadog.trace.api.config.ProfilingConfig.PROFILING_QUEUEING_TIME_ENABLED_DEFAULT;
27-
import static datadog.trace.api.config.ProfilingConfig.PROFILING_QUEUEING_TIME_THRESHOLD_MILLIS;
28-
import static datadog.trace.api.config.ProfilingConfig.PROFILING_QUEUEING_TIME_THRESHOLD_MILLIS_DEFAULT;
29-
import static datadog.trace.api.config.ProfilingConfig.PROFILING_ULTRA_MINIMAL;
21+
import static datadog.trace.api.config.ProfilingConfig.*;
3022

3123
import com.datadog.profiling.controller.ConfigurationException;
3224
import com.datadog.profiling.controller.Controller;
@@ -43,11 +35,20 @@
4335
import datadog.trace.bootstrap.instrumentation.jfr.exceptions.ExceptionProfiling;
4436
import de.thetaphi.forbiddenapis.SuppressForbidden;
4537
import java.io.IOException;
38+
import java.io.InputStream;
4639
import java.nio.file.Files;
4740
import java.nio.file.Path;
4841
import java.time.Duration;
4942
import java.util.Collections;
5043
import java.util.Map;
44+
import java.util.Properties;
45+
import java.util.Random;
46+
import java.util.UUID;
47+
import java.util.concurrent.Executors;
48+
import java.util.concurrent.ScheduledExecutorService;
49+
import java.util.concurrent.TimeUnit;
50+
import java.util.stream.Collectors;
51+
import jdk.jfr.Recording;
5152
import org.slf4j.Logger;
5253
import org.slf4j.LoggerFactory;
5354

@@ -69,9 +70,83 @@ public final class OpenJdkController implements Controller {
6970
private final Map<String, String> recordingSettings;
7071
private final boolean jfrStackDepthApplied;
7172

73+
private final ScheduledExecutorService burstExecutor =
74+
Executors.newSingleThreadScheduledExecutor(
75+
r -> {
76+
Thread t = new Thread(r, "Burst Trace Scheduler");
77+
t.setDaemon(true);
78+
return t;
79+
});
80+
81+
private class ScheduledTask implements Runnable {
82+
private final Random rnd = new Random(UUID.randomUUID().getLeastSignificantBits());
83+
private boolean armed = false;
84+
private final Map<String, String> settings;
85+
private final Duration interval;
86+
private final Duration duration;
87+
88+
private ScheduledTask() {
89+
try {
90+
Properties props = new Properties();
91+
try (InputStream is = OpenJdkController.class.getResourceAsStream("/jfr/bursty.jfp")) {
92+
props.load(is);
93+
}
94+
settings =
95+
props.entrySet().stream()
96+
.collect(Collectors.toMap(e -> (String) e.getKey(), e -> (String) e.getValue()));
97+
this.interval =
98+
Duration.ofMillis(
99+
configProvider.getLong(
100+
PROFILING_BURST_INTERVAl_MS, PROFILING_BURST_INTERVAl_MS_DEFAULT));
101+
this.duration =
102+
Duration.ofMillis(
103+
configProvider.getLong(
104+
PROFILING_BURST_DURATION_MS, PROFILING_BURST_DURATION_MS_DEFAULT));
105+
} catch (IOException e) {
106+
throw new RuntimeException(e);
107+
}
108+
}
109+
110+
@Override
111+
public void run() {
112+
if (interval.toMillis() < 0) {
113+
// bursts are disabled
114+
return;
115+
}
116+
double u = rnd.nextDouble();
117+
long nextMs = Math.round(-(Math.log(u) * interval.toMillis())); // 5 mins average interval
118+
if (!armed) {
119+
armed = true;
120+
log.info("Scheduling bursty tracing in {} ms", nextMs);
121+
burstExecutor.schedule(this, nextMs, TimeUnit.MILLISECONDS);
122+
return;
123+
} else {
124+
nextMs += 15_000; // offset the next start by the max recording duration
125+
}
126+
// Let's start a new recording with extremely detailed latency events.
127+
// This recording is not persisted and is automatically discarded.
128+
// However, due to have settings-merging works in JFR the thresholds
129+
// for latency events will be lowered also for the main recording.
130+
// Also, the thresholds will be restored when this recording ends,
131+
// automatically.
132+
log.info("Burst Trace Scheduler is executing");
133+
Recording recording = new Recording();
134+
recording.setName("Burst Trace");
135+
recording.setSettings(settings);
136+
recording.setDuration(duration);
137+
recording.setToDisk(false);
138+
recording.setMaxSize(64 * 1024);
139+
recording.start();
140+
log.info("Scheduling next bursty tracing in {} ms", nextMs);
141+
burstExecutor.schedule(this, nextMs, TimeUnit.MILLISECONDS);
142+
}
143+
}
144+
72145
/**
 * Factory for the OpenJDK profiling controller.
 *
 * <p>Diff view: the line marked {@code 74-} is the removed body, which returned the new
 * controller directly. The lines marked {@code 147+} to {@code 149+} are the replacement, which
 * builds the controller, calls {@code startBurstTracing()} on it and then returns it.
 *
 * @param configProvider configuration source passed to the controller constructor
 * @return the newly created controller
 * @throws ConfigurationException declared by this factory; raised outside the visible code
 * @throws ClassNotFoundException declared by this factory; raised outside the visible code
 */
public static Controller instance(ConfigProvider configProvider)
73146
throws ConfigurationException, ClassNotFoundException {
74-
return new OpenJdkController(configProvider);
147+
OpenJdkController ctrl = new OpenJdkController(configProvider);
148+
ctrl.startBurstTracing();
149+
return ctrl;
75150
}
76151

77152
/**
@@ -320,4 +395,8 @@ private int getConfiguredStackDepth(ConfigProvider configProvider) {
320395
return configProvider.getInteger(
321396
ProfilingConfig.PROFILING_STACKDEPTH, ProfilingConfig.PROFILING_STACKDEPTH_DEFAULT);
322397
}
398+
399+
private void startBurstTracing() {
400+
burstExecutor.schedule(new ScheduledTask(), 0, TimeUnit.SECONDS);
401+
}
323402
}

Diff for: dd-trace-api/src/main/java/datadog/trace/api/config/ProfilingConfig.java

+7
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,13 @@ public final class ProfilingConfig {
124124
public static final String PROFILING_DATADOG_PROFILER_SCHEDULING_EVENT_INTERVAL =
125125
"profiling.experimental.ddprof.scheduling.event.interval";
126126

127+
// Average interval between burst recordings, in milliseconds. OpenJdkController treats a
// negative value (the default) as "bursts disabled".
// NOTE(review): the identifier ends in "INTERVAl" with a lowercase 'l', and the key lacks an
// "interval" segment unlike the sibling "burst.duration.ms" key. Both are kept as-is because
// they are referenced elsewhere; confirm before renaming.
public static final String PROFILING_BURST_INTERVAl_MS = "profiling.experimental.burst.ms";
128+
public static final int PROFILING_BURST_INTERVAl_MS_DEFAULT = -1; // disabled
129+
130+
// Length of a single burst recording, in milliseconds.
public static final String PROFILING_BURST_DURATION_MS =
131+
"profiling.experimental.burst.duration.ms";
132+
public static final int PROFILING_BURST_DURATION_MS_DEFAULT = 15 * 1000; // 15 seconds
133+
127134
public static final String PROFILING_DATADOG_PROFILER_LOG_LEVEL = "profiling.ddprof.loglevel";
128135

129136
public static final String PROFILING_DATADOG_PROFILER_LOG_LEVEL_DEFAULT = "NONE";

0 commit comments

Comments
 (0)