From 33e2446275e4ce3407434c1dd9ca0fc98fcae100 Mon Sep 17 00:00:00 2001 From: Andrea Marziali Date: Mon, 3 Nov 2025 08:45:49 +0100 Subject: [PATCH 01/18] Removes jctools usage for lock-free queues. --- dd-java-agent/agent-builder/gradle.lockfile | 1 - dd-java-agent/agent-debugger/gradle.lockfile | 1 - dd-java-agent/agent-llmobs/build.gradle | 1 - .../trace/llmobs/EvalProcessingWorker.java | 10 +- .../profiling-controller-jfr/build.gradle | 1 - ...nSubstitutionProcessorInstrumentation.java | 3 - ...dog_jctools_util_UnsafeRefArrayAccess.java | 12 - dd-trace-core/build.gradle | 1 - .../trace/common/metrics/Aggregator.java | 12 +- .../metrics/ConflatingMetricsAggregator.java | 8 +- .../trace/common/metrics/OkHttpSink.java | 4 +- .../common/writer/SpanSamplingWorker.java | 5 +- .../common/writer/TraceProcessingWorker.java | 21 +- .../trace/core/PendingTraceBuffer.java | 10 +- .../DefaultDataStreamsMonitoring.java | 2 +- .../writer/SpanSamplingWorkerTest.groovy | 6 +- gradle/dependencies.gradle | 1 - internal-api/build.gradle.kts | 2 + .../util/queue/JcToolsMPSCQueueBenchmark.java | 81 +++++ .../util/queue/JcToolsSPMCQueueBenchmark.java | 84 ++++++ .../util/queue/JcToolsSPSCQueueBenchmark.java | 71 +++++ .../trace/util/queue/MPSCQueueBenchmark.java | 78 +++++ .../trace/util/queue/SPMCQueueBenchmark.java | 83 +++++ .../trace/util/queue/SPSCQueueBenchmark.java | 70 +++++ .../java/datadog/trace/util/BitUtils.java | 34 +++ .../trace/util/queue/MpscArrayQueue.java | 284 ++++++++++++++++++ .../queue/MpscBlockingConsumerArrayQueue.java | 123 ++++++++ .../trace/util/queue/SpmcArrayQueue.java | 192 ++++++++++++ .../trace/util/queue/SpscArrayQueue.java | 194 ++++++++++++ .../datadog/trace/util/BitUtilsTest.groovy | 42 +++ .../trace/util/queue/AbstractQueueTest.groovy | 163 ++++++++++ .../util/queue/MpscArrayQueueTest.groovy | 61 ++++ .../MpscBlockingConsumerArrayQueueTest.groovy | 197 ++++++++++++ .../util/queue/SpmcArrayQueueTest.groovy | 59 ++++ 
.../util/queue/SpscArrayQueueTest.groovy | 42 +++ 35 files changed, 1898 insertions(+), 61 deletions(-) delete mode 100644 dd-java-agent/instrumentation/graal/native-image/src/main/java/datadog/trace/instrumentation/graal/nativeimage/Target_datadog_jctools_util_UnsafeRefArrayAccess.java create mode 100644 internal-api/src/jmh/java/datadog/trace/util/queue/JcToolsMPSCQueueBenchmark.java create mode 100644 internal-api/src/jmh/java/datadog/trace/util/queue/JcToolsSPMCQueueBenchmark.java create mode 100644 internal-api/src/jmh/java/datadog/trace/util/queue/JcToolsSPSCQueueBenchmark.java create mode 100644 internal-api/src/jmh/java/datadog/trace/util/queue/MPSCQueueBenchmark.java create mode 100644 internal-api/src/jmh/java/datadog/trace/util/queue/SPMCQueueBenchmark.java create mode 100644 internal-api/src/jmh/java/datadog/trace/util/queue/SPSCQueueBenchmark.java create mode 100644 internal-api/src/main/java/datadog/trace/util/BitUtils.java create mode 100644 internal-api/src/main/java/datadog/trace/util/queue/MpscArrayQueue.java create mode 100644 internal-api/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueue.java create mode 100644 internal-api/src/main/java/datadog/trace/util/queue/SpmcArrayQueue.java create mode 100644 internal-api/src/main/java/datadog/trace/util/queue/SpscArrayQueue.java create mode 100644 internal-api/src/test/groovy/datadog/trace/util/BitUtilsTest.groovy create mode 100644 internal-api/src/test/groovy/datadog/trace/util/queue/AbstractQueueTest.groovy create mode 100644 internal-api/src/test/groovy/datadog/trace/util/queue/MpscArrayQueueTest.groovy create mode 100644 internal-api/src/test/groovy/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueTest.groovy create mode 100644 internal-api/src/test/groovy/datadog/trace/util/queue/SpmcArrayQueueTest.groovy create mode 100644 internal-api/src/test/groovy/datadog/trace/util/queue/SpscArrayQueueTest.groovy diff --git a/dd-java-agent/agent-builder/gradle.lockfile 
b/dd-java-agent/agent-builder/gradle.lockfile index d1e26b993e8..642dfd9a2fd 100644 --- a/dd-java-agent/agent-builder/gradle.lockfile +++ b/dd-java-agent/agent-builder/gradle.lockfile @@ -109,7 +109,6 @@ org.jacoco:org.jacoco.agent:0.8.14=jacocoAgent,jacocoAnt org.jacoco:org.jacoco.ant:0.8.14=jacocoAnt org.jacoco:org.jacoco.core:0.8.14=jacocoAnt org.jacoco:org.jacoco.report:0.8.14=jacocoAnt -org.jctools:jctools-core:3.3.0=runtimeClasspath,testRuntimeClasspath org.junit.jupiter:junit-jupiter-api:5.12.2=testCompileClasspath,testRuntimeClasspath org.junit.jupiter:junit-jupiter-engine:5.12.2=testRuntimeClasspath org.junit.jupiter:junit-jupiter-params:5.12.2=testCompileClasspath,testRuntimeClasspath diff --git a/dd-java-agent/agent-debugger/gradle.lockfile b/dd-java-agent/agent-debugger/gradle.lockfile index 5281b239dcd..44bbe2e0df8 100644 --- a/dd-java-agent/agent-debugger/gradle.lockfile +++ b/dd-java-agent/agent-debugger/gradle.lockfile @@ -123,7 +123,6 @@ org.jacoco:org.jacoco.agent:0.8.14=jacocoAgent,jacocoAnt org.jacoco:org.jacoco.ant:0.8.14=jacocoAnt org.jacoco:org.jacoco.core:0.8.14=jacocoAnt org.jacoco:org.jacoco.report:0.8.14=jacocoAnt -org.jctools:jctools-core:3.3.0=testRuntimeClasspath org.jetbrains.intellij.deps:trove4j:1.0.20200330=testRuntimeClasspath org.jetbrains.kotlin:kotlin-compiler-embeddable:2.1.21=testCompileClasspath,testRuntimeClasspath org.jetbrains.kotlin:kotlin-daemon-embeddable:2.1.21=testRuntimeClasspath diff --git a/dd-java-agent/agent-llmobs/build.gradle b/dd-java-agent/agent-llmobs/build.gradle index b0a327bbcff..51886fe6449 100644 --- a/dd-java-agent/agent-llmobs/build.gradle +++ b/dd-java-agent/agent-llmobs/build.gradle @@ -24,7 +24,6 @@ minimumInstructionCoverage = 0.0 dependencies { api libs.slf4j - implementation libs.jctools implementation project(':communication') implementation project(':components:json') diff --git a/dd-java-agent/agent-llmobs/src/main/java/datadog/trace/llmobs/EvalProcessingWorker.java 
b/dd-java-agent/agent-llmobs/src/main/java/datadog/trace/llmobs/EvalProcessingWorker.java index 1e17f90b22c..75b2edd7cf6 100644 --- a/dd-java-agent/agent-llmobs/src/main/java/datadog/trace/llmobs/EvalProcessingWorker.java +++ b/dd-java-agent/agent-llmobs/src/main/java/datadog/trace/llmobs/EvalProcessingWorker.java @@ -12,6 +12,7 @@ import datadog.communication.http.OkHttpUtils; import datadog.trace.api.Config; import datadog.trace.llmobs.domain.LLMObsEval; +import datadog.trace.util.queue.MpscArrayQueue; import java.util.ArrayList; import java.util.List; import java.util.concurrent.TimeUnit; @@ -20,7 +21,6 @@ import okhttp3.OkHttpClient; import okhttp3.Request; import okhttp3.RequestBody; -import org.jctools.queues.MpscBlockingConsumerArrayQueue; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -34,7 +34,7 @@ public class EvalProcessingWorker implements AutoCloseable { private static final Logger log = LoggerFactory.getLogger(EvalProcessingWorker.class); - private final MpscBlockingConsumerArrayQueue queue; + private final MpscArrayQueue queue; private final Thread serializerThread; public EvalProcessingWorker( @@ -43,7 +43,7 @@ public EvalProcessingWorker( final TimeUnit timeUnit, final SharedCommunicationObjects sco, Config config) { - this.queue = new MpscBlockingConsumerArrayQueue<>(capacity); + this.queue = new MpscArrayQueue<>(capacity); boolean isAgentless = config.isLlmObsAgentlessEnabled(); if (isAgentless && (config.getApiKey() == null || config.getApiKey().isEmpty())) { @@ -98,7 +98,7 @@ public static class EvalSerializingHandler implements Runnable { private static final Logger log = LoggerFactory.getLogger(EvalSerializingHandler.class); private static final int FLUSH_THRESHOLD = 50; - private final MpscBlockingConsumerArrayQueue queue; + private final MpscArrayQueue queue; private final long ticksRequiredToFlush; private long lastTicks; @@ -111,7 +111,7 @@ public static class EvalSerializingHandler implements Runnable { private final List 
buffer = new ArrayList<>(); public EvalSerializingHandler( - final MpscBlockingConsumerArrayQueue queue, + final MpscArrayQueue queue, final long flushInterval, final TimeUnit timeUnit, final HttpUrl submissionUrl, diff --git a/dd-java-agent/agent-profiling/profiling-controller-jfr/build.gradle b/dd-java-agent/agent-profiling/profiling-controller-jfr/build.gradle index 4e72e59a468..85af5ff6b12 100644 --- a/dd-java-agent/agent-profiling/profiling-controller-jfr/build.gradle +++ b/dd-java-agent/agent-profiling/profiling-controller-jfr/build.gradle @@ -14,7 +14,6 @@ testJvmConstraints { dependencies { api project(':dd-java-agent:agent-profiling:profiling-controller') - implementation libs.jctools implementation libs.slf4j annotationProcessor libs.autoservice.processor diff --git a/dd-java-agent/instrumentation/graal/native-image/src/main/java/datadog/trace/instrumentation/graal/nativeimage/AnnotationSubstitutionProcessorInstrumentation.java b/dd-java-agent/instrumentation/graal/native-image/src/main/java/datadog/trace/instrumentation/graal/nativeimage/AnnotationSubstitutionProcessorInstrumentation.java index 18d41331a00..8ac20dbe71a 100644 --- a/dd-java-agent/instrumentation/graal/native-image/src/main/java/datadog/trace/instrumentation/graal/nativeimage/AnnotationSubstitutionProcessorInstrumentation.java +++ b/dd-java-agent/instrumentation/graal/native-image/src/main/java/datadog/trace/instrumentation/graal/nativeimage/AnnotationSubstitutionProcessorInstrumentation.java @@ -37,7 +37,6 @@ public void methodAdvice(MethodTransformer transformer) { public String[] helperClassNames() { return new String[] { packageName + ".Target_com_datadog_profiling_agent_ProcessContext", - packageName + ".Target_datadog_jctools_util_UnsafeRefArrayAccess", packageName + ".Target_org_datadog_jmxfetch_App", packageName + ".Target_org_datadog_jmxfetch_Status", packageName + ".Target_org_datadog_jmxfetch_reporter_JsonReporter", @@ -52,7 +51,6 @@ public String[] muzzleIgnoredClassNames() { 
"jdk.vm.ci.meta.ResolvedJavaField", // ignore helper class names as usual packageName + ".Target_com_datadog_profiling_agent_ProcessContext", - packageName + ".Target_datadog_jctools_util_UnsafeRefArrayAccess", packageName + ".Target_org_datadog_jmxfetch_App", packageName + ".Target_org_datadog_jmxfetch_Status", packageName + ".Target_org_datadog_jmxfetch_reporter_JsonReporter", @@ -63,7 +61,6 @@ public static class FindTargetClassesAdvice { @Advice.OnMethodExit(suppress = Throwable.class) public static void onExit(@Advice.Return(readOnly = false) List> result) { result.add(Target_com_datadog_profiling_agent_ProcessContext.class); - result.add(Target_datadog_jctools_util_UnsafeRefArrayAccess.class); result.add(Target_org_datadog_jmxfetch_App.class); result.add(Target_org_datadog_jmxfetch_Status.class); result.add(Target_org_datadog_jmxfetch_reporter_JsonReporter.class); diff --git a/dd-java-agent/instrumentation/graal/native-image/src/main/java/datadog/trace/instrumentation/graal/nativeimage/Target_datadog_jctools_util_UnsafeRefArrayAccess.java b/dd-java-agent/instrumentation/graal/native-image/src/main/java/datadog/trace/instrumentation/graal/nativeimage/Target_datadog_jctools_util_UnsafeRefArrayAccess.java deleted file mode 100644 index e00ce7b1387..00000000000 --- a/dd-java-agent/instrumentation/graal/native-image/src/main/java/datadog/trace/instrumentation/graal/nativeimage/Target_datadog_jctools_util_UnsafeRefArrayAccess.java +++ /dev/null @@ -1,12 +0,0 @@ -package datadog.trace.instrumentation.graal.nativeimage; - -import com.oracle.svm.core.annotate.Alias; -import com.oracle.svm.core.annotate.RecomputeFieldValue; -import com.oracle.svm.core.annotate.TargetClass; - -@TargetClass(className = "datadog.jctools.util.UnsafeRefArrayAccess") -public final class Target_datadog_jctools_util_UnsafeRefArrayAccess { - @Alias - @RecomputeFieldValue(kind = RecomputeFieldValue.Kind.ArrayIndexShift, declClass = Object[].class) - public static int REF_ELEMENT_SHIFT; -} diff 
--git a/dd-trace-core/build.gradle b/dd-trace-core/build.gradle index 7b111ed4e38..882d6b6cd06 100644 --- a/dd-trace-core/build.gradle +++ b/dd-trace-core/build.gradle @@ -74,7 +74,6 @@ dependencies { implementation libs.slf4j implementation libs.moshi - implementation libs.jctools implementation group: 'com.datadoghq', name: 'sketches-java', version: '0.8.3' diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/Aggregator.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/Aggregator.java index 049ab311a97..00d0b6cec1d 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/Aggregator.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/Aggregator.java @@ -4,14 +4,14 @@ import datadog.trace.common.metrics.SignalItem.StopSignal; import datadog.trace.core.util.LRUCache; +import datadog.trace.util.queue.MpscArrayQueue; import java.util.Iterator; import java.util.Map; import java.util.Queue; import java.util.Set; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.TimeUnit; -import org.jctools.queues.MessagePassingQueue; -import org.jctools.queues.MpscCompoundQueue; +import java.util.function.Consumer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -22,7 +22,7 @@ final class Aggregator implements Runnable { private static final Logger log = LoggerFactory.getLogger(Aggregator.class); private final Queue batchPool; - private final MpscCompoundQueue inbox; + private final MpscArrayQueue inbox; private final LRUCache aggregates; private final ConcurrentMap pending; private final Set commonKeys; @@ -39,7 +39,7 @@ final class Aggregator implements Runnable { Aggregator( MetricWriter writer, Queue batchPool, - MpscCompoundQueue inbox, + MpscArrayQueue inbox, ConcurrentMap pending, final Set commonKeys, int maxAggregates, @@ -60,7 +60,7 @@ final class Aggregator implements Runnable { Aggregator( MetricWriter writer, Queue batchPool, - MpscCompoundQueue inbox, + MpscArrayQueue inbox, 
ConcurrentMap pending, final Set commonKeys, int maxAggregates, @@ -103,7 +103,7 @@ public void run() { log.debug("metrics aggregator exited"); } - private final class Drainer implements MessagePassingQueue.Consumer { + private final class Drainer implements Consumer { boolean stopped = false; diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/ConflatingMetricsAggregator.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/ConflatingMetricsAggregator.java index 9ca468c24b4..60c7ee446c8 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/ConflatingMetricsAggregator.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/ConflatingMetricsAggregator.java @@ -33,6 +33,8 @@ import datadog.trace.core.DDTraceCoreInfo; import datadog.trace.core.monitor.HealthMetrics; import datadog.trace.util.AgentTaskScheduler; +import datadog.trace.util.queue.MpscArrayQueue; +import datadog.trace.util.queue.SpmcArrayQueue; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -46,8 +48,6 @@ import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.function.Function; -import org.jctools.queues.MpscCompoundQueue; -import org.jctools.queues.SpmcArrayQueue; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -93,7 +93,7 @@ public final class ConflatingMetricsAggregator implements MetricsAggregator, Eve private final ConcurrentHashMap pending; private final ConcurrentHashMap keys; private final Thread thread; - private final MpscCompoundQueue inbox; + private final MpscArrayQueue inbox; private final Sink sink; private final Aggregator aggregator; private final long reportingInterval; @@ -176,7 +176,7 @@ public ConflatingMetricsAggregator( long reportingInterval, TimeUnit timeUnit) { this.ignoredResources = ignoredResources; - this.inbox = new MpscCompoundQueue<>(queueSize); + this.inbox = new MpscArrayQueue<>(queueSize); this.batchPool = new 
SpmcArrayQueue<>(maxAggregates); this.pending = new ConcurrentHashMap<>(maxAggregates * 4 / 3); this.keys = new ConcurrentHashMap<>(); diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/OkHttpSink.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/OkHttpSink.java index aa7a735f57d..b717a3dbcf1 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/OkHttpSink.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/OkHttpSink.java @@ -10,6 +10,7 @@ import static java.util.concurrent.TimeUnit.SECONDS; import datadog.trace.util.AgentTaskScheduler; +import datadog.trace.util.queue.SpscArrayQueue; import java.io.IOException; import java.nio.ByteBuffer; import java.util.Collections; @@ -23,7 +24,6 @@ import okhttp3.OkHttpClient; import okhttp3.Request; import okhttp3.RequestBody; -import org.jctools.queues.SpscArrayQueue; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -36,7 +36,7 @@ public final class OkHttpSink implements Sink, EventListener { private final OkHttpClient client; private final HttpUrl metricsUrl; private final List listeners; - private final SpscArrayQueue enqueuedRequests = new SpscArrayQueue<>(10); + private final SpscArrayQueue enqueuedRequests = new SpscArrayQueue<>(16); private final AtomicLong lastRequestTime = new AtomicLong(); private final AtomicLong asyncRequestCounter = new AtomicLong(); private final boolean bufferingEnabled; diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/SpanSamplingWorker.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/SpanSamplingWorker.java index 111fcd9e1cc..f76f5aa2073 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/SpanSamplingWorker.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/SpanSamplingWorker.java @@ -9,11 +9,10 @@ import datadog.trace.common.sampling.SingleSpanSampler; import datadog.trace.core.DDSpan; import datadog.trace.core.monitor.HealthMetrics; +import 
datadog.trace.util.queue.MpscBlockingConsumerArrayQueue; import java.util.ArrayList; import java.util.List; import java.util.Queue; -import org.jctools.queues.MessagePassingQueue; -import org.jctools.queues.MpscBlockingConsumerArrayQueue; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -172,7 +171,7 @@ public void onEvent(Object event) { } } - private void consumeBatch(MessagePassingQueue queue) { + private void consumeBatch(MpscBlockingConsumerArrayQueue queue) { queue.drain(this::onEvent, queue.size()); } } diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/TraceProcessingWorker.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/TraceProcessingWorker.java index e5bddd5c48d..bb730045c54 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/TraceProcessingWorker.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/TraceProcessingWorker.java @@ -15,12 +15,11 @@ import datadog.trace.core.CoreSpan; import datadog.trace.core.DDSpan; import datadog.trace.core.monitor.HealthMetrics; +import datadog.trace.util.queue.MpscArrayQueue; import java.util.List; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.function.BooleanSupplier; -import org.jctools.queues.MessagePassingQueue; -import org.jctools.queues.MpscBlockingConsumerArrayQueue; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -36,8 +35,8 @@ public class TraceProcessingWorker implements AutoCloseable { private static final Logger log = LoggerFactory.getLogger(TraceProcessingWorker.class); private final PrioritizationStrategy prioritizationStrategy; - private final MpscBlockingConsumerArrayQueue primaryQueue; - private final MpscBlockingConsumerArrayQueue secondaryQueue; + private final MpscArrayQueue primaryQueue; + private final MpscArrayQueue secondaryQueue; private final TraceSerializingHandler serializingHandler; private final Thread serializerThread; private final int capacity; @@ 
-121,14 +120,14 @@ public long getRemainingCapacity() { return primaryQueue.remainingCapacity(); } - private static MpscBlockingConsumerArrayQueue createQueue(int capacity) { - return new MpscBlockingConsumerArrayQueue<>(capacity); + private static MpscArrayQueue createQueue(int capacity) { + return new MpscArrayQueue<>(capacity); } public static class TraceSerializingHandler implements Runnable { - private final MpscBlockingConsumerArrayQueue primaryQueue; - private final MpscBlockingConsumerArrayQueue secondaryQueue; + private final MpscArrayQueue primaryQueue; + private final MpscArrayQueue secondaryQueue; private final HealthMetrics healthMetrics; private final long ticksRequiredToFlush; private final boolean doTimeFlush; @@ -136,8 +135,8 @@ public static class TraceSerializingHandler implements Runnable { private long lastTicks; public TraceSerializingHandler( - final MpscBlockingConsumerArrayQueue primaryQueue, - final MpscBlockingConsumerArrayQueue secondaryQueue, + final MpscArrayQueue primaryQueue, + final MpscArrayQueue secondaryQueue, final HealthMetrics healthMetrics, final PayloadDispatcher payloadDispatcher, final long flushInterval, @@ -238,7 +237,7 @@ private boolean shouldFlush() { return false; } - private void consumeBatch(MessagePassingQueue queue) { + private void consumeBatch(MpscArrayQueue queue) { queue.drain(this::onEvent, queue.size()); } diff --git a/dd-trace-core/src/main/java/datadog/trace/core/PendingTraceBuffer.java b/dd-trace-core/src/main/java/datadog/trace/core/PendingTraceBuffer.java index 0057eb2ce7d..bfb74c34455 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/PendingTraceBuffer.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/PendingTraceBuffer.java @@ -12,16 +12,17 @@ import datadog.trace.api.time.TimeSource; import datadog.trace.common.writer.TraceDumpJsonExporter; import datadog.trace.core.monitor.HealthMetrics; +import datadog.trace.util.queue.MpscBlockingConsumerArrayQueue; import java.io.IOException; 
import java.util.ArrayList; import java.util.Comparator; import java.util.List; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Consumer; import java.util.function.Predicate; +import java.util.function.Supplier; import java.util.zip.ZipOutputStream; -import org.jctools.queues.MessagePassingQueue; -import org.jctools.queues.MpscBlockingConsumerArrayQueue; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -136,7 +137,7 @@ public void flush() { } } - private static final class WriteDrain implements MessagePassingQueue.Consumer { + private static final class WriteDrain implements Consumer { private static final WriteDrain WRITE_DRAIN = new WriteDrain(); @Override @@ -145,8 +146,7 @@ public void accept(Element pendingTrace) { } } - private static final class DumpDrain - implements MessagePassingQueue.Consumer, MessagePassingQueue.Supplier { + private static final class DumpDrain implements Consumer, Supplier { private static final Logger LOGGER = LoggerFactory.getLogger(DumpDrain.class); private static final DumpDrain DUMP_DRAIN = new DumpDrain(); private static final int MAX_DUMPED_TRACES = 50; diff --git a/dd-trace-core/src/main/java/datadog/trace/core/datastreams/DefaultDataStreamsMonitoring.java b/dd-trace-core/src/main/java/datadog/trace/core/datastreams/DefaultDataStreamsMonitoring.java index c86b9402081..88d4845fb2c 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/datastreams/DefaultDataStreamsMonitoring.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/datastreams/DefaultDataStreamsMonitoring.java @@ -28,6 +28,7 @@ import datadog.trace.core.DDSpan; import datadog.trace.core.DDTraceCoreInfo; import datadog.trace.util.AgentTaskScheduler; +import datadog.trace.util.queue.MpscArrayQueue; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; @@ -37,7 +38,6 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.TimeUnit; 
import java.util.function.Supplier; -import org.jctools.queues.MpscArrayQueue; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/dd-trace-core/src/test/groovy/datadog/trace/common/writer/SpanSamplingWorkerTest.groovy b/dd-trace-core/src/test/groovy/datadog/trace/common/writer/SpanSamplingWorkerTest.groovy index b5f265168f7..e9cfc737297 100644 --- a/dd-trace-core/src/test/groovy/datadog/trace/common/writer/SpanSamplingWorkerTest.groovy +++ b/dd-trace-core/src/test/groovy/datadog/trace/common/writer/SpanSamplingWorkerTest.groovy @@ -104,9 +104,9 @@ class SpanSamplingWorkerTest extends DDSpecification { singleSpanSampler.setSamplingPriority(span7) >> true when: - worker.getSpanSamplingQueue().offer([span1, span2, span3]) - worker.getSpanSamplingQueue().offer([span4, span5]) - worker.getSpanSamplingQueue().offer([span6, span7]) + assert worker.getSpanSamplingQueue().offer([span1, span2, span3]) + assert worker.getSpanSamplingQueue().offer([span4, span5]) + assert worker.getSpanSamplingQueue().offer([span6, span7]) then: primaryQueue.take() == [span1, span3] diff --git a/gradle/dependencies.gradle b/gradle/dependencies.gradle index 4c18ee280e7..250d595bb4b 100644 --- a/gradle/dependencies.gradle +++ b/gradle/dependencies.gradle @@ -73,7 +73,6 @@ CachedData.deps.shared = [ libs.dogstatsd, libs.jnr.unixsocket, libs.moshi, - libs.jctools, libs.lz4, libs.aircompressor ] diff --git a/internal-api/build.gradle.kts b/internal-api/build.gradle.kts index f05ab8f0b2a..be1183f9da7 100644 --- a/internal-api/build.gradle.kts +++ b/internal-api/build.gradle.kts @@ -276,6 +276,8 @@ dependencies { // it contains annotations that are also present in the instrumented application classes api("com.datadoghq:dd-javac-plugin-client:0.2.2") + jmhImplementation(libs.jctools) + testImplementation("org.snakeyaml:snakeyaml-engine:2.9") testImplementation(project(":utils:test-utils")) testImplementation(libs.bundles.junit5) diff --git 
a/internal-api/src/jmh/java/datadog/trace/util/queue/JcToolsMPSCQueueBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/queue/JcToolsMPSCQueueBenchmark.java new file mode 100644 index 00000000000..895618efbb6 --- /dev/null +++ b/internal-api/src/jmh/java/datadog/trace/util/queue/JcToolsMPSCQueueBenchmark.java @@ -0,0 +1,81 @@ +package datadog.trace.util.queue; + +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import org.jctools.queues.MpscArrayQueue; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Group; +import org.openjdk.jmh.annotations.GroupThreads; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +/* +Benchmark (capacity) Mode Cnt Score Error Units +JcToolsdMPSCQueueBenchmark.queueTest 1024 thrpt 75.207 ops/us +JcToolsdMPSCQueueBenchmark.queueTest:consume 1024 thrpt 62.553 ops/us +JcToolsdMPSCQueueBenchmark.queueTest:produce 1024 thrpt 12.654 ops/us +JcToolsdMPSCQueueBenchmark.queueTest 65536 thrpt 36.381 ops/us +JcToolsdMPSCQueueBenchmark.queueTest:consume 65536 thrpt 22.665 ops/us +JcToolsdMPSCQueueBenchmark.queueTest:produce 65536 thrpt 13.717 ops/us + */ +@BenchmarkMode(Mode.Throughput) +@Warmup(iterations = 1, time = 30) +@Measurement(iterations = 1, time = 30) +@Threads(Threads.MAX) +@Fork(1) +@OutputTimeUnit(TimeUnit.MICROSECONDS) +@State(Scope.Benchmark) +public class JcToolsMPSCQueueBenchmark { + @State(Scope.Group) + public static class 
QueueState { + MpscArrayQueue queue; + CountDownLatch consumerReady; + + @Param({"1024", "65536"}) + int capacity; + + @Setup(Level.Iteration) + public void setup() { + queue = new MpscArrayQueue<>(capacity); + consumerReady = new CountDownLatch(1); + } + } + + @Benchmark + @Group("queueTest") + @GroupThreads(4) + public void produce(QueueState state) { + try { + state.consumerReady.await(); // wait until consumer is ready + } catch (InterruptedException ignored) { + } + + // bounded attempt: try once, then yield if full + boolean offered = state.queue.offer(0); + if (!offered) { + Thread.yield(); + } + } + + @Benchmark + @Group("queueTest") + @GroupThreads(1) + public void consume(QueueState state, Blackhole bh) { + state.consumerReady.countDown(); // signal producers can start + Integer v = state.queue.poll(); + if (v != null) { + bh.consume(v); + } + } +} diff --git a/internal-api/src/jmh/java/datadog/trace/util/queue/JcToolsSPMCQueueBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/queue/JcToolsSPMCQueueBenchmark.java new file mode 100644 index 00000000000..97eb3b0a875 --- /dev/null +++ b/internal-api/src/jmh/java/datadog/trace/util/queue/JcToolsSPMCQueueBenchmark.java @@ -0,0 +1,84 @@ +package datadog.trace.util.queue; + +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.locks.LockSupport; +import org.jctools.queues.SpmcArrayQueue; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Group; +import org.openjdk.jmh.annotations.GroupThreads; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; + +/* +Benchmark Mode Cnt Score Error Units 
+JcToolsSPMCQueueBenchmark.spmc thrpt 5 324.804 ± 15.512 ops/us +JcToolsSPMCQueueBenchmark.spmc:consumer thrpt 5 309.039 ± 15.960 ops/us +JcToolsSPMCQueueBenchmark.spmc:producer thrpt 5 15.765 ± 0.464 ops/us + */ +@BenchmarkMode(Mode.Throughput) +@State(Scope.Group) +@Fork(value = 1, warmups = 0) +@OutputTimeUnit(TimeUnit.MICROSECONDS) +public class JcToolsSPMCQueueBenchmark { + + private static final int QUEUE_CAPACITY = 1024; + private static final int ITEMS_TO_PRODUCE = 100_000; + + private SpmcArrayQueue queue; + private AtomicInteger produced; + private AtomicInteger consumed; + + @Setup(Level.Iteration) + public void setup() { + queue = new SpmcArrayQueue<>(QUEUE_CAPACITY); + produced = new AtomicInteger(0); + consumed = new AtomicInteger(0); + + // Pre-fill queue for warmup safety + int warmupFill = Math.min(QUEUE_CAPACITY / 2, ITEMS_TO_PRODUCE); + for (int i = 0; i < warmupFill; i++) { + queue.offer(i); + produced.incrementAndGet(); + } + } + + // Single producer in the group + @Benchmark + @Group("spmc") + @GroupThreads(1) + public void producer() { + int i = produced.getAndIncrement(); + if (i < ITEMS_TO_PRODUCE) { + while (!queue.offer(i)) { + LockSupport.parkNanos(1L); + } + } + } + + // Multiple consumers in the group + @Benchmark + @Group("spmc") + @GroupThreads(4) // adjust number of consumers + public int consumer() { + while (true) { + Integer val = queue.poll(); + if (val != null) { + consumed.incrementAndGet(); + return val; + } + + if (produced.get() >= ITEMS_TO_PRODUCE && queue.isEmpty()) { + return 0; + } + + LockSupport.parkNanos(1L); + } + } +} diff --git a/internal-api/src/jmh/java/datadog/trace/util/queue/JcToolsSPSCQueueBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/queue/JcToolsSPSCQueueBenchmark.java new file mode 100644 index 00000000000..801ee2e964c --- /dev/null +++ b/internal-api/src/jmh/java/datadog/trace/util/queue/JcToolsSPSCQueueBenchmark.java @@ -0,0 +1,71 @@ +package datadog.trace.util.queue; + +import 
java.util.concurrent.TimeUnit; +import org.jctools.queues.SpscArrayQueue; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Group; +import org.openjdk.jmh.annotations.GroupThreads; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +/* +Benchmark (capacity) Mode Cnt Score Error Units +SPSCQueueBenchmark.queueTest 1024 thrpt 136.138 ops/us +SPSCQueueBenchmark.queueTest:consume 1024 thrpt 68.767 ops/us +SPSCQueueBenchmark.queueTest:produce 1024 thrpt 67.371 ops/us +SPSCQueueBenchmark.queueTest 65536 thrpt 127.357 ops/us +SPSCQueueBenchmark.queueTest:consume 65536 thrpt 65.933 ops/us +SPSCQueueBenchmark.queueTest:produce 65536 thrpt 61.424 ops/us + */ +@BenchmarkMode(Mode.Throughput) +@Warmup(iterations = 1, time = 30) +@Measurement(iterations = 1, time = 30) +@Fork(1) +@OutputTimeUnit(TimeUnit.MICROSECONDS) +@State(Scope.Benchmark) +public class JcToolsSPSCQueueBenchmark { + @State(Scope.Group) + public static class QueueState { + SpscArrayQueue queue; + + @Param({"1024", "65536"}) + int capacity; + + @Setup(Level.Iteration) + public void setup() { + queue = new SpscArrayQueue<>(capacity); + } + } + + @Benchmark + @Group("queueTest") + @GroupThreads(1) + public void produce(QueueState state) { + + // bounded attempt: try once, then yield if full + boolean offered = state.queue.offer(0); + if (!offered) { + Thread.yield(); + } + } + + @Benchmark + @Group("queueTest") + @GroupThreads(1) + public void consume(QueueState state, Blackhole bh) { + Integer v = 
state.queue.poll(); + if (v != null) { + bh.consume(v); + } + } +} diff --git a/internal-api/src/jmh/java/datadog/trace/util/queue/MPSCQueueBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/queue/MPSCQueueBenchmark.java new file mode 100644 index 00000000000..55046f41ec2 --- /dev/null +++ b/internal-api/src/jmh/java/datadog/trace/util/queue/MPSCQueueBenchmark.java @@ -0,0 +1,78 @@ +package datadog.trace.util.queue; + +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Group; +import org.openjdk.jmh.annotations.GroupThreads; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +/* +Benchmark (capacity) Mode Cnt Score Error Units +MPSCQueueBenchmark.queueTest 1024 thrpt 165.379 ops/us +MPSCQueueBenchmark.queueTest:consume 1024 thrpt 102.258 ops/us +MPSCQueueBenchmark.queueTest:produce 1024 thrpt 63.121 ops/us +MPSCQueueBenchmark.queueTest 65536 thrpt 135.953 ops/us +MPSCQueueBenchmark.queueTest:consume 65536 thrpt 69.384 ops/us +MPSCQueueBenchmark.queueTest:produce 65536 thrpt 66.569 ops/us + */ +@BenchmarkMode(Mode.Throughput) +@Warmup(iterations = 1, time = 30) +@Measurement(iterations = 1, time = 30) +@Fork(1) +@OutputTimeUnit(TimeUnit.MICROSECONDS) +@State(Scope.Benchmark) +public class MPSCQueueBenchmark { + @State(Scope.Group) + public static class QueueState { + MpscArrayQueue queue; + CountDownLatch consumerReady; + + @Param({"1024", "65536"}) + int capacity; 
+ + @Setup(Level.Iteration) + public void setup() { + queue = new MpscArrayQueue<>(capacity); + consumerReady = new CountDownLatch(1); + } + } + + @Benchmark + @Group("queueTest") + @GroupThreads(4) + public void produce(QueueState state) { + try { + state.consumerReady.await(); // wait until consumer is ready + } catch (InterruptedException ignored) { + } + + // bounded attempt: try once, then yield if full + boolean offered = state.queue.offer(0); + if (!offered) { + Thread.yield(); + } + } + + @Benchmark + @Group("queueTest") + @GroupThreads(1) + public void consume(QueueState state, Blackhole bh) { + state.consumerReady.countDown(); // signal producers can start + Integer v = state.queue.poll(); + if (v != null) { + bh.consume(v); + } + } +} diff --git a/internal-api/src/jmh/java/datadog/trace/util/queue/SPMCQueueBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/queue/SPMCQueueBenchmark.java new file mode 100644 index 00000000000..3273bbf351d --- /dev/null +++ b/internal-api/src/jmh/java/datadog/trace/util/queue/SPMCQueueBenchmark.java @@ -0,0 +1,83 @@ +package datadog.trace.util.queue; + +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.locks.LockSupport; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Group; +import org.openjdk.jmh.annotations.GroupThreads; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; + +/* +Benchmark Mode Cnt Score Error Units +SPMCQueueBenchmark.spmc thrpt 5 266.576 ± 9.589 ops/us +SPMCQueueBenchmark.spmc:consumer thrpt 5 250.901 ± 9.383 ops/us +SPMCQueueBenchmark.spmc:producer thrpt 5 15.675 ± 0.507 ops/us + */ 
+@BenchmarkMode(Mode.Throughput) +@State(Scope.Group) +@Fork(value = 1, warmups = 0) +@OutputTimeUnit(TimeUnit.MICROSECONDS) +public class SPMCQueueBenchmark { + + private static final int QUEUE_CAPACITY = 1024; + private static final int ITEMS_TO_PRODUCE = 100_000; + + private SpmcArrayQueue queue; + private AtomicInteger produced; + private AtomicInteger consumed; + + @Setup(Level.Iteration) + public void setup() { + queue = new SpmcArrayQueue<>(QUEUE_CAPACITY); + produced = new AtomicInteger(0); + consumed = new AtomicInteger(0); + + // Pre-fill queue for warmup safety + int warmupFill = Math.min(QUEUE_CAPACITY / 2, ITEMS_TO_PRODUCE); + for (int i = 0; i < warmupFill; i++) { + queue.offer(i); + produced.incrementAndGet(); + } + } + + // Single producer in the group + @Benchmark + @Group("spmc") + @GroupThreads(1) + public void producer() { + int i = produced.getAndIncrement(); + if (i < ITEMS_TO_PRODUCE) { + while (!queue.offer(i)) { + LockSupport.parkNanos(1L); + } + } + } + + // Multiple consumers in the group + @Benchmark + @Group("spmc") + @GroupThreads(4) // adjust number of consumers + public int consumer() { + while (true) { + Integer val = queue.poll(); + if (val != null) { + consumed.incrementAndGet(); + return val; + } + + if (produced.get() >= ITEMS_TO_PRODUCE && queue.isEmpty()) { + return 0; + } + + LockSupport.parkNanos(1L); + } + } +} diff --git a/internal-api/src/jmh/java/datadog/trace/util/queue/SPSCQueueBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/queue/SPSCQueueBenchmark.java new file mode 100644 index 00000000000..1ab0e7c982c --- /dev/null +++ b/internal-api/src/jmh/java/datadog/trace/util/queue/SPSCQueueBenchmark.java @@ -0,0 +1,70 @@ +package datadog.trace.util.queue; + +import java.util.concurrent.TimeUnit; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Group; +import 
org.openjdk.jmh.annotations.GroupThreads; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +/* +Benchmark (capacity) Mode Cnt Score Error Units +SPSCQueueBenchmark.queueTest 1024 thrpt 141.400 ops/us +SPSCQueueBenchmark.queueTest:consume 1024 thrpt 73.414 ops/us +SPSCQueueBenchmark.queueTest:produce 1024 thrpt 67.986 ops/us +SPSCQueueBenchmark.queueTest 65536 thrpt 153.123 ops/us +SPSCQueueBenchmark.queueTest:consume 65536 thrpt 79.838 ops/us +SPSCQueueBenchmark.queueTest:produce 65536 thrpt 73.286 ops/us + */ +@BenchmarkMode(Mode.Throughput) +@Warmup(iterations = 1, time = 30) +@Measurement(iterations = 1, time = 30) +@Fork(1) +@OutputTimeUnit(TimeUnit.MICROSECONDS) +@State(Scope.Benchmark) +public class SPSCQueueBenchmark { + @State(Scope.Group) + public static class QueueState { + SpscArrayQueue queue; + + @Param({"1024", "65536"}) + int capacity; + + @Setup(Level.Iteration) + public void setup() { + queue = new SpscArrayQueue<>(capacity); + } + } + + @Benchmark + @Group("queueTest") + @GroupThreads(1) + public void produce(QueueState state) { + + // bounded attempt: try once, then yield if full + boolean offered = state.queue.offer(0); + if (!offered) { + Thread.yield(); + } + } + + @Benchmark + @Group("queueTest") + @GroupThreads(1) + public void consume(QueueState state, Blackhole bh) { + Integer v = state.queue.poll(); + if (v != null) { + bh.consume(v); + } + } +} diff --git a/internal-api/src/main/java/datadog/trace/util/BitUtils.java b/internal-api/src/main/java/datadog/trace/util/BitUtils.java new file mode 100644 index 00000000000..ea47456c403 --- /dev/null +++ 
b/internal-api/src/main/java/datadog/trace/util/BitUtils.java @@ -0,0 +1,34 @@ +package datadog.trace.util; + +public final class BitUtils { + private BitUtils() {} + + /** + * Returns the next power of two greater than or equal to the given value. If the input is zero or + * negative, this method returns 1; + * + * @param value the input value + * @return the next power of two ≥ {@code value} + */ + public static int nextPowerOfTwo(int value) { + if (value <= 1) { + return 1; + } + + // Round up to next power of two (bitwise equivalent of using log2 and pow again) + value--; + value |= value >> 1; + value |= value >> 2; + value |= value >> 4; + value |= value >> 8; + value |= value >> 16; + value++; + + // handle overflow (e.g., if value was already near Integer.MAX_VALUE) + if (value <= 0) { + return 1 << 30; // max power of two that fits in int + } + + return value; + } +} diff --git a/internal-api/src/main/java/datadog/trace/util/queue/MpscArrayQueue.java b/internal-api/src/main/java/datadog/trace/util/queue/MpscArrayQueue.java new file mode 100644 index 00000000000..4dff1226e8f --- /dev/null +++ b/internal-api/src/main/java/datadog/trace/util/queue/MpscArrayQueue.java @@ -0,0 +1,284 @@ +package datadog.trace.util.queue; + +import static datadog.trace.util.BitUtils.nextPowerOfTwo; + +import java.util.AbstractQueue; +import java.util.Iterator; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLongFieldUpdater; +import java.util.concurrent.atomic.AtomicReferenceArray; +import java.util.concurrent.locks.LockSupport; +import java.util.function.Consumer; +import javax.annotation.Nonnull; + +/** + * Multiple-Producer, Single-Consumer (MPSC) bounded queue based on a circular array. + * + *

This queue is optimized for high-performance concurrent access where multiple threads + * (producers) can safely enqueue items concurrently, while a single thread (consumer) dequeues + * them. + * + *

Producers leverage a lock free CAS loop to win the race. Fields are padded to minimize cache + * line false sharing. + * + * @param the type of elements held in this queue + */ +public class MpscArrayQueue extends AbstractQueue { + + /** Capacity of the queue, always a power of two for efficient modulo indexing */ + protected final int capacity; + + /** Mask used to convert a sequence number to a circular array index (index = pos & mask) */ + private final int mask; + + /** Array buffer to store the elements; uses AtomicReferenceArray for atomic slot updates */ + private final AtomicReferenceArray buffer; + + // Padding + @SuppressWarnings("unused") + private long p0, p1, p2, p3, p4, p5, p6; + + /** Tail index: the next slot to insert for producers */ + private volatile long tail = 0L; + + // Padding + @SuppressWarnings("unused") + private long q0, q1, q2, q3, q4, q5, q6; + + /** Atomic updater to perform lock-free CAS on tail */ + private static final AtomicLongFieldUpdater TAIL_UPDATER = + AtomicLongFieldUpdater.newUpdater(MpscArrayQueue.class, "tail"); + + // Padding + @SuppressWarnings("unused") + private long p10, p11, p12, p13, p14, p15, p16; + + /** Head index: the next slot to consume for the single consumer */ + private volatile long head = 0L; + + // Padding + @SuppressWarnings("unused") + private long q10, q11, q12, q13, q14, q15, q16; + + // ======================== Constructor ======================== + + /** + * Creates a new MPSC queue with the specified capacity. Capacity will be rounded up to the next + * power of two for efficient modulo operations. + * + * @param capacity the desired maximum number of elements + */ + public MpscArrayQueue(int capacity) { + this.capacity = nextPowerOfTwo(capacity); + this.mask = this.capacity - 1; + this.buffer = new AtomicReferenceArray<>(this.capacity); + } + + // ======================== OFFER ======================== + + /** + * Adds the specified element to the queue if space is available. + * + *

Multiple producers may safely call this concurrently. Uses a CAS loop to claim a slot and
+   * {@link AtomicReferenceArray#lazySet(int, Object)} to publish the element. The queue is full
+   * when {@code tail - head} has reached the capacity; in that case returns {@code false}.
+   *
+   * @param e the element to add
+   * @return {@code true} if successful, {@code false} if queue is full
+   * @throws NullPointerException if {@code e} is null
+   */
+  @Override
+  public boolean offer(E e) {
+    if (e == null) {
+      throw new NullPointerException();
+    }
+
+    while (true) {
+      long currentTail = tail;
+      int index = (int) (currentTail & mask);
+
+      // Full check must use the indices: a claimed-but-unpublished slot still reads as null
+      if (currentTail - head >= capacity) {
+        return false; // queue full
+      }
+
+      // Attempt to claim slot using CAS
+      if (TAIL_UPDATER.compareAndSet(this, currentTail, currentTail + 1)) {
+        // Use lazySet for release semantics (avoids full volatile write)
+        buffer.lazySet(index, e);
+        return true;
+      }
+
+      // CAS failed, brief backoff to reduce contention
+      // Note: I found parkNanos more CPU friendly than Thread.yields
+      LockSupport.parkNanos(1);
+    }
+  }
+
+  /**
+   * Timed offer with progressive backoff.
+   *
+   *

Tries to insert an element into the queue within the given timeout. Uses a spin → yield → + * park backoff strategy to reduce CPU usage under contention. + * + * @param e the element to insert + * @param timeout maximum time to wait + * @param unit time unit of timeout + * @return {@code true} if inserted, {@code false} if timeout expires + * @throws InterruptedException if interrupted while waiting + */ + public boolean offer(E e, long timeout, @Nonnull TimeUnit unit) throws InterruptedException { + final long deadline = System.nanoTime() + unit.toNanos(timeout); + int idleCount = 0; + + while (true) { + if (offer(e)) { + return true; // successfully inserted + } + + long remaining = deadline - System.nanoTime(); + if (remaining <= 0) { + return false; // timeout + } + + // Progressive backoff + if (idleCount < 100) { + // spin (busy-wait) + } else if (idleCount < 1_000) { + Thread.yield(); // give up CPU to other threads + } else { + // park for a short duration, up to 1 ms + LockSupport.parkNanos(Math.min(remaining, 1_000_000L)); + } + + idleCount++; + + if (Thread.interrupted()) { + throw new InterruptedException(); + } + } + } + + /** + * Removes and returns the head of the queue, or null if empty. + * + *

Only a single consumer may call this. Advances the head and frees the slot. + * + * @return the head element, or null if empty + */ + @Override + public E poll() { + long currentHead = head; + int index = (int) (currentHead & mask); + E value = buffer.get(index); + + if (value == null) { + return null; + } + + // Mark slot free with lazySet (release semantics) + buffer.lazySet(index, null); + head = currentHead + 1; // advance head + return value; + } + + /** + * Polls with a timeout using progressive backoff. + * + * @param timeout max wait time + * @param unit time unit + * @return the head element, or null if timed out + * @throws InterruptedException if interrupted + */ + public E poll(long timeout, @Nonnull TimeUnit unit) throws InterruptedException { + final long deadline = System.nanoTime() + unit.toNanos(timeout); + int idleCount = 0; + + while (true) { + E value = poll(); + if (value != null) { + return value; + } + + long remaining = deadline - System.nanoTime(); + if (remaining <= 0) { + return null; + } + + // Progressive backoff + if (idleCount < 100) { + // spin + } else if (idleCount < 1_000) { + Thread.yield(); + } else { + LockSupport.parkNanos(Math.min(remaining, 1_000_000L)); + } + idleCount++; + + if (Thread.interrupted()) { + throw new InterruptedException(); + } + } + } + + /** Returns but does not remove the head element. */ + @Override + public E peek() { + int index = (int) (head & mask); + return buffer.get(index); + } + + @Override + public int size() { + long currentTail = tail; + long currentHead = head; + return (int) (currentTail - currentHead); + } + + /** + * Drains all available elements from the queue to a consumer. + * + *

This is efficient since it avoids repeated size() checks and returns immediately when empty. + * + * @param consumer a consumer to accept elements + * @return number of elements drained + */ + public int drain(Consumer consumer) { + return drain(consumer, Integer.MAX_VALUE); + } + + /** + * Drains up to {@code limit} elements from the queue to a consumer. + * + *

This method is useful for batch processing. + * + *

Each element is removed atomically using poll() and passed to the consumer. + * + * @param consumer a consumer to accept elements + * @param limit maximum number of elements to drain + * @return number of elements drained + */ + public int drain(Consumer consumer, int limit) { + int count = 0; + E e; + while (count < limit && (e = poll()) != null) { + consumer.accept(e); + count++; + } + return count; + } + + /** + * Returns the remaining capacity. + * + * @return number of additional elements this queue can accept + */ + public int remainingCapacity() { + return capacity - (int) (tail - head); + } + + @Override + public Iterator iterator() { + throw new UnsupportedOperationException(); + } +} diff --git a/internal-api/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueue.java b/internal-api/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueue.java new file mode 100644 index 00000000000..2719ff63ea2 --- /dev/null +++ b/internal-api/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueue.java @@ -0,0 +1,123 @@ +package datadog.trace.util.queue; + +import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.ReentrantLock; +import java.util.function.Supplier; +import javax.annotation.Nonnull; + +/** + * JCtools-like MpscBlockingConsumerArrayQueue implemented without Unsafe. + * + *

It features nonblocking offer/poll methods and blocking (condition based) take/put. + */ +public class MpscBlockingConsumerArrayQueue extends MpscArrayQueue { + // Blocking controls + private final ReentrantLock lock = new ReentrantLock(); + private final Condition notEmpty = lock.newCondition(); + private final Condition notFull = lock.newCondition(); + + public MpscBlockingConsumerArrayQueue(int capacity) { + super(capacity); + } + + @Override + public boolean offer(E e) { + final boolean success = super.offer(e); + if (success) { + signalNotEmpty(); + } + return success; + } + + public void put(E e) throws InterruptedException { + while (!offer(e)) { + awaitNotFull(); + } + } + + @Override + public E poll() { + final E ret = super.poll(); + if (ret != null) { + signalNotFull(); + } + return ret; + } + + public E take() throws InterruptedException { + E e; + while ((e = poll()) == null) { + awaitNotEmpty(); + } + return e; + } + + /** + * Fills the queue with elements provided by the supplier until either: - the queue is full, or - + * the supplier runs out of elements (returns null) + * + * @param supplier a supplier of elements + * @param limit maximum number of elements to attempt to insert + * @return number of elements successfully enqueued + */ + public int fill(@Nonnull Supplier supplier, int limit) { + if (limit <= 0) { + return 0; + } + + int added = 0; + while (added < limit) { + E e = supplier.get(); + if (e == null) { + break; // stop if supplier exhausted + } + + if (offer(e)) { + added++; + } else { + break; // queue is full + } + } + return added; + } + + private void signalNotEmpty() { + lock.lock(); + try { + notEmpty.signal(); + } finally { + lock.unlock(); + } + } + + private void signalNotFull() { + lock.lock(); + try { + notFull.signal(); + } finally { + lock.unlock(); + } + } + + private void awaitNotEmpty() throws InterruptedException { + lock.lockInterruptibly(); + try { + while (isEmpty()) { + notEmpty.await(); + } + } finally { + 
lock.unlock(); + } + } + + private void awaitNotFull() throws InterruptedException { + lock.lockInterruptibly(); + try { + while (size() == capacity) { + notFull.await(); + } + } finally { + lock.unlock(); + } + } +} diff --git a/internal-api/src/main/java/datadog/trace/util/queue/SpmcArrayQueue.java b/internal-api/src/main/java/datadog/trace/util/queue/SpmcArrayQueue.java new file mode 100644 index 00000000000..1d2fde4284f --- /dev/null +++ b/internal-api/src/main/java/datadog/trace/util/queue/SpmcArrayQueue.java @@ -0,0 +1,192 @@ +package datadog.trace.util.queue; + +import static datadog.trace.util.BitUtils.nextPowerOfTwo; + +import java.util.AbstractQueue; +import java.util.Iterator; +import java.util.concurrent.atomic.AtomicLongFieldUpdater; +import java.util.concurrent.atomic.AtomicReferenceArray; +import java.util.concurrent.locks.LockSupport; +import java.util.function.Consumer; + +/** + * A Single-Producer, Multiple-Consumer (SPMC) bounded queue based on a circular array. + * + *

This queue allows one producer to enqueue items concurrently with multiple consumers dequeuing + * them. It is lock-free for the producer, and uses CAS on the consumer side to allow multiple + * consumers safely. + * + *

Internally, the queue maintains a padded head and tail index to minimize false sharing, and
+ * uses {@link AtomicReferenceArray} to store elements safely across threads.
+ *
+ * @param <E> the type of elements held in this queue
+ */
+public class SpmcArrayQueue<E> extends AbstractQueue<E> {
+
+  /** The capacity of the queue (rounded up to a power of two) */
+  protected final int capacity;
+
+  /** Mask for fast modulo operation (index = pos & mask) */
+  private final int mask;
+
+  /** Array buffer storing elements */
+  private final AtomicReferenceArray<E> buffer;
+
+  @SuppressWarnings("unused") // padding
+  private long p0, p1, p2, p3, p4, p5, p6;
+
+  /** Tail index: next slot to be written by producer */
+  private volatile long tail = 0L;
+
+  @SuppressWarnings("unused") // padding
+  private long q0, q1, q2, q3, q4, q5, q6;
+  @SuppressWarnings("unused") // padding
+  private long p10, p11, p12, p13, p14, p15, p16;
+
+  /** Head index: next slot to be claimed by any consumer */
+  private volatile long head = 0L;
+
+  @SuppressWarnings("unused") // padding
+  private long q10, q11, q12, q13, q14, q15, q16;
+
+  /** CAS updater for head to allow multiple consumers to claim elements safely */
+  private static final AtomicLongFieldUpdater<SpmcArrayQueue> HEAD_UPDATER =
+      AtomicLongFieldUpdater.newUpdater(SpmcArrayQueue.class, "head");
+
+  /**
+   * Constructs a new SPMC queue. The requested capacity is rounded up to the next power of two,
+   * and both the index mask and the backing array are sized from that rounded value.
+   *
+   * @param capacity the desired maximum number of elements
+   */
+  public SpmcArrayQueue(int capacity) {
+    this.capacity = nextPowerOfTwo(capacity);
+    this.mask = this.capacity - 1;
+    this.buffer = new AtomicReferenceArray<>(this.capacity);
+  }
+
+  /**
+   * Adds the specified element to the queue if space is available.
+   *
+   *

Only one producer is allowed. The producer advances {@code tail} with a volatile write and + * then stores the element with {@code lazySet}; a non-null slot is what consumers look for. + * + * @param e the element to add + * @return true if the element was added, false if the queue is full + * @throws NullPointerException if the element is null + */ + @Override + public boolean offer(E e) { + if (e == null) throw new NullPointerException(); + + long currentTail = tail; + int index = (int) (currentTail & mask); + + if (buffer.get(index) != null) { + return false; // slot not yet cleared by a consumer: queue full + } + + // tail moves before the element is stored, so size() may briefly count an unpublished slot + tail = currentTail + 1; + + // lazySet stores the element; the slot turning non-null is what poll() observes + buffer.lazySet(index, e); + return true; + } + + /** + * Removes and returns the head element of the queue, or {@code null} if empty. + * + *

Multiple consumers can safely call this concurrently. Each consumer uses CAS on the head + * index to claim a slot below {@code tail}. Only the successful consumer sets the element to null. + * + * @return the head element, or {@code null} if the queue is empty + */ + @Override + public E poll() { + while (true) { + long currentHead = head; + int index = (int) (currentHead & mask); + E value = currentHead < tail ? buffer.get(index) : null; // at/after tail a slot may be stale + + if (value == null) { + return null; // empty, or the producer has not stored the element yet + } + + // CAS ensures only one consumer claims this slot + if (HEAD_UPDATER.compareAndSet(this, currentHead, currentHead + 1)) { + // mark slot free after claiming it; the producer reuses it only once it reads null + buffer.lazySet(index, null); + return value; + } + + // CAS failed: another consumer claimed it; retry + LockSupport.parkNanos(1); + } + } + + /** + * Returns, but does not remove, the head of the queue. + * + * @return the head element, or {@code null} if empty + */ + @Override + public E peek() { + int index = (int) (head & mask); + return buffer.get(index); + } + + /** + * Returns the number of elements in the queue. + * + *

Approximate: may not be exact under concurrent access. + * + * @return current size of the queue + */ + @Override + public int size() { + return (int) (tail - head); + } + + /** + * Iterator is not supported. + * + * @throws UnsupportedOperationException always + */ + @Override + public Iterator iterator() { + throw new UnsupportedOperationException(); + } + + /** + * Drains all available elements from the queue to the specified consumer. + * + *

This method repeatedly calls {@link #poll()} until the queue is empty. + * + * @param consumer the consumer to accept elements + * @return number of elements drained + */ + public int drain(Consumer consumer) { + return drain(consumer, Integer.MAX_VALUE); + } + + /** + * Drains up to {@code limit} elements from the queue to the specified consumer. + * + *

Useful for batch processing. This avoids frequent CAS operations by handling multiple + * elements in a single call. + * + * @param consumer the consumer to accept elements + * @param limit maximum number of elements to drain + * @return number of elements drained + */ + public int drain(Consumer consumer, int limit) { + int count = 0; + E e; + while (count < limit && (e = poll()) != null) { + consumer.accept(e); + count++; + } + return count; + } +} diff --git a/internal-api/src/main/java/datadog/trace/util/queue/SpscArrayQueue.java b/internal-api/src/main/java/datadog/trace/util/queue/SpscArrayQueue.java new file mode 100644 index 00000000000..154aaaec474 --- /dev/null +++ b/internal-api/src/main/java/datadog/trace/util/queue/SpscArrayQueue.java @@ -0,0 +1,194 @@ +package datadog.trace.util.queue; + +import static datadog.trace.util.BitUtils.nextPowerOfTwo; + +import java.util.AbstractQueue; +import java.util.Iterator; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.LockSupport; +import java.util.function.Consumer; + +/** + * A high-performance Single-Producer Single-Consumer (SPSC) bounded queue based on a circular + * array. + * + *

This queue is designed for scenarios where exactly one producer thread offers elements and one + * consumer thread polls elements. It uses a plain {@code Object[]} buffer with volatile head and + * tail indices and padded fields to minimize false sharing. + * + * @param <E> element type + */ +public final class SpscArrayQueue<E> extends AbstractQueue<E> { + + private final int mask; + private final Object[] buffer; + + // ========================== Padded tail (producer index) ========================== + @SuppressWarnings("unused") + private long p0, p1, p2, p3, p4, p5, p6; + private volatile long tail = 0L; + @SuppressWarnings("unused") + private long q0, q1, q2, q3, q4, q5, q6; + + // ========================== Padded head (consumer index) ========================== + @SuppressWarnings("unused") + private long r0, r1, r2, r3, r4, r5, r6; + private volatile long head = 0L; + @SuppressWarnings("unused") + private long s0, s1, s2, s3, s4, s5, s6; + + /** + * Constructs a new bounded single-producer single-consumer queue. + * + * @param capacity the maximum number of elements the queue can hold. Will be rounded to the next + * power of two if it is not one already. + */ + public SpscArrayQueue(int capacity) { + final int roundedCap = nextPowerOfTwo(capacity); + this.mask = roundedCap - 1; + this.buffer = new Object[roundedCap]; + } + + /** + * Attempts to add the specified element to this queue. Returns {@code false} if the queue is + * full. 
+ * + * @param e element to add (must not be null) + * @return {@code true} if successfully added; {@code false} if full + */ + @Override + public boolean offer(E e) { + if (e == null) { + throw new NullPointerException(); + } + + final long currentTail = tail; + final int index = (int) (currentTail & mask); + + // Full when a whole lap ahead of head; reading volatile head orders us after the slot clear + if (currentTail - head > mask) { + return false; + } + + buffer[index] = e; // plain write, made visible by the volatile tail write below + tail = currentTail + 1; // volatile write to publish + return true; + } + + /** Retrieves and removes the head of this queue, or {@code null} if empty. */ + @Override + @SuppressWarnings("unchecked") + public E poll() { + final long currentHead = head; + final int index = (int) (currentHead & mask); + + final E e = currentHead < tail ? (E) buffer[index] : null; // volatile tail read first + if (e == null) { + return null; + } + + buffer[index] = null; // clear the slot before head moves so the producer can reuse it + head = currentHead + 1; // volatile write to publish + return e; + } + + /** + * Polls an element, waiting up to the given timeout. 
+ * + * @param timeout maximum time to wait + * @param unit time unit of the timeout + * @return the next element or {@code null} if timed out + * @throws InterruptedException if interrupted while waiting + */ + public E poll(long timeout, TimeUnit unit) throws InterruptedException { + if (timeout <= 0) { + return poll(); + } + + final long deadline = System.nanoTime() + unit.toNanos(timeout); + int idleCount = 0; + + for (; ; ) { + E e = poll(); + if (e != null) { + return e; + } + + long remaining = deadline - System.nanoTime(); + if (remaining <= 0) { + return null; + } + + // Progressive backoff to reduce CPU usage + if (idleCount < 100) { + // light spin + } else if (idleCount < 1_000) { + Thread.yield(); + } else { + LockSupport.parkNanos(Math.min(remaining, 1_000_000L)); // up to 1ms + } + idleCount++; + + if (Thread.interrupted()) { + throw new InterruptedException(); + } + } + } + + /** + * Retrieves, but does not remove, the head of this queue. + * + * @return the head element or {@code null} if empty + */ + @Override + @SuppressWarnings("unchecked") + public E peek() { + return (E) buffer[(int) (head & mask)]; + } + + /** + * Returns an approximation of the number of elements in this queue. This value may be imprecise + * due to concurrent updates. + */ + @Override + public int size() { + return (int) (tail - head); + } + + /** + * Iterator is not supported. + * + * @throws UnsupportedOperationException always + */ + @Override + public Iterator iterator() { + throw new UnsupportedOperationException(); + } + + /** + * Drains all available elements and passes them to the given consumer. + * + * @param consumer callback for each drained element + * @return number of elements drained + */ + public int drain(Consumer consumer) { + return drain(consumer, Integer.MAX_VALUE); + } + + /** + * Drains up to {@code limit} elements to the given consumer. 
+ * + * @param consumer element consumer + * @param limit maximum number of elements to drain + * @return number of elements drained + */ + public int drain(Consumer consumer, int limit) { + int count = 0; + E e; + while (count < limit && (e = poll()) != null) { + consumer.accept(e); + count++; + } + return count; + } +} diff --git a/internal-api/src/test/groovy/datadog/trace/util/BitUtilsTest.groovy b/internal-api/src/test/groovy/datadog/trace/util/BitUtilsTest.groovy new file mode 100644 index 00000000000..fb85a2100d9 --- /dev/null +++ b/internal-api/src/test/groovy/datadog/trace/util/BitUtilsTest.groovy @@ -0,0 +1,42 @@ +package datadog.trace.util + +import static datadog.trace.util.BitUtils.nextPowerOfTwo + +import datadog.trace.test.util.DDSpecification + +class BitUtilsTest extends DDSpecification { + def "nextPowerOfTwo(#input) should return #expected"() { + expect: + nextPowerOfTwo(input) == expected + + where: + input | expected + 0 | 1 // smallest case + 1 | 1 // already power of two + 2 | 2 + 3 | 4 + 4 | 4 + 5 | 8 + 6 | 8 + 7 | 8 + 8 | 8 + 9 | 16 + 15 | 16 + 16 | 16 + 17 | 32 + 31 | 32 + 32 | 32 + 33 | 64 + 63 | 64 + 64 | 64 + 65 | 128 + 1000 | 1024 + 1023 | 1024 + 1024 | 1024 + 1025 | 2048 + 4096 | 4096 + 4097 | 8192 + -1 | 1 // negative input edge case + Integer.MAX_VALUE | (1 << 30) // largest safe power of two + } +} diff --git a/internal-api/src/test/groovy/datadog/trace/util/queue/AbstractQueueTest.groovy b/internal-api/src/test/groovy/datadog/trace/util/queue/AbstractQueueTest.groovy new file mode 100644 index 00000000000..f9c8a6a1c85 --- /dev/null +++ b/internal-api/src/test/groovy/datadog/trace/util/queue/AbstractQueueTest.groovy @@ -0,0 +1,163 @@ +package datadog.trace.util.queue + +import static java.util.concurrent.TimeUnit.NANOSECONDS + +import datadog.trace.test.util.DDSpecification +import java.util.concurrent.TimeUnit +import java.util.concurrent.atomic.AtomicBoolean +import java.util.function.Consumer + +abstract class AbstractQueueTest> 
extends DDSpecification { + abstract T createQueue(int capacity) + protected T queue = createQueue(8) + + def "offer and poll should preserve FIFO order"() { + when: + queue.offer(1) + queue.offer(2) + queue.offer(3) + + then: + queue.poll() == 1 + queue.poll() == 2 + queue.poll() == 3 + queue.poll() == null + } + + def "offer should return false when queue is full"() { + given: + queue.clear() + (1..8).each { queue.offer(it) } + + expect: + !queue.offer(999) + queue.size() == 8 + } + + def "peek should return head element without removing it"() { + given: + queue.clear() + queue.offer(10) + queue.offer(20) + + expect: + queue.peek() == 10 + queue.peek() == 10 + queue.size() == 2 + } + + def "poll should return null when empty"() { + given: + queue.clear() + + expect: + queue.poll() == null + } + + def "size should reflect current number of items"() { + when: + queue.clear() + queue.offer(1) + queue.offer(2) + + then: + queue.size() == 2 + + when: + queue.poll() + queue.poll() + + then: + queue.size() == 0 + } + + def "drain should consume all available elements"() { + given: + queue.clear() + (1..5).each { queue.offer(it) } + def drained = [] + + when: + def count = queue.drain({ drained << it } as Consumer) + + then: + count == 5 + drained == [1, 2, 3, 4, 5] + queue.isEmpty() + } + + def "drain with limit should only consume that many elements"() { + given: + queue.clear() + (1..6).each { queue.offer(it) } + def drained = [] + + when: + def count = queue.drain({ drained << it } as Consumer, 3) + + then: + count == 3 + drained == [1, 2, 3] + queue.size() == 3 + } + + def "remainingCapacity should reflect current occupancy"() { + given: + def q = new MpscArrayQueue(4) + q.offer(1) + q.offer(2) + + expect: + q.remainingCapacity() == 2 + + when: + q.poll() + + then: + q.remainingCapacity() == 3 + } + + + def "poll with timeout returns null if no element becomes available"() { + when: + def start = System.nanoTime() + def value = queue.poll(200, TimeUnit.MILLISECONDS) 
+ def elapsedMs = NANOSECONDS.toMillis(System.nanoTime() - start) + + then: + value == null + elapsedMs >= 200 // waited approximately the timeout + } + + def "poll with zero timeout behaves like immediate poll"() { + expect: + queue.poll(0, TimeUnit.MILLISECONDS) == null + + when: + queue.offer(99) + + then: + queue.poll(0, TimeUnit.MILLISECONDS) == 99 + } + + def "poll throws InterruptedException if interrupted"() { + given: + def thrown = new AtomicBoolean() + def thread = Thread.start { + try { + queue.poll(500, TimeUnit.MILLISECONDS) + } catch (InterruptedException ie) { + thrown.set(true) + Thread.currentThread().interrupt() + } + } + + when: + Thread.sleep(50) + thread.interrupt() + thread.join() + + then: + thrown.get() + } +} diff --git a/internal-api/src/test/groovy/datadog/trace/util/queue/MpscArrayQueueTest.groovy b/internal-api/src/test/groovy/datadog/trace/util/queue/MpscArrayQueueTest.groovy new file mode 100644 index 00000000000..ef7d1e25094 --- /dev/null +++ b/internal-api/src/test/groovy/datadog/trace/util/queue/MpscArrayQueueTest.groovy @@ -0,0 +1,61 @@ +package datadog.trace.util.queue + + +import java.util.concurrent.CountDownLatch +import java.util.concurrent.Executors +import spock.lang.Timeout + +class MpscArrayQueueTest extends AbstractQueueTest { + + @Timeout(10) + def "multiple producers single consumer should consume all elements without duplication or loss"() { + given: + int total = 1000 + int producers = 4 + queue = new MpscArrayQueue<>(1024) + def results = Collections.synchronizedList([]) + def executor = Executors.newFixedThreadPool(producers) + def latch = new CountDownLatch(producers) + def consumerDone = new CountDownLatch(1) + + when: "multiple producers enqueue concurrently" + (1..producers).each { id -> + executor.submit { + for (int i = 0; i < total / producers; i++) { + int value = (id * 10000) + i + while (!queue.offer(value)) { + Thread.yield() + } + } + latch.countDown() + } + } + + and: "a single consumer drains all 
elements" + Thread consumer = new Thread({ + while (results.size() < total) { + def v = queue.poll() + if (v != null) { + results << v + } else { + Thread.yield() + } + } + consumerDone.countDown() + }) + consumer.start() + + latch.await() + consumerDone.await() + executor.shutdown() + + then: + results.size() == total + results.toSet().size() == total // all unique + } + + @Override + MpscArrayQueue createQueue(int capacity) { + return new MpscArrayQueue(capacity) + } +} diff --git a/internal-api/src/test/groovy/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueTest.groovy b/internal-api/src/test/groovy/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueTest.groovy new file mode 100644 index 00000000000..06ca16e3741 --- /dev/null +++ b/internal-api/src/test/groovy/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueTest.groovy @@ -0,0 +1,197 @@ +package datadog.trace.util.queue + + +import java.util.concurrent.CountDownLatch +import java.util.concurrent.TimeUnit +import java.util.concurrent.atomic.AtomicBoolean +import java.util.concurrent.atomic.AtomicReference +import java.util.function.Consumer +import java.util.function.Supplier +import spock.lang.Timeout + +class MpscBlockingConsumerArrayQueueTest extends AbstractQueueTest> { + + @Override + MpscBlockingConsumerArrayQueue createQueue(int capacity) { + return new MpscBlockingConsumerArrayQueue(capacity) + } + + def "put and take should block and release correctly"() { + given: + queue = new MpscBlockingConsumerArrayQueue<>(2) + def taken = new AtomicReference<>() + def latch = new CountDownLatch(1) + + when: + Thread.start { + taken.set(queue.take()) + latch.countDown() + } + + Thread.sleep(100) // ensure consumer is waiting + queue.put(42) + latch.await(1, TimeUnit.SECONDS) + + then: + taken.get() == 42 + queue.isEmpty() + } + + def "put should block when full until space is available"() { + given: + queue = new MpscBlockingConsumerArrayQueue<>(2) + queue.put(1) + queue.put(2) + def added = new 
AtomicBoolean(false) + + when: + Thread producer = Thread.start { + try { + queue.put(3) // should block until consumer polls + added.set(true) + } catch (InterruptedException ignore) { + } + } + + Thread.sleep(100) + assert !added.get() + queue.take() // frees one slot + producer.join(1000) + + then: + added.get() + queue.size() == 2 + } + + def "drain should consume all elements in order"() { + given: + queue.clear() + (1..5).each { queue.offer(it) } + def drained = [] + + when: + def count = queue.drain({ drained << it } as Consumer) + + then: + count == 5 + drained == [1, 2, 3, 4, 5] + queue.isEmpty() + } + + def "drain with limit should consume only limited number"() { + given: + queue.clear() + (1..6).each { queue.offer(it) } + def drained = [] + + when: + def count = queue.drain({ drained << it } as Consumer, 3) + + then: + count == 3 + drained == [1, 2, 3] + queue.size() == 3 + } + + @Timeout(10) + def "multiple producers single consumer should consume all elements without duplicates"() { + given: + int total = 1000 + int producers = 4 + queue = new MpscBlockingConsumerArrayQueue<>(1024) + def results = Collections.synchronizedList([]) + def latch = new CountDownLatch(producers) + + when: + // Multiple producers + (1..producers).each { id -> + Thread.start { + for (int i = 0; i < total / producers; i++) { + int val = id * 10_000 + i + while (!queue.offer(val)) { + Thread.yield() + } + } + latch.countDown() + } + } + + // Single consumer + Thread consumer = Thread.start { + while (results.size() < total) { + def v = queue.poll() + if (v != null) { + results << v + } + else { + Thread.yield() + } + } + } + + latch.await() + consumer.join() + + then: + results.size() == total + results.toSet().size() == total // all unique + } + + def "blocking take should wake up when producer offers"() { + given: + queue = new MpscBlockingConsumerArrayQueue<>(4) + def result = new AtomicReference<>() + + when: + Thread consumer = Thread.start { + try { + 
result.set(queue.take()) + } catch (InterruptedException ignored) { + } + } + Thread.sleep(100) + queue.offer(123) + consumer.join(1000) + + then: + result.get() == 123 + queue.isEmpty() + } + + def "blocking put should wake up when consumer takes"() { + given: + queue = new MpscBlockingConsumerArrayQueue<>(1) + queue.put(1) + def done = new AtomicBoolean(false) + + when: + Thread producer = Thread.start { + try { + queue.put(2) // blocks until consumer takes + done.set(true) + } catch (InterruptedException ignored) { + } + } + + Thread.sleep(100) + queue.take() + producer.join(1000) + + then: + done.get() + queue.size() == 1 + } + + def "fill inserts up to capacity"() { + given: + def counter = 0 + def supplier = { counter < 10 ? counter++ : null } as Supplier + + when: + def filled = queue.fill(supplier, 10) + + then: + filled == 8 + queue.size() == 8 + } +} diff --git a/internal-api/src/test/groovy/datadog/trace/util/queue/SpmcArrayQueueTest.groovy b/internal-api/src/test/groovy/datadog/trace/util/queue/SpmcArrayQueueTest.groovy new file mode 100644 index 00000000000..90c5661f7fa --- /dev/null +++ b/internal-api/src/test/groovy/datadog/trace/util/queue/SpmcArrayQueueTest.groovy @@ -0,0 +1,59 @@ +package datadog.trace.util.queue + + +import java.util.concurrent.CountDownLatch +import java.util.concurrent.Executors +import spock.lang.Timeout + +class SpmcArrayQueueTest extends AbstractQueueTest> { + + @Override + SpmcArrayQueue createQueue(int capacity) { + return new SpmcArrayQueue(capacity) + } + + @Timeout(10) + def "single producer multiple consumers should consume all elements without duplication or loss"() { + given: + int total = 1000 + int consumers = 4 + queue = new SpmcArrayQueue<>(1024) + def results = Collections.synchronizedList([]) + def executor = Executors.newFixedThreadPool(consumers) + def latch = new CountDownLatch(consumers) + + when: "one producer fills the queue" + Thread producer = new Thread({ + for (int i = 0; i < total; i++) { + while 
(!queue.offer(i)) { + Thread.yield() + } + } + }) + producer.start() + + and: "multiple consumers drain concurrently" + (1..consumers).each { + executor.submit { + while (results.size() < total) { + def v = queue.poll() + if (v != null) { + results << v + } else { + Thread.yield() + } + } + latch.countDown() + } + } + + latch.await() + producer.join() + executor.shutdown() + + then: + results.size() == total + results.toSet().size() == total // no duplicates + results.containsAll((0..> { + + def "single producer single consumer concurrency"() { + given: + def queue = new SpscArrayQueue(1024) + def producerCount = 1000 + def consumed = new AtomicInteger(0) + def consumedValues = [] + + def producer = Thread.start { + (1..producerCount).each { queue.offer(it) } + } + + def consumer = Thread.start { + while (consumed.get() < producerCount) { + def v = queue.poll() + if (v != null) { + consumedValues << v + consumed.incrementAndGet() + } + } + } + + when: + producer.join() + consumer.join() + + then: + consumed.get() == producerCount + consumedValues.toSet().size() == producerCount // all values unique + } + + @Override + SpscArrayQueue createQueue(int capacity) { + return new SpscArrayQueue(capacity) + } +} From e396dbe6f87377ba6ed35a4e3925047d61a05fde Mon Sep 17 00:00:00 2001 From: Andrea Marziali Date: Tue, 4 Nov 2025 09:07:14 +0100 Subject: [PATCH 02/18] Refactor --- dd-trace-core/build.gradle | 2 + .../datadog/trace/util/queue/BaseQueue.java | 197 ++++++++++++++++++ .../trace/util/queue/MpscArrayQueue.java | 153 +------------- .../queue/MpscBlockingConsumerArrayQueue.java | 46 ++-- .../trace/util/queue/SpmcArrayQueue.java | 61 +----- .../trace/util/queue/SpscArrayQueue.java | 109 ++-------- 6 files changed, 231 insertions(+), 337 deletions(-) create mode 100644 internal-api/src/main/java/datadog/trace/util/queue/BaseQueue.java diff --git a/dd-trace-core/build.gradle b/dd-trace-core/build.gradle index 882d6b6cd06..4b2dc87612c 100644 --- a/dd-trace-core/build.gradle 
+++ b/dd-trace-core/build.gradle @@ -81,6 +81,8 @@ dependencies { compileOnly group: 'com.github.spotbugs', name: 'spotbugs-annotations', version: '4.2.0' + jmhImplementation(libs.jctools) + // We have autoservices defined in test subtree, looks like we need this to be able to properly rebuild this testAnnotationProcessor libs.autoservice.processor testCompileOnly libs.autoservice.annotation diff --git a/internal-api/src/main/java/datadog/trace/util/queue/BaseQueue.java b/internal-api/src/main/java/datadog/trace/util/queue/BaseQueue.java new file mode 100644 index 00000000000..2edc5c19214 --- /dev/null +++ b/internal-api/src/main/java/datadog/trace/util/queue/BaseQueue.java @@ -0,0 +1,197 @@ +package datadog.trace.util.queue; + +import static datadog.trace.util.BitUtils.nextPowerOfTwo; + +import java.util.AbstractQueue; +import java.util.Iterator; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.LockSupport; +import java.util.function.Consumer; +import java.util.function.Supplier; +import javax.annotation.Nonnull; + +public abstract class BaseQueue extends AbstractQueue { + /** The capacity of the queue (must be a power of two) */ + protected final int capacity; + + /** Mask for fast modulo operation (index = pos & mask) */ + protected final int mask; + + public BaseQueue(int capacity) { + this.capacity = nextPowerOfTwo(capacity); + this.mask = this.capacity - 1; + } + + /** + * Timed offer with progressive backoff. + * + *

Tries to insert an element into the queue within the given timeout. Uses a spin → yield → + * park backoff strategy to reduce CPU usage under contention. + * + * @param e the element to insert + * @param timeout maximum time to wait + * @param unit time unit of timeout + * @return {@code true} if inserted, {@code false} if timeout expires + * @throws InterruptedException if interrupted while waiting + */ + public boolean offer(E e, long timeout, @Nonnull TimeUnit unit) throws InterruptedException { + final long deadline = System.nanoTime() + unit.toNanos(timeout); + int idleCount = 0; + + while (true) { + if (offer(e)) { + return true; // successfully inserted + } + + long remaining = deadline - System.nanoTime(); + if (remaining <= 0) { + return false; // timeout + } + + // Progressive backoff + if (idleCount < 100) { + // spin (busy-wait) + } else if (idleCount < 1_000) { + Thread.yield(); // give up CPU to other threads + } else { + // park for a short duration, up to 1 ms + LockSupport.parkNanos(Math.min(remaining, 1_000_000L)); + } + + idleCount++; + + if (Thread.interrupted()) { + throw new InterruptedException(); + } + } + } + + /** + * Polls with a timeout using progressive backoff. 
+ * + * @param timeout max wait time + * @param unit time unit + * @return the head element, or null if timed out + * @throws InterruptedException if interrupted + */ + public E poll(long timeout, @Nonnull TimeUnit unit) throws InterruptedException { + final long deadline = System.nanoTime() + unit.toNanos(timeout); + int idleCount = 0; + + while (true) { + E value = poll(); + if (value != null) { + return value; + } + + long remaining = deadline - System.nanoTime(); + if (remaining <= 0) { + return null; + } + + // Progressive backoff + if (idleCount < 100) { + // spin + } else if (idleCount < 1_000) { + Thread.yield(); + } else { + LockSupport.parkNanos(Math.min(remaining, 1_000_000L)); + } + idleCount++; + + if (Thread.interrupted()) { + throw new InterruptedException(); + } + } + } + + /** + * Drains all available elements from the queue to a consumer. + * + *

This is efficient since it avoids repeated size() checks and returns immediately when empty. + * + * @param consumer a consumer to accept elements + * @return number of elements drained + */ + public int drain(Consumer consumer) { + return drain(consumer, Integer.MAX_VALUE); + } + + /** + * Drains up to {@code limit} elements from the queue to a consumer. + * + *

This method is useful for batch processing. + * + *

Each element is removed atomically using poll() and passed to the consumer. + * + * @param consumer a consumer to accept elements + * @param limit maximum number of elements to drain + * @return number of elements drained + */ + public int drain(Consumer consumer, int limit) { + int count = 0; + E e; + while (count < limit && (e = poll()) != null) { + consumer.accept(e); + count++; + } + return count; + } + + /** + * Fills the queue with elements provided by the supplier until either: - the queue is full, or - + * the supplier runs out of elements (returns null). The supplier is not asked while size() == capacity. + * + * @param supplier a supplier of elements + * @param limit maximum number of elements to attempt to insert + * @return number of elements successfully enqueued + */ + public int fill(@Nonnull Supplier supplier, int limit) { + if (limit <= 0) { + return 0; + } + + int added = 0; + while (added < limit && size() < capacity) { // do not take an element we cannot store + E e = supplier.get(); + if (e == null) { + break; // stop if supplier exhausted + } + + if (offer(e)) { + added++; + } else { + break; // offer still failed (e.g. another producer took the slot); e is not enqueued + } + } + return added; + } + + /** + * Iterator is not supported. + * + * @throws UnsupportedOperationException always + */ + @Override + public Iterator iterator() { + throw new UnsupportedOperationException(); + } + + /** + * Returns the remaining capacity. + * + * @return number of additional elements this queue can accept + */ + public int remainingCapacity() { + return capacity - size(); + } + + /** + * Returns the maximum queue capacity. 
+ * + * @return number of total elements this queue can accept + */ + public int capacity() { + return capacity; + } +} diff --git a/internal-api/src/main/java/datadog/trace/util/queue/MpscArrayQueue.java b/internal-api/src/main/java/datadog/trace/util/queue/MpscArrayQueue.java index 4dff1226e8f..ba4da512c6f 100644 --- a/internal-api/src/main/java/datadog/trace/util/queue/MpscArrayQueue.java +++ b/internal-api/src/main/java/datadog/trace/util/queue/MpscArrayQueue.java @@ -1,15 +1,8 @@ package datadog.trace.util.queue; -import static datadog.trace.util.BitUtils.nextPowerOfTwo; - -import java.util.AbstractQueue; -import java.util.Iterator; -import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLongFieldUpdater; import java.util.concurrent.atomic.AtomicReferenceArray; import java.util.concurrent.locks.LockSupport; -import java.util.function.Consumer; -import javax.annotation.Nonnull; /** * Multiple-Producer, Single-Consumer (MPSC) bounded queue based on a circular array. @@ -23,14 +16,7 @@ * * @param the type of elements held in this queue */ -public class MpscArrayQueue extends AbstractQueue { - - /** Capacity of the queue, always a power of two for efficient modulo indexing */ - protected final int capacity; - - /** Mask used to convert a sequence number to a circular array index (index = pos & mask) */ - private final int mask; - +public class MpscArrayQueue extends BaseQueue { /** Array buffer to store the elements; uses AtomicReferenceArray for atomic slot updates */ private final AtomicReferenceArray buffer; @@ -60,8 +46,6 @@ public class MpscArrayQueue extends AbstractQueue { @SuppressWarnings("unused") private long q10, q11, q12, q13, q14, q15, q16; - // ======================== Constructor ======================== - /** * Creates a new MPSC queue with the specified capacity. Capacity will be rounded up to the next * power of two for efficient modulo operations. 
@@ -69,13 +53,10 @@ public class MpscArrayQueue extends AbstractQueue { * @param capacity the desired maximum number of elements */ public MpscArrayQueue(int capacity) { - this.capacity = nextPowerOfTwo(capacity); - this.mask = this.capacity - 1; + super(capacity); this.buffer = new AtomicReferenceArray<>(this.capacity); } - // ======================== OFFER ======================== - /** * Adds the specified element to the queue if space is available. * @@ -115,50 +96,6 @@ public boolean offer(E e) { } } - /** - * Timed offer with progressive backoff. - * - *

Tries to insert an element into the queue within the given timeout. Uses a spin → yield → - * park backoff strategy to reduce CPU usage under contention. - * - * @param e the element to insert - * @param timeout maximum time to wait - * @param unit time unit of timeout - * @return {@code true} if inserted, {@code false} if timeout expires - * @throws InterruptedException if interrupted while waiting - */ - public boolean offer(E e, long timeout, @Nonnull TimeUnit unit) throws InterruptedException { - final long deadline = System.nanoTime() + unit.toNanos(timeout); - int idleCount = 0; - - while (true) { - if (offer(e)) { - return true; // successfully inserted - } - - long remaining = deadline - System.nanoTime(); - if (remaining <= 0) { - return false; // timeout - } - - // Progressive backoff - if (idleCount < 100) { - // spin (busy-wait) - } else if (idleCount < 1_000) { - Thread.yield(); // give up CPU to other threads - } else { - // park for a short duration, up to 1 ms - LockSupport.parkNanos(Math.min(remaining, 1_000_000L)); - } - - idleCount++; - - if (Thread.interrupted()) { - throw new InterruptedException(); - } - } - } - /** * Removes and returns the head of the queue, or null if empty. * @@ -182,45 +119,6 @@ public E poll() { return value; } - /** - * Polls with a timeout using progressive backoff. 
- * - * @param timeout max wait time - * @param unit time unit - * @return the head element, or null if timed out - * @throws InterruptedException if interrupted - */ - public E poll(long timeout, @Nonnull TimeUnit unit) throws InterruptedException { - final long deadline = System.nanoTime() + unit.toNanos(timeout); - int idleCount = 0; - - while (true) { - E value = poll(); - if (value != null) { - return value; - } - - long remaining = deadline - System.nanoTime(); - if (remaining <= 0) { - return null; - } - - // Progressive backoff - if (idleCount < 100) { - // spin - } else if (idleCount < 1_000) { - Thread.yield(); - } else { - LockSupport.parkNanos(Math.min(remaining, 1_000_000L)); - } - idleCount++; - - if (Thread.interrupted()) { - throw new InterruptedException(); - } - } - } - /** Returns but does not remove the head element. */ @Override public E peek() { @@ -234,51 +132,4 @@ public int size() { long currentHead = head; return (int) (currentTail - currentHead); } - - /** - * Drains all available elements from the queue to a consumer. - * - *

This is efficient since it avoids repeated size() checks and returns immediately when empty. - * - * @param consumer a consumer to accept elements - * @return number of elements drained - */ - public int drain(Consumer consumer) { - return drain(consumer, Integer.MAX_VALUE); - } - - /** - * Drains up to {@code limit} elements from the queue to a consumer. - * - *

This method is useful for batch processing. - * - *

Each element is removed atomically using poll() and passed to the consumer. - * - * @param consumer a consumer to accept elements - * @param limit maximum number of elements to drain - * @return number of elements drained - */ - public int drain(Consumer consumer, int limit) { - int count = 0; - E e; - while (count < limit && (e = poll()) != null) { - consumer.accept(e); - count++; - } - return count; - } - - /** - * Returns the remaining capacity. - * - * @return number of additional elements this queue can accept - */ - public int remainingCapacity() { - return capacity - (int) (tail - head); - } - - @Override - public Iterator iterator() { - throw new UnsupportedOperationException(); - } } diff --git a/internal-api/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueue.java b/internal-api/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueue.java index 2719ff63ea2..8efd593abcf 100644 --- a/internal-api/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueue.java +++ b/internal-api/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueue.java @@ -1,16 +1,17 @@ package datadog.trace.util.queue; +import java.util.Collection; +import java.util.concurrent.BlockingQueue; import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.ReentrantLock; -import java.util.function.Supplier; -import javax.annotation.Nonnull; /** * JCtools-like MpscBlockingConsumerArrayQueue implemented without Unsafe. * *

It features nonblocking offer/poll methods and blocking (condition based) take/put. */ -public class MpscBlockingConsumerArrayQueue extends MpscArrayQueue { +public class MpscBlockingConsumerArrayQueue extends MpscArrayQueue + implements BlockingQueue { // Blocking controls private final ReentrantLock lock = new ReentrantLock(); private final Condition notEmpty = lock.newCondition(); @@ -52,35 +53,6 @@ public E take() throws InterruptedException { return e; } - /** - * Fills the queue with elements provided by the supplier until either: - the queue is full, or - - * the supplier runs out of elements (returns null) - * - * @param supplier a supplier of elements - * @param limit maximum number of elements to attempt to insert - * @return number of elements successfully enqueued - */ - public int fill(@Nonnull Supplier supplier, int limit) { - if (limit <= 0) { - return 0; - } - - int added = 0; - while (added < limit) { - E e = supplier.get(); - if (e == null) { - break; // stop if supplier exhausted - } - - if (offer(e)) { - added++; - } else { - break; // queue is full - } - } - return added; - } - private void signalNotEmpty() { lock.lock(); try { @@ -120,4 +92,14 @@ private void awaitNotFull() throws InterruptedException { lock.unlock(); } } + + @Override + public int drainTo(Collection c) { + return drainTo(c, Integer.MAX_VALUE); + } + + @Override + public int drainTo(Collection c, int maxElements) { + return drain(c::add, maxElements); + } } diff --git a/internal-api/src/main/java/datadog/trace/util/queue/SpmcArrayQueue.java b/internal-api/src/main/java/datadog/trace/util/queue/SpmcArrayQueue.java index 1d2fde4284f..f48ed1b6ff6 100644 --- a/internal-api/src/main/java/datadog/trace/util/queue/SpmcArrayQueue.java +++ b/internal-api/src/main/java/datadog/trace/util/queue/SpmcArrayQueue.java @@ -1,13 +1,8 @@ package datadog.trace.util.queue; -import static datadog.trace.util.BitUtils.nextPowerOfTwo; - -import java.util.AbstractQueue; -import java.util.Iterator; 
import java.util.concurrent.atomic.AtomicLongFieldUpdater; import java.util.concurrent.atomic.AtomicReferenceArray; import java.util.concurrent.locks.LockSupport; -import java.util.function.Consumer; /** * A Single-Producer, Multiple-Consumer (SPMC) bounded queue based on a circular array. @@ -21,14 +16,7 @@ * * @param the type of elements held in this queue */ -public class SpmcArrayQueue extends AbstractQueue { - - /** The capacity of the queue (must be a power of two) */ - protected final int capacity; - - /** Mask for fast modulo operation (index = pos & mask) */ - private final int mask; - +public class SpmcArrayQueue extends BaseQueue { /** Array buffer storing elements */ private final AtomicReferenceArray buffer; @@ -60,9 +48,8 @@ public class SpmcArrayQueue extends AbstractQueue { * @param capacity the desired maximum number of elements */ public SpmcArrayQueue(int capacity) { - this.capacity = nextPowerOfTwo(capacity); - this.mask = capacity - 1; - this.buffer = new AtomicReferenceArray<>(capacity); + super(capacity); + this.buffer = new AtomicReferenceArray<>(this.capacity); } /** @@ -147,46 +134,4 @@ public E peek() { public int size() { return (int) (tail - head); } - - /** - * Iterator is not supported. - * - * @throws UnsupportedOperationException always - */ - @Override - public Iterator iterator() { - throw new UnsupportedOperationException(); - } - - /** - * Drains all available elements from the queue to the specified consumer. - * - *

This method repeatedly calls {@link #poll()} until the queue is empty. - * - * @param consumer the consumer to accept elements - * @return number of elements drained - */ - public int drain(Consumer consumer) { - return drain(consumer, Integer.MAX_VALUE); - } - - /** - * Drains up to {@code limit} elements from the queue to the specified consumer. - * - *

Useful for batch processing. This avoids frequent CAS operations by handling multiple - * elements in a single call. - * - * @param consumer the consumer to accept elements - * @param limit maximum number of elements to drain - * @return number of elements drained - */ - public int drain(Consumer consumer, int limit) { - int count = 0; - E e; - while (count < limit && (e = poll()) != null) { - consumer.accept(e); - count++; - } - return count; - } } diff --git a/internal-api/src/main/java/datadog/trace/util/queue/SpscArrayQueue.java b/internal-api/src/main/java/datadog/trace/util/queue/SpscArrayQueue.java index 154aaaec474..149d267c5b8 100644 --- a/internal-api/src/main/java/datadog/trace/util/queue/SpscArrayQueue.java +++ b/internal-api/src/main/java/datadog/trace/util/queue/SpscArrayQueue.java @@ -1,13 +1,5 @@ package datadog.trace.util.queue; -import static datadog.trace.util.BitUtils.nextPowerOfTwo; - -import java.util.AbstractQueue; -import java.util.Iterator; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.locks.LockSupport; -import java.util.function.Consumer; - /** * A high-performance Single-Producer Single-Consumer (SPSC) bounded queue based on a circular * array. 
@@ -18,22 +10,28 @@ * * @param element type */ -public final class SpscArrayQueue extends AbstractQueue { - - private final int mask; +public final class SpscArrayQueue extends BaseQueue { private final Object[] buffer; - // ========================== Padded tail (producer index) ========================== + // padding @SuppressWarnings("unused") private long p0, p1, p2, p3, p4, p5, p6; + + /** Producer index */ private volatile long tail = 0L; + + // padding @SuppressWarnings("unused") private long q0, q1, q2, q3, q4, q5, q6; - // ========================== Padded head (consumer index) ========================== + // padding @SuppressWarnings("unused") private long r0, r1, r2, r3, r4, r5, r6; + + /** Consumer index */ private volatile long head = 0L; + + // padding @SuppressWarnings("unused") private long s0, s1, s2, s3, s4, s5, s6; @@ -44,9 +42,8 @@ public final class SpscArrayQueue extends AbstractQueue { * power of two if not yet. */ public SpscArrayQueue(int capacity) { - final int roundedCap = nextPowerOfTwo(capacity); - this.mask = roundedCap - 1; - this.buffer = new Object[roundedCap]; + super(capacity); + this.buffer = new Object[this.capacity]; } /** @@ -92,49 +89,6 @@ public E poll() { return e; } - /** - * Polls an element, waiting up to the given timeout. 
- * - * @param timeout maximum time to wait - * @param unit time unit of the timeout - * @return the next element or {@code null} if timed out - * @throws InterruptedException if interrupted while waiting - */ - public E poll(long timeout, TimeUnit unit) throws InterruptedException { - if (timeout <= 0) { - return poll(); - } - - final long deadline = System.nanoTime() + unit.toNanos(timeout); - int idleCount = 0; - - for (; ; ) { - E e = poll(); - if (e != null) { - return e; - } - - long remaining = deadline - System.nanoTime(); - if (remaining <= 0) { - return null; - } - - // Progressive backoff to reduce CPU usage - if (idleCount < 100) { - // light spin - } else if (idleCount < 1_000) { - Thread.yield(); - } else { - LockSupport.parkNanos(Math.min(remaining, 1_000_000L)); // up to 1ms - } - idleCount++; - - if (Thread.interrupted()) { - throw new InterruptedException(); - } - } - } - /** * Retrieves, but does not remove, the head of this queue. * @@ -154,41 +108,4 @@ public E peek() { public int size() { return (int) (tail - head); } - - /** - * Iterator is not supported. - * - * @throws UnsupportedOperationException always - */ - @Override - public Iterator iterator() { - throw new UnsupportedOperationException(); - } - - /** - * Drains all available elements and passes them to the given consumer. - * - * @param consumer callback for each drained element - * @return number of elements drained - */ - public int drain(Consumer consumer) { - return drain(consumer, Integer.MAX_VALUE); - } - - /** - * Drains up to {@code limit} elements to the given consumer. 
- * - * @param consumer element consumer - * @param limit maximum number of elements to drain - * @return number of elements drained - */ - public int drain(Consumer consumer, int limit) { - int count = 0; - E e; - while (count < limit && (e = poll()) != null) { - consumer.accept(e); - count++; - } - return count; - } } From fb3d71843a79f6a4970c9545a7f78dcaa000ae32 Mon Sep 17 00:00:00 2001 From: Andrea Marziali Date: Tue, 4 Nov 2025 11:46:07 +0100 Subject: [PATCH 03/18] Introduce varhandle based queues --- dd-java-agent/agent-llmobs/build.gradle | 2 + .../trace/llmobs/EvalProcessingWorker.java | 11 +- dd-trace-core/build.gradle | 1 + .../trace/common/metrics/Aggregator.java | 8 +- .../metrics/ConflatingMetricsAggregator.java | 10 +- .../trace/common/metrics/OkHttpSink.java | 9 +- .../common/writer/SpanSamplingWorker.java | 9 +- .../common/writer/TraceProcessingWorker.java | 21 +-- .../trace/core/PendingTraceBuffer.java | 7 +- .../DefaultDataStreamsMonitoring.java | 5 +- .../stacktrace/queue/MPSCQueueBenchmark.java | 78 ++++++++++ .../stacktrace/queue/SPMCQueueBenchmark.java | 83 ++++++++++ .../stacktrace/queue/SPSCQueueBenchmark.java | 71 +++++++++ .../util/queue/MpscArrayQueueVarHandle.java | 139 +++++++++++++++++ ...scBlockingConsumerArrayQueueVarHandle.java | 92 +++++++++++ .../java/datadog/trace/util/queue/Queues.java | 37 +++++ .../util/queue/SpmcArrayQueueVarHandle.java | 145 ++++++++++++++++++ .../util/queue/SpscArrayQueueVarHandle.java | 139 +++++++++++++++++ .../datadog/trace/util/queue/BaseQueue.java | 24 ++- .../queue/MpscBlockingConsumerArrayQueue.java | 15 +- .../util/queue/MpscArrayQueueTest.groovy | 2 +- 21 files changed, 855 insertions(+), 53 deletions(-) create mode 100644 internal-api/internal-api-9/src/jmh/java/datadog/trace/util/stacktrace/queue/MPSCQueueBenchmark.java create mode 100644 internal-api/internal-api-9/src/jmh/java/datadog/trace/util/stacktrace/queue/SPMCQueueBenchmark.java create mode 100644 
internal-api/internal-api-9/src/jmh/java/datadog/trace/util/stacktrace/queue/SPSCQueueBenchmark.java create mode 100644 internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscArrayQueueVarHandle.java create mode 100644 internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandle.java create mode 100644 internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/Queues.java create mode 100644 internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpmcArrayQueueVarHandle.java create mode 100644 internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpscArrayQueueVarHandle.java diff --git a/dd-java-agent/agent-llmobs/build.gradle b/dd-java-agent/agent-llmobs/build.gradle index 51886fe6449..bb119d21fab 100644 --- a/dd-java-agent/agent-llmobs/build.gradle +++ b/dd-java-agent/agent-llmobs/build.gradle @@ -28,6 +28,8 @@ dependencies { implementation project(':communication') implementation project(':components:json') implementation project(':internal-api') + api project(':internal-api:internal-api-9') + testImplementation project(':dd-java-agent:testing') } diff --git a/dd-java-agent/agent-llmobs/src/main/java/datadog/trace/llmobs/EvalProcessingWorker.java b/dd-java-agent/agent-llmobs/src/main/java/datadog/trace/llmobs/EvalProcessingWorker.java index 75b2edd7cf6..aaabb2017a6 100644 --- a/dd-java-agent/agent-llmobs/src/main/java/datadog/trace/llmobs/EvalProcessingWorker.java +++ b/dd-java-agent/agent-llmobs/src/main/java/datadog/trace/llmobs/EvalProcessingWorker.java @@ -12,7 +12,8 @@ import datadog.communication.http.OkHttpUtils; import datadog.trace.api.Config; import datadog.trace.llmobs.domain.LLMObsEval; -import datadog.trace.util.queue.MpscArrayQueue; +import datadog.trace.util.queue.BaseQueue; +import datadog.trace.util.queue.Queues; import java.util.ArrayList; import java.util.List; import java.util.concurrent.TimeUnit; @@ -34,7 +35,7 @@ public class EvalProcessingWorker 
implements AutoCloseable { private static final Logger log = LoggerFactory.getLogger(EvalProcessingWorker.class); - private final MpscArrayQueue queue; + private final BaseQueue queue; private final Thread serializerThread; public EvalProcessingWorker( @@ -43,7 +44,7 @@ public EvalProcessingWorker( final TimeUnit timeUnit, final SharedCommunicationObjects sco, Config config) { - this.queue = new MpscArrayQueue<>(capacity); + this.queue = Queues.mpscArrayQueue(capacity); boolean isAgentless = config.isLlmObsAgentlessEnabled(); if (isAgentless && (config.getApiKey() == null || config.getApiKey().isEmpty())) { @@ -98,7 +99,7 @@ public static class EvalSerializingHandler implements Runnable { private static final Logger log = LoggerFactory.getLogger(EvalSerializingHandler.class); private static final int FLUSH_THRESHOLD = 50; - private final MpscArrayQueue queue; + private final BaseQueue queue; private final long ticksRequiredToFlush; private long lastTicks; @@ -111,7 +112,7 @@ public static class EvalSerializingHandler implements Runnable { private final List buffer = new ArrayList<>(); public EvalSerializingHandler( - final MpscArrayQueue queue, + final BaseQueue queue, final long flushInterval, final TimeUnit timeUnit, final HttpUrl submissionUrl, diff --git a/dd-trace-core/build.gradle b/dd-trace-core/build.gradle index 4b2dc87612c..a7a7613280f 100644 --- a/dd-trace-core/build.gradle +++ b/dd-trace-core/build.gradle @@ -66,6 +66,7 @@ dependencies { api project(':dd-trace-api') api project(':communication') api project(':internal-api') + api project(':internal-api:internal-api-9') implementation project(':components:json') implementation project(':utils:container-utils') implementation project(':utils:socket-utils') diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/Aggregator.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/Aggregator.java index 00d0b6cec1d..6c868019b21 100644 --- 
a/dd-trace-core/src/main/java/datadog/trace/common/metrics/Aggregator.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/Aggregator.java @@ -4,7 +4,7 @@ import datadog.trace.common.metrics.SignalItem.StopSignal; import datadog.trace.core.util.LRUCache; -import datadog.trace.util.queue.MpscArrayQueue; +import datadog.trace.util.queue.BaseQueue; import java.util.Iterator; import java.util.Map; import java.util.Queue; @@ -22,7 +22,7 @@ final class Aggregator implements Runnable { private static final Logger log = LoggerFactory.getLogger(Aggregator.class); private final Queue batchPool; - private final MpscArrayQueue inbox; + private final BaseQueue inbox; private final LRUCache aggregates; private final ConcurrentMap pending; private final Set commonKeys; @@ -39,7 +39,7 @@ final class Aggregator implements Runnable { Aggregator( MetricWriter writer, Queue batchPool, - MpscArrayQueue inbox, + BaseQueue inbox, ConcurrentMap pending, final Set commonKeys, int maxAggregates, @@ -60,7 +60,7 @@ final class Aggregator implements Runnable { Aggregator( MetricWriter writer, Queue batchPool, - MpscArrayQueue inbox, + BaseQueue inbox, ConcurrentMap pending, final Set commonKeys, int maxAggregates, diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/ConflatingMetricsAggregator.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/ConflatingMetricsAggregator.java index 60c7ee446c8..fef5e2e3d5b 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/ConflatingMetricsAggregator.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/ConflatingMetricsAggregator.java @@ -33,8 +33,8 @@ import datadog.trace.core.DDTraceCoreInfo; import datadog.trace.core.monitor.HealthMetrics; import datadog.trace.util.AgentTaskScheduler; -import datadog.trace.util.queue.MpscArrayQueue; -import datadog.trace.util.queue.SpmcArrayQueue; +import datadog.trace.util.queue.BaseQueue; +import datadog.trace.util.queue.Queues; import 
java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -93,7 +93,7 @@ public final class ConflatingMetricsAggregator implements MetricsAggregator, Eve private final ConcurrentHashMap pending; private final ConcurrentHashMap keys; private final Thread thread; - private final MpscArrayQueue inbox; + private final BaseQueue inbox; private final Sink sink; private final Aggregator aggregator; private final long reportingInterval; @@ -176,8 +176,8 @@ public ConflatingMetricsAggregator( long reportingInterval, TimeUnit timeUnit) { this.ignoredResources = ignoredResources; - this.inbox = new MpscArrayQueue<>(queueSize); - this.batchPool = new SpmcArrayQueue<>(maxAggregates); + this.inbox = Queues.mpscArrayQueue(queueSize); + this.batchPool = Queues.spmcArrayQueue(maxAggregates); this.pending = new ConcurrentHashMap<>(maxAggregates * 4 / 3); this.keys = new ConcurrentHashMap<>(); this.features = features; diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/OkHttpSink.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/OkHttpSink.java index b717a3dbcf1..45a39f94daf 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/OkHttpSink.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/OkHttpSink.java @@ -10,7 +10,8 @@ import static java.util.concurrent.TimeUnit.SECONDS; import datadog.trace.util.AgentTaskScheduler; -import datadog.trace.util.queue.SpscArrayQueue; +import datadog.trace.util.queue.BaseQueue; +import datadog.trace.util.queue.Queues; import java.io.IOException; import java.nio.ByteBuffer; import java.util.Collections; @@ -36,7 +37,7 @@ public final class OkHttpSink implements Sink, EventListener { private final OkHttpClient client; private final HttpUrl metricsUrl; private final List listeners; - private final SpscArrayQueue enqueuedRequests = new SpscArrayQueue<>(16); + private final BaseQueue enqueuedRequests = Queues.spscArrayQueue(16); private final AtomicLong lastRequestTime = 
new AtomicLong(); private final AtomicLong asyncRequestCounter = new AtomicLong(); private final boolean bufferingEnabled; @@ -157,9 +158,9 @@ private void handleFailure(okhttp3.Response response) throws IOException { private static final class Sender implements AgentTaskScheduler.Task { - private final SpscArrayQueue inbox; + private final BaseQueue inbox; - private Sender(SpscArrayQueue inbox) { + private Sender(BaseQueue inbox) { this.inbox = inbox; } diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/SpanSamplingWorker.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/SpanSamplingWorker.java index f76f5aa2073..b5a9c3074d1 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/SpanSamplingWorker.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/SpanSamplingWorker.java @@ -9,7 +9,8 @@ import datadog.trace.common.sampling.SingleSpanSampler; import datadog.trace.core.DDSpan; import datadog.trace.core.monitor.HealthMetrics; -import datadog.trace.util.queue.MpscBlockingConsumerArrayQueue; +import datadog.trace.util.queue.BaseQueue; +import datadog.trace.util.queue.Queues; import java.util.ArrayList; import java.util.List; import java.util.Queue; @@ -44,7 +45,7 @@ class DefaultSpanSamplingWorker implements SpanSamplingWorker { private final Thread spanSamplingThread; private final SamplingHandler samplingHandler; - private final MpscBlockingConsumerArrayQueue spanSamplingQueue; + private final BaseQueue spanSamplingQueue; private final Queue primaryQueue; private final Queue secondaryQueue; private final SingleSpanSampler singleSpanSampler; @@ -61,7 +62,7 @@ protected DefaultSpanSamplingWorker( DroppingPolicy droppingPolicy) { this.samplingHandler = new SamplingHandler(); this.spanSamplingThread = newAgentThread(SPAN_SAMPLING_PROCESSOR, samplingHandler); - this.spanSamplingQueue = new MpscBlockingConsumerArrayQueue<>(capacity); + this.spanSamplingQueue = Queues.mpscBlockingConsumerArrayQueue(capacity); 
this.primaryQueue = primaryQueue; this.secondaryQueue = secondaryQueue; this.singleSpanSampler = singleSpanSampler; @@ -171,7 +172,7 @@ public void onEvent(Object event) { } } - private void consumeBatch(MpscBlockingConsumerArrayQueue queue) { + private void consumeBatch(BaseQueue queue) { queue.drain(this::onEvent, queue.size()); } } diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/TraceProcessingWorker.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/TraceProcessingWorker.java index bb730045c54..6464653e784 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/TraceProcessingWorker.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/TraceProcessingWorker.java @@ -15,7 +15,8 @@ import datadog.trace.core.CoreSpan; import datadog.trace.core.DDSpan; import datadog.trace.core.monitor.HealthMetrics; -import datadog.trace.util.queue.MpscArrayQueue; +import datadog.trace.util.queue.BaseQueue; +import datadog.trace.util.queue.Queues; import java.util.List; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; @@ -35,8 +36,8 @@ public class TraceProcessingWorker implements AutoCloseable { private static final Logger log = LoggerFactory.getLogger(TraceProcessingWorker.class); private final PrioritizationStrategy prioritizationStrategy; - private final MpscArrayQueue primaryQueue; - private final MpscArrayQueue secondaryQueue; + private final BaseQueue primaryQueue; + private final BaseQueue secondaryQueue; private final TraceSerializingHandler serializingHandler; private final Thread serializerThread; private final int capacity; @@ -120,14 +121,14 @@ public long getRemainingCapacity() { return primaryQueue.remainingCapacity(); } - private static MpscArrayQueue createQueue(int capacity) { - return new MpscArrayQueue<>(capacity); + private static BaseQueue createQueue(int capacity) { + return Queues.mpscArrayQueue(capacity); } public static class TraceSerializingHandler implements 
Runnable { - private final MpscArrayQueue primaryQueue; - private final MpscArrayQueue secondaryQueue; + private final BaseQueue primaryQueue; + private final BaseQueue secondaryQueue; private final HealthMetrics healthMetrics; private final long ticksRequiredToFlush; private final boolean doTimeFlush; @@ -135,8 +136,8 @@ public static class TraceSerializingHandler implements Runnable { private long lastTicks; public TraceSerializingHandler( - final MpscArrayQueue primaryQueue, - final MpscArrayQueue secondaryQueue, + final BaseQueue primaryQueue, + final BaseQueue secondaryQueue, final HealthMetrics healthMetrics, final PayloadDispatcher payloadDispatcher, final long flushInterval, @@ -237,7 +238,7 @@ private boolean shouldFlush() { return false; } - private void consumeBatch(MpscArrayQueue queue) { + private void consumeBatch(BaseQueue queue) { queue.drain(this::onEvent, queue.size()); } diff --git a/dd-trace-core/src/main/java/datadog/trace/core/PendingTraceBuffer.java b/dd-trace-core/src/main/java/datadog/trace/core/PendingTraceBuffer.java index bfb74c34455..d00cc73ee53 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/PendingTraceBuffer.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/PendingTraceBuffer.java @@ -12,7 +12,8 @@ import datadog.trace.api.time.TimeSource; import datadog.trace.common.writer.TraceDumpJsonExporter; import datadog.trace.core.monitor.HealthMetrics; -import datadog.trace.util.queue.MpscBlockingConsumerArrayQueue; +import datadog.trace.util.queue.BaseQueue; +import datadog.trace.util.queue.Queues; import java.io.IOException; import java.util.ArrayList; import java.util.Comparator; @@ -62,7 +63,7 @@ private static class DelayingPendingTraceBuffer extends PendingTraceBuffer { private static final CommandElement DUMP_ELEMENT = new CommandElement(); private static final CommandElement STAND_IN_ELEMENT = new CommandElement(); - private final MpscBlockingConsumerArrayQueue queue; + private final BaseQueue queue; private final 
Thread worker; private final TimeSource timeSource; @@ -292,7 +293,7 @@ public DelayingPendingTraceBuffer( Config config, SharedCommunicationObjects sharedCommunicationObjects, HealthMetrics healthMetrics) { - this.queue = new MpscBlockingConsumerArrayQueue<>(bufferSize); + this.queue = Queues.mpscBlockingConsumerArrayQueue(bufferSize); this.worker = newAgentThread(TRACE_MONITOR, new Worker()); this.timeSource = timeSource; boolean runningSpansEnabled = config.isLongRunningTraceEnabled(); diff --git a/dd-trace-core/src/main/java/datadog/trace/core/datastreams/DefaultDataStreamsMonitoring.java b/dd-trace-core/src/main/java/datadog/trace/core/datastreams/DefaultDataStreamsMonitoring.java index 88d4845fb2c..5ac444169f7 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/datastreams/DefaultDataStreamsMonitoring.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/datastreams/DefaultDataStreamsMonitoring.java @@ -28,7 +28,8 @@ import datadog.trace.core.DDSpan; import datadog.trace.core.DDTraceCoreInfo; import datadog.trace.util.AgentTaskScheduler; -import datadog.trace.util.queue.MpscArrayQueue; +import datadog.trace.util.queue.BaseQueue; +import datadog.trace.util.queue.Queues; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; @@ -52,7 +53,7 @@ public class DefaultDataStreamsMonitoring implements DataStreamsMonitoring, Even new StatsPoint(DataStreamsTags.EMPTY, 0, 0, 0, 0, 0, 0, 0, null); private final Map> timeToBucket = new HashMap<>(); - private final MpscArrayQueue inbox = new MpscArrayQueue<>(1024); + private final BaseQueue inbox = Queues.mpscArrayQueue(1024); private final DatastreamsPayloadWriter payloadWriter; private final DDAgentFeaturesDiscovery features; private final TimeSource timeSource; diff --git a/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/stacktrace/queue/MPSCQueueBenchmark.java b/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/stacktrace/queue/MPSCQueueBenchmark.java new 
file mode 100644 index 00000000000..1c504a8302b --- /dev/null +++ b/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/stacktrace/queue/MPSCQueueBenchmark.java @@ -0,0 +1,78 @@ +package datadog.trace.util.stacktrace.queue; + +import datadog.trace.util.queue.MpscArrayQueueVarHandle; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Group; +import org.openjdk.jmh.annotations.GroupThreads; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +/* +MPSCQueueBenchmark.queueTest 1024 thrpt 145.261 ops/us +MPSCQueueBenchmark.queueTest:consume 1024 thrpt 84.185 ops/us +MPSCQueueBenchmark.queueTest:produce 1024 thrpt 61.076 ops/us +MPSCQueueBenchmark.queueTest 65536 thrpt 187.609 ops/us +MPSCQueueBenchmark.queueTest:consume 65536 thrpt 117.097 ops/us +MPSCQueueBenchmark.queueTest:produce 65536 thrpt 70.512 ops/us + */ +@BenchmarkMode(Mode.Throughput) +@Warmup(iterations = 1, time = 30) +@Measurement(iterations = 1, time = 30) +@Fork(1) +@OutputTimeUnit(TimeUnit.MICROSECONDS) +@State(Scope.Benchmark) +public class MPSCQueueBenchmark { + @State(Scope.Group) + public static class QueueState { + MpscArrayQueueVarHandle queue; + CountDownLatch consumerReady; + + @Param({"1024", "65536"}) + int capacity; + + @Setup(Level.Iteration) + public void setup() { + queue = new MpscArrayQueueVarHandle<>(capacity); + consumerReady = new CountDownLatch(1); + } + } + + @Benchmark + 
@Group("queueTest") + @GroupThreads(4) + public void produce(QueueState state) { + try { + state.consumerReady.await(); // wait until consumer is ready + } catch (InterruptedException ignored) { + } + + // bounded attempt: try once, then yield if full + boolean offered = state.queue.offer(0); + if (!offered) { + Thread.yield(); + } + } + + @Benchmark + @Group("queueTest") + @GroupThreads(1) + public void consume(QueueState state, Blackhole bh) { + state.consumerReady.countDown(); // signal producers can start + Integer v = state.queue.poll(); + if (v != null) { + bh.consume(v); + } + } +} diff --git a/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/stacktrace/queue/SPMCQueueBenchmark.java b/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/stacktrace/queue/SPMCQueueBenchmark.java new file mode 100644 index 00000000000..a88f17a7a48 --- /dev/null +++ b/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/stacktrace/queue/SPMCQueueBenchmark.java @@ -0,0 +1,83 @@ +package datadog.trace.util.stacktrace.queue; + +import datadog.trace.util.queue.SpmcArrayQueueVarHandle; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.locks.LockSupport; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Group; +import org.openjdk.jmh.annotations.GroupThreads; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; + +/* +SPMCQueueBenchmark.spmc N/A thrpt 5 484.103 ± 64.709 ops/us +SPMCQueueBenchmark.spmc:consumer N/A thrpt 5 466.954 ± 65.712 ops/us +SPMCQueueBenchmark.spmc:producer N/A thrpt 5 17.149 ± 1.541 ops/us + */ +@BenchmarkMode(Mode.Throughput) 
+@State(Scope.Group) +@Fork(value = 1, warmups = 0) +@OutputTimeUnit(TimeUnit.MICROSECONDS) +public class SPMCQueueBenchmark { + + private static final int QUEUE_CAPACITY = 1024; + private static final int ITEMS_TO_PRODUCE = 100_000; + + private SpmcArrayQueueVarHandle queue; + private AtomicInteger produced; + private AtomicInteger consumed; + + @Setup(Level.Iteration) + public void setup() { + queue = new SpmcArrayQueueVarHandle<>(QUEUE_CAPACITY); + produced = new AtomicInteger(0); + consumed = new AtomicInteger(0); + + // Pre-fill queue for warmup safety + int warmupFill = Math.min(QUEUE_CAPACITY / 2, ITEMS_TO_PRODUCE); + for (int i = 0; i < warmupFill; i++) { + queue.offer(i); + produced.incrementAndGet(); + } + } + + // Single producer in the group + @Benchmark + @Group("spmc") + @GroupThreads(1) + public void producer() { + int i = produced.getAndIncrement(); + if (i < ITEMS_TO_PRODUCE) { + while (!queue.offer(i)) { + LockSupport.parkNanos(1L); + } + } + } + + // Multiple consumers in the group + @Benchmark + @Group("spmc") + @GroupThreads(4) // adjust number of consumers + public int consumer() { + while (true) { + Integer val = queue.poll(); + if (val != null) { + consumed.incrementAndGet(); + return val; + } + + if (produced.get() >= ITEMS_TO_PRODUCE && queue.isEmpty()) { + return 0; + } + + LockSupport.parkNanos(1L); + } + } +} diff --git a/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/stacktrace/queue/SPSCQueueBenchmark.java b/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/stacktrace/queue/SPSCQueueBenchmark.java new file mode 100644 index 00000000000..26db96ef1a0 --- /dev/null +++ b/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/stacktrace/queue/SPSCQueueBenchmark.java @@ -0,0 +1,71 @@ +package datadog.trace.util.stacktrace.queue; + +import datadog.trace.util.queue.SpscArrayQueueVarHandle; +import java.util.concurrent.TimeUnit; +import org.openjdk.jmh.annotations.Benchmark; +import 
org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Group; +import org.openjdk.jmh.annotations.GroupThreads; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +/* +Benchmark (capacity) Mode Cnt Score Error Units +SPSCQueueBenchmark.queueTest 1024 thrpt 91.112 ops/us +SPSCQueueBenchmark.queueTest:consume 1024 thrpt 52.640 ops/us +SPSCQueueBenchmark.queueTest:produce 1024 thrpt 38.472 ops/us +SPSCQueueBenchmark.queueTest 65536 thrpt 140.663 ops/us +SPSCQueueBenchmark.queueTest:consume 65536 thrpt 70.363 ops/us +SPSCQueueBenchmark.queueTest:produce 65536 thrpt 70.300 ops/us + */ +@BenchmarkMode(Mode.Throughput) +@Warmup(iterations = 1, time = 30) +@Measurement(iterations = 1, time = 30) +@Fork(1) +@OutputTimeUnit(TimeUnit.MICROSECONDS) +@State(Scope.Benchmark) +public class SPSCQueueBenchmark { + @State(Scope.Group) + public static class QueueState { + SpscArrayQueueVarHandle queue; + + @Param({"1024", "65536"}) + int capacity; + + @Setup(Level.Iteration) + public void setup() { + queue = new SpscArrayQueueVarHandle<>(capacity); + } + } + + @Benchmark + @Group("queueTest") + @GroupThreads(1) + public void produce(QueueState state) { + + // bounded attempt: try once, then yield if full + boolean offered = state.queue.offer(0); + if (!offered) { + Thread.yield(); + } + } + + @Benchmark + @Group("queueTest") + @GroupThreads(1) + public void consume(QueueState state, Blackhole bh) { + Integer v = state.queue.poll(); + if (v != null) { + bh.consume(v); + } + } +} diff --git 
a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscArrayQueueVarHandle.java b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscArrayQueueVarHandle.java new file mode 100644 index 00000000000..dc32f1b3c27 --- /dev/null +++ b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscArrayQueueVarHandle.java @@ -0,0 +1,139 @@ +package datadog.trace.util.queue; + +import java.lang.invoke.MethodHandles; +import java.lang.invoke.MethodHandles.Lookup; +import java.lang.invoke.VarHandle; +import java.util.concurrent.locks.LockSupport; + +/** + * A Multiple-Producer, Single-Consumer (MPSC) bounded lock-free queue using a circular array and + * VarHandles. + * + *

All operations are wait-free for the consumer and lock-free for producers. + * + * @param the type of elements stored + */ +public class MpscArrayQueueVarHandle extends BaseQueue { + private static final VarHandle ARRAY_HANDLE; + private static final VarHandle HEAD_HANDLE; + private static final VarHandle TAIL_HANDLE; + + static { + try { + final Lookup lookup = MethodHandles.lookup(); + TAIL_HANDLE = lookup.findVarHandle(MpscArrayQueueVarHandle.class, "tail", long.class); + HEAD_HANDLE = lookup.findVarHandle(MpscArrayQueueVarHandle.class, "head", long.class); + ARRAY_HANDLE = MethodHandles.arrayElementVarHandle(Object[].class); + } catch (Throwable t) { + throw new IllegalStateException(t); + } + } + + /** The backing array (plain Java array for VarHandle access) */ + private final Object[] buffer; + + // Padding + @SuppressWarnings("unused") + private long p0, p1, p2, p3, p4, p5, p6; + + /** The next free slot for producers */ + private volatile long tail = 0L; + + // Padding + @SuppressWarnings("unused") + private long q0, q1, q2, q3, q4, q5, q6; + + // Padding + @SuppressWarnings("unused") + private long p10, p11, p12, p13, p14, p15, p16; + + /** The next slot to consume (single-threaded) */ + private volatile long head = 0L; + + // Padding + private long q10, q11, q12, q13, q14, q15, q16; + + /** + * Creates a new MPSC queue. + * + * @param requestedCapacity the desired capacity, rounded up to the next power of two if needed + */ + public MpscArrayQueueVarHandle(int requestedCapacity) { + super(requestedCapacity); + this.buffer = new Object[capacity]; + } + + /** + * Attempts to add an element to the queue. + * + *

This method uses a CAS loop on {@code tail} to allow multiple producers to safely claim + * distinct slots. The producer then performs a release-store into the buffer using {@code + * ARRAY_HANDLE.setRelease()}. + * + * @param e the element to add (must be non-null) + * @return {@code true} if the element was enqueued, {@code false} if the queue is full + */ + @Override + public boolean offer(E e) { + if (e == null) { + throw new NullPointerException(); + } + + while (true) { + long currentTail = (long) TAIL_HANDLE.getVolatile(this); + int index = (int) (currentTail & mask); + + Object existing = ARRAY_HANDLE.getVolatile(buffer, index); + if (existing != null) { + return false; // queue full + } + + // CAS tail to claim the slot + if (TAIL_HANDLE.compareAndSet(this, currentTail, currentTail + 1)) { + ARRAY_HANDLE.setRelease(buffer, index, e); // publish with release semantics + return true; + } + + // CAS failed → short backoff to reduce contention + LockSupport.parkNanos(1); + } + } + + /** + * Removes and returns the next element, or {@code null} if the queue is empty. + * + *

This method is single-threaded (one consumer). It performs a volatile read of the buffer, + * and then uses {@code setRelease(null)} to free the slot. + * + * @return the dequeued element, or null if the queue is empty + */ + @Override + @SuppressWarnings("unchecked") + public E poll() { + long currentHead = (long) HEAD_HANDLE.getVolatile(this); + int index = (int) (currentHead & mask); + + Object value = ARRAY_HANDLE.getVolatile(buffer, index); + if (value == null) { + return null; // empty + } + + ARRAY_HANDLE.setRelease(buffer, index, null); // mark slot free + HEAD_HANDLE.setVolatile(this, currentHead + 1); // advance head + return (E) value; + } + + @Override + @SuppressWarnings("unchecked") + public E peek() { + int index = (int) ((long) HEAD_HANDLE.getVolatile(this) & mask); + return (E) ARRAY_HANDLE.getVolatile(buffer, index); + } + + @Override + public int size() { + long currentTail = (long) TAIL_HANDLE.getVolatile(this); + long currentHead = (long) HEAD_HANDLE.getVolatile(this); + return (int) (currentTail - currentHead); + } +} diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandle.java b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandle.java new file mode 100644 index 00000000000..d9fcd28e733 --- /dev/null +++ b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandle.java @@ -0,0 +1,92 @@ +package datadog.trace.util.queue; + +import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.ReentrantLock; + +/** + * JCtools-like MpscBlockingConsumerArrayQueue implemented without Unsafe. + * + *

It features nonblocking offer/poll methods and blocking (condition based) take/put. + */ +public class MpscBlockingConsumerArrayQueueVarHandle extends MpscArrayQueueVarHandle { + // Blocking controls + private final ReentrantLock lock = new ReentrantLock(); + private final Condition notEmpty = lock.newCondition(); + private final Condition notFull = lock.newCondition(); + + public MpscBlockingConsumerArrayQueueVarHandle(int capacity) { + super(capacity); + } + + @Override + public boolean offer(E e) { + final boolean success = super.offer(e); + if (success) { + signalNotEmpty(); + } + return success; + } + + public void put(E e) throws InterruptedException { + while (!offer(e)) { + awaitNotFull(); + } + } + + @Override + public E poll() { + final E ret = super.poll(); + if (ret != null) { + signalNotFull(); + } + return ret; + } + + public E take() throws InterruptedException { + E e; + while ((e = poll()) == null) { + awaitNotEmpty(); + } + return e; + } + + private void signalNotEmpty() { + lock.lock(); + try { + notEmpty.signal(); + } finally { + lock.unlock(); + } + } + + private void signalNotFull() { + lock.lock(); + try { + notFull.signal(); + } finally { + lock.unlock(); + } + } + + private void awaitNotEmpty() throws InterruptedException { + lock.lockInterruptibly(); + try { + while (isEmpty()) { + notEmpty.await(); + } + } finally { + lock.unlock(); + } + } + + private void awaitNotFull() throws InterruptedException { + lock.lockInterruptibly(); + try { + while (size() == capacity) { + notFull.await(); + } + } finally { + lock.unlock(); + } + } +} diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/Queues.java b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/Queues.java new file mode 100644 index 00000000000..78cc3fcae70 --- /dev/null +++ b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/Queues.java @@ -0,0 +1,37 @@ +package datadog.trace.util.queue; + +import 
datadog.environment.JavaVirtualMachine; + +public final class Queues { + private static final boolean CAN_USE_VARHANDLES = JavaVirtualMachine.isJavaVersionAtLeast(9); + + private Queues() {} + + public static BaseQueue mpscArrayQueue(int requestedCapacity) { + if (CAN_USE_VARHANDLES) { + return new MpscArrayQueueVarHandle<>(requestedCapacity); + } + return new MpscArrayQueue<>(requestedCapacity); + } + + public static BaseQueue spmcArrayQueue(int requestedCapacity) { + if (CAN_USE_VARHANDLES) { + return new SpmcArrayQueueVarHandle<>(requestedCapacity); + } + return new SpmcArrayQueue<>(requestedCapacity); + } + + public static BaseQueue mpscBlockingConsumerArrayQueue(int requestedCapacity) { + if (CAN_USE_VARHANDLES) { + return new MpscBlockingConsumerArrayQueueVarHandle<>(requestedCapacity); + } + return new MpscBlockingConsumerArrayQueue<>(requestedCapacity); + } + + public static BaseQueue spscArrayQueue(int requestedCapacity) { + if (CAN_USE_VARHANDLES) { + return new SpscArrayQueueVarHandle<>(requestedCapacity); + } + return new SpscArrayQueue<>(requestedCapacity); + } +} diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpmcArrayQueueVarHandle.java b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpmcArrayQueueVarHandle.java new file mode 100644 index 00000000000..8a5b0523f37 --- /dev/null +++ b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpmcArrayQueueVarHandle.java @@ -0,0 +1,145 @@ +package datadog.trace.util.queue; + +import java.lang.invoke.MethodHandles; +import java.lang.invoke.VarHandle; +import java.util.concurrent.locks.LockSupport; + +/** + * A Single-Producer, Multiple-Consumer (SPMC) bounded, lock-free queue based on a circular array. + * + *

All operations are wait-free for the single producer and lock-free for consumers. + * + * @param the element type + */ +public class SpmcArrayQueueVarHandle extends BaseQueue { + + private static final VarHandle HEAD_HANDLE; + private static final VarHandle TAIL_HANDLE; + private static final VarHandle ARRAY_HANDLE; + + static { + try { + final MethodHandles.Lookup lookup = MethodHandles.lookup(); + HEAD_HANDLE = lookup.findVarHandle(SpmcArrayQueueVarHandle.class, "head", long.class); + TAIL_HANDLE = lookup.findVarHandle(SpmcArrayQueueVarHandle.class, "tail", long.class); + ARRAY_HANDLE = MethodHandles.arrayElementVarHandle(Object[].class); + } catch (ReflectiveOperationException e) { + throw new ExceptionInInitializerError(e); + } + } + + /** Backing array buffer. */ + private final Object[] buffer; + + // Padding + @SuppressWarnings("unused") + private long p0, p1, p2, p3, p4, p5, p6; + + /** Tail index (producer-only). */ + private volatile long tail = 0L; + + // Padding + @SuppressWarnings("unused") + private long q0, q1, q2, q3, q4, q5, q6; + + // Padding + @SuppressWarnings("unused") + private long p10, p11, p12, p13, p14, p15, p16; + + /** Head index (claimed atomically by multiple consumers). */ + private volatile long head = 0L; + + // Padding + @SuppressWarnings("unused") + private long q10, q11, q12, q13, q14, q15, q16; + + /** + * Creates a new SPMC queue with the given capacity. + * + * @param requestedCapacity the desired capacity, rounded up to the next power of two if needed + */ + public SpmcArrayQueueVarHandle(int requestedCapacity) { + super(requestedCapacity); + this.buffer = new Object[capacity]; + } + + /** + * Attempts to enqueue the given element. + * + *

This method is called by a single producer, so no CAS is required. It uses {@code + * setRelease} to publish the element and the new tail value. + * + * @param e the element to add + * @return {@code true} if added, {@code false} if the queue is full + * @throws NullPointerException if {@code e} is null + */ + @Override + public boolean offer(E e) { + if (e == null) { + throw new NullPointerException(); + } + + long currentTail = (long) TAIL_HANDLE.getVolatile(this); + int index = (int) (currentTail & mask); + + // Check if slot is still occupied (queue full) + Object existing = ARRAY_HANDLE.getVolatile(buffer, index); + if (existing != null) { + return false; // full + } + + // Publish the element (release ensures write is visible to consumers) + ARRAY_HANDLE.setRelease(buffer, index, e); + + // Advance tail (release ensures the enqueue is visible to consumers) + TAIL_HANDLE.setRelease(this, currentTail + 1); + + return true; + } + + /** + * Removes and returns the next element, or {@code null} if the queue is empty. + * + *

Consumers compete via CAS on {@code head}. The successful thread claims the index and clears + * the slot with release semantics. + * + * @return the dequeued element, or {@code null} if empty + */ + @Override + @SuppressWarnings("unchecked") + public E poll() { + while (true) { + long currentHead = (long) HEAD_HANDLE.getVolatile(this); + int index = (int) (currentHead & mask); + + Object value = ARRAY_HANDLE.getAcquire(buffer, index); + if (value == null) { + return null; // queue empty or not yet visible + } + + // Attempt to claim the element + if (HEAD_HANDLE.compareAndSet(this, currentHead, currentHead + 1)) { + // CAS succeeded: this consumer owns the slot + ARRAY_HANDLE.setRelease(buffer, index, null); // mark slot free + return (E) value; + } + + // CAS failed — another consumer took it, retry + LockSupport.parkNanos(1L); + } + } + + @Override + @SuppressWarnings("unchecked") + public E peek() { + int index = (int) ((long) HEAD_HANDLE.getVolatile(this) & mask); + return (E) ARRAY_HANDLE.getVolatile(buffer, index); + } + + @Override + public int size() { + long currentTail = (long) TAIL_HANDLE.getVolatile(this); + long currentHead = (long) HEAD_HANDLE.getVolatile(this); + return (int) (currentTail - currentHead); + } +} diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpscArrayQueueVarHandle.java b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpscArrayQueueVarHandle.java new file mode 100644 index 00000000000..2469adc3441 --- /dev/null +++ b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpscArrayQueueVarHandle.java @@ -0,0 +1,139 @@ +package datadog.trace.util.queue; + +import java.lang.invoke.MethodHandles; +import java.lang.invoke.VarHandle; + +/** + * A high-performance Single-Producer, Single-Consumer (SPSC) bounded queue using a circular buffer + * and VarHandle-based release/acquire memory semantics. + * + *

It is completely lock-free and wait-free, relying solely on release/acquire ordering for + * correctness and visibility. + * + * @param the type of elements held in this queue + */ +public class SpscArrayQueueVarHandle extends BaseQueue { + /** Backing array storing elements. */ + private final Object[] buffer; + + // ===================== Padding to avoid false sharing ===================== + + @SuppressWarnings("unused") + private long p0, p1, p2, p3, p4, p5, p6; + + /** Tail index (producer writes). */ + private volatile long tail = 0L; + + @SuppressWarnings("unused") + private long q0, q1, q2, q3, q4, q5, q6; + @SuppressWarnings("unused") + private long p10, p11, p12, p13, p14, p15, p16; + + /** Head index (consumer writes). */ + private volatile long head = 0L; + + @SuppressWarnings("unused") + private long q10, q11, q12, q13, q14, q15, q16; + + private static final VarHandle HEAD_HANDLE; + private static final VarHandle TAIL_HANDLE; + private static final VarHandle ARRAY_HANDLE; + + static { + try { + final MethodHandles.Lookup lookup = MethodHandles.lookup(); + HEAD_HANDLE = lookup.findVarHandle(SpscArrayQueueVarHandle.class, "head", long.class); + TAIL_HANDLE = lookup.findVarHandle(SpscArrayQueueVarHandle.class, "tail", long.class); + ARRAY_HANDLE = MethodHandles.arrayElementVarHandle(Object[].class); + } catch (Throwable t) { + throw new IllegalStateException(t); + } + } + + /** + * Creates a new SPSC queue able to hold at least the requested number of elements. + * + * @param requestedCapacity the desired capacity, rounded up to the next power of two if needed + */ + public SpscArrayQueueVarHandle(int requestedCapacity) { + super(requestedCapacity); + this.buffer = new Object[capacity]; + } + + // ===================== OFFER (Producer only) ===================== + + /** + * Enqueues an element if space is available. + * + *

Since only one producer exists, this method needs no CAS: it uses volatile reads and release + * stores, and never contends or retries. + * + * @param e the element to enqueue + * @return {@code true} if enqueued, {@code false} if the queue is full + * @throws NullPointerException if {@code e} is null + */ + @Override + public boolean offer(E e) { + if (e == null) { + throw new NullPointerException(); + } + + long currentTail = (long) TAIL_HANDLE.getVolatile(this); + int index = (int) (currentTail & mask); + + // Check if the next slot is still occupied + Object existing = ARRAY_HANDLE.getVolatile(buffer, index); + if (existing != null) { + return false; // queue full + } + + // Publish element (release semantics) + ARRAY_HANDLE.setRelease(buffer, index, e); + + // Advance tail (release ensures enqueue visibility) + TAIL_HANDLE.setRelease(this, currentTail + 1); + return true; + } + + /** + * Dequeues and returns the next element, or {@code null} if the queue is empty. + * + *

Since only one consumer exists, this method is race-free and does not need CAS. It uses + * acquire semantics to ensure the element is fully visible. + * + * @return the dequeued element, or {@code null} if empty + */ + @Override + @SuppressWarnings("unchecked") + public E poll() { + long currentHead = (long) HEAD_HANDLE.getVolatile(this); + int index = (int) (currentHead & mask); + + Object value = ARRAY_HANDLE.getAcquire(buffer, index); + if (value == null) { + return null; // queue empty + } + + // Clear slot (release to make it visible to producer) + ARRAY_HANDLE.setRelease(buffer, index, null); + + // Advance head (release to ensure ordering) + HEAD_HANDLE.setRelease(this, currentHead + 1); + + return (E) value; + } + + @Override + @SuppressWarnings("unchecked") + public E peek() { + int index = (int) ((long) HEAD_HANDLE.getVolatile(this) & mask); + return (E) ARRAY_HANDLE.getVolatile(buffer, index); + } + + @Override + public int size() { + long currentTail = (long) TAIL_HANDLE.getVolatile(this); + long currentHead = (long) HEAD_HANDLE.getVolatile(this); + return (int) (currentTail - currentHead); + } +} diff --git a/internal-api/src/main/java/datadog/trace/util/queue/BaseQueue.java b/internal-api/src/main/java/datadog/trace/util/queue/BaseQueue.java index 2edc5c19214..7cbec5f5794 100644 --- a/internal-api/src/main/java/datadog/trace/util/queue/BaseQueue.java +++ b/internal-api/src/main/java/datadog/trace/util/queue/BaseQueue.java @@ -3,14 +3,16 @@ import static datadog.trace.util.BitUtils.nextPowerOfTwo; import java.util.AbstractQueue; +import java.util.Collection; import java.util.Iterator; +import java.util.concurrent.BlockingQueue; import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.LockSupport; import java.util.function.Consumer; import java.util.function.Supplier; import javax.annotation.Nonnull; -public abstract class BaseQueue extends AbstractQueue { +public abstract class BaseQueue extends AbstractQueue implements BlockingQueue { 
/** The capacity of the queue (must be a power of two) */ protected final int capacity; @@ -194,4 +196,24 @@ public int remainingCapacity() { public int capacity() { return capacity; } + + @Override + public void put(E e) throws InterruptedException { + throw new UnsupportedOperationException("Not implementing blocking operations for producers"); + } + + @Override + public E take() throws InterruptedException { + throw new UnsupportedOperationException("Not implementing blocking operations for consumers"); + } + + @Override + public int drainTo(Collection c) { + return drainTo(c, Integer.MAX_VALUE); + } + + @Override + public int drainTo(Collection c, int maxElements) { + return drain(c::add, maxElements); + } } diff --git a/internal-api/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueue.java b/internal-api/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueue.java index 8efd593abcf..5ad007b000b 100644 --- a/internal-api/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueue.java +++ b/internal-api/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueue.java @@ -1,7 +1,5 @@ package datadog.trace.util.queue; -import java.util.Collection; -import java.util.concurrent.BlockingQueue; import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.ReentrantLock; @@ -10,8 +8,7 @@ * *

It features nonblocking offer/poll methods and blocking (condition based) take/put. */ -public class MpscBlockingConsumerArrayQueue extends MpscArrayQueue - implements BlockingQueue { +public class MpscBlockingConsumerArrayQueue extends MpscArrayQueue { // Blocking controls private final ReentrantLock lock = new ReentrantLock(); private final Condition notEmpty = lock.newCondition(); @@ -92,14 +89,4 @@ private void awaitNotFull() throws InterruptedException { lock.unlock(); } } - - @Override - public int drainTo(Collection c) { - return drainTo(c, Integer.MAX_VALUE); - } - - @Override - public int drainTo(Collection c, int maxElements) { - return drain(c::add, maxElements); - } } diff --git a/internal-api/src/test/groovy/datadog/trace/util/queue/MpscArrayQueueTest.groovy b/internal-api/src/test/groovy/datadog/trace/util/queue/MpscArrayQueueTest.groovy index ef7d1e25094..e0cda0dc21d 100644 --- a/internal-api/src/test/groovy/datadog/trace/util/queue/MpscArrayQueueTest.groovy +++ b/internal-api/src/test/groovy/datadog/trace/util/queue/MpscArrayQueueTest.groovy @@ -12,7 +12,7 @@ class MpscArrayQueueTest extends AbstractQueueTest { given: int total = 1000 int producers = 4 - queue = new MpscArrayQueue<>(1024) + queue = datadog.trace.util.queue.Queues. 
def results = Collections.synchronizedList([]) def executor = Executors.newFixedThreadPool(producers) def latch = new CountDownLatch(producers) From 9c4940a8e484de335863cd547b98af5118c3dddc Mon Sep 17 00:00:00 2001 From: Andrea Marziali Date: Tue, 4 Nov 2025 15:50:50 +0100 Subject: [PATCH 04/18] Try to optimise more java 9+ implementation --- .../trace/util/queue/BaseQueueVarHandle.java | 204 ++++++++++++++++++ .../util/queue/MpscArrayQueueVarHandle.java | 34 +-- ...scBlockingConsumerArrayQueueVarHandle.java | 97 ++++----- .../util/queue/SpmcArrayQueueVarHandle.java | 44 ++-- .../util/queue/SpscArrayQueueVarHandle.java | 137 +++++++++--- .../trace/util/queue/AbstractQueueTest.groovy | 163 ++++++++++++++ .../queue/MpscArrayQueueVarHandleTest.groovy | 61 ++++++ ...kingConsumerArrayQueueVarHandleTest.groovy | 124 +++++++++++ .../queue/SpmcArrayQueueVarHandleTest.groovy | 59 +++++ .../queue/SpscArrayQueueVarHandleTest.groovy | 42 ++++ .../util/queue/MpscArrayQueueTest.groovy | 2 +- 11 files changed, 841 insertions(+), 126 deletions(-) create mode 100644 internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/BaseQueueVarHandle.java create mode 100644 internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/AbstractQueueTest.groovy create mode 100644 internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/MpscArrayQueueVarHandleTest.groovy create mode 100644 internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandleTest.groovy create mode 100644 internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/SpmcArrayQueueVarHandleTest.groovy create mode 100644 internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/SpscArrayQueueVarHandleTest.groovy diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/BaseQueueVarHandle.java b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/BaseQueueVarHandle.java new file mode 100644 
index 00000000000..4fc54b5140e --- /dev/null +++ b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/BaseQueueVarHandle.java @@ -0,0 +1,204 @@ +package datadog.trace.util.queue; + +import java.util.Collection; +import java.util.Iterator; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.LockSupport; +import java.util.function.Consumer; +import java.util.function.Supplier; +import javax.annotation.Nonnull; + +public abstract class BaseQueueVarHandle extends BaseQueue { + public BaseQueueVarHandle(int capacity) { + super(capacity); + } + + /** + * Timed offer with progressive backoff. + * + *

Tries to insert an element into the queue within the given timeout. Uses a spin → yield → + * park backoff strategy to reduce CPU usage under contention. + * + * @param e the element to insert + * @param timeout maximum time to wait + * @param unit time unit of timeout + * @return {@code true} if inserted, {@code false} if timeout expires + * @throws InterruptedException if interrupted while waiting + */ + public boolean offer(E e, long timeout, @Nonnull TimeUnit unit) throws InterruptedException { + if (e == null) { + throw new NullPointerException(); + } + final long deadline = System.nanoTime() + unit.toNanos(timeout); + int idle = 0; + + while (true) { + if (offer(e)) return true; + + long remaining = deadline - System.nanoTime(); + if (remaining <= 0) return false; + + // Progressive backoff + if (idle < 100) { + // spin + } else if (idle < 1_000) { + Thread.yield(); + } else { + LockSupport.parkNanos(1_000L); + } + + if (Thread.interrupted()) { + throw new InterruptedException(); + } + idle++; + } + } + + /** + * Polls with a timeout using progressive backoff. + * + * @param timeout max wait time + * @param unit time unit + * @return the head element, or null if timed out + * @throws InterruptedException if interrupted + */ + public E poll(long timeout, @Nonnull TimeUnit unit) throws InterruptedException { + if (timeout <= 0) { + return poll(); + } + + final long deadline = System.nanoTime() + unit.toNanos(timeout); + int idleCount = 0; + + while (true) { + E e = poll(); + if (e != null) return e; + + long remaining = deadline - System.nanoTime(); + if (remaining <= 0) return null; + + if (idleCount < 100) { + // spin + } else if (idleCount < 1_000) { + Thread.yield(); + } else { + LockSupport.parkNanos(1_000L); + } + + if (Thread.interrupted()) { + throw new InterruptedException(); + } + idleCount++; + } + } + + /** + * Drains all available elements from the queue to a consumer. + * + *

This is efficient since it avoids repeated size() checks and returns immediately when empty. + * + * @param consumer a consumer to accept elements + * @return number of elements drained + */ + public int drain(Consumer consumer) { + return drain(consumer, Integer.MAX_VALUE); + } + + /** + * Drains up to {@code limit} elements from the queue to a consumer. + * + *

This method is useful for batch processing. + * + *

Each element is removed atomically using poll() and passed to the consumer. + * + * @param consumer a consumer to accept elements + * @param limit maximum number of elements to drain + * @return number of elements drained + */ + public int drain(Consumer consumer, int limit) { + int count = 0; + E e; + while (count < limit && (e = poll()) != null) { + consumer.accept(e); + count++; + } + return count; + } + + /** + * Fills the queue with elements provided by the supplier until the queue is full, the supplier + * runs out of elements (returns null), or {@code limit} elements have been enqueued. + * + * @param supplier a supplier of elements + * @param limit maximum number of elements to attempt to insert + * @return number of elements successfully enqueued + */ + public int fill(@Nonnull Supplier supplier, int limit) { + if (limit <= 0) { + return 0; + } + + int added = 0; + while (added < limit) { + E e = supplier.get(); + if (e == null) { + break; // stop if supplier exhausted + } + + if (offer(e)) { + added++; + } else { + break; // queue is full + } + } + return added; + } + + /** + * Iterator is not supported. + * + * @throws UnsupportedOperationException always + */ + @Override + public Iterator iterator() { + throw new UnsupportedOperationException(); + } + + /** + * Returns the remaining capacity. + * + * @return number of additional elements this queue can accept + */ + public int remainingCapacity() { + return capacity - size(); + } + + /** + * Returns the maximum queue capacity.
+ * + * @return number of total elements this queue can accept + */ + public int capacity() { + return capacity; + } + + @Override + public void put(E e) throws InterruptedException { + throw new UnsupportedOperationException("Not implementing blocking operations for producers"); + } + + @Override + public E take() throws InterruptedException { + throw new UnsupportedOperationException("Not implementing blocking operations for consumers"); + } + + @Override + public int drainTo(Collection c) { + return drainTo(c, Integer.MAX_VALUE); + } + + @Override + public int drainTo(Collection c, int maxElements) { + return drain(c::add, maxElements); + } +} diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscArrayQueueVarHandle.java b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscArrayQueueVarHandle.java index dc32f1b3c27..aa1471b4ad9 100644 --- a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscArrayQueueVarHandle.java +++ b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscArrayQueueVarHandle.java @@ -80,22 +80,22 @@ public boolean offer(E e) { } while (true) { - long currentTail = (long) TAIL_HANDLE.getVolatile(this); - int index = (int) (currentTail & mask); + final long currentTail = (long) TAIL_HANDLE.getVolatile(this); + final long wrapPoint = currentTail - capacity; + final long currentHead = (long) HEAD_HANDLE.getVolatile(this); - Object existing = ARRAY_HANDLE.getVolatile(buffer, index); - if (existing != null) { - return false; // queue full + if (wrapPoint >= currentHead) { + return false; // full } - // CAS tail to claim the slot if (TAIL_HANDLE.compareAndSet(this, currentTail, currentTail + 1)) { - ARRAY_HANDLE.setRelease(buffer, index, e); // publish with release semantics + final int index = (int) (currentTail & mask); + ARRAY_HANDLE.setRelease(buffer, index, e); return true; } - // CAS failed → short backoff to reduce contention - LockSupport.parkNanos(1); + // 
Backoff on contention + LockSupport.parkNanos(1L); } } @@ -110,30 +110,30 @@ public boolean offer(E e) { @Override @SuppressWarnings("unchecked") public E poll() { - long currentHead = (long) HEAD_HANDLE.getVolatile(this); - int index = (int) (currentHead & mask); + final long currentHead = (long) HEAD_HANDLE.getOpaque(this); + final int index = (int) (currentHead & mask); - Object value = ARRAY_HANDLE.getVolatile(buffer, index); + Object value = ARRAY_HANDLE.getAcquire(buffer, index); if (value == null) { - return null; // empty + return null; } - ARRAY_HANDLE.setRelease(buffer, index, null); // mark slot free - HEAD_HANDLE.setVolatile(this, currentHead + 1); // advance head + ARRAY_HANDLE.setOpaque(buffer, index, null); // clear slot + HEAD_HANDLE.setOpaque(this, currentHead + 1); return (E) value; } @Override @SuppressWarnings("unchecked") public E peek() { - int index = (int) ((long) HEAD_HANDLE.getVolatile(this) & mask); + final int index = (int) ((long) HEAD_HANDLE.getOpaque(this) & mask); return (E) ARRAY_HANDLE.getVolatile(buffer, index); } @Override public int size() { + long currentHead = head; // non-volatile read long currentTail = (long) TAIL_HANDLE.getVolatile(this); - long currentHead = (long) HEAD_HANDLE.getVolatile(this); return (int) (currentTail - currentHead); } } diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandle.java b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandle.java index d9fcd28e733..a5b104c9909 100644 --- a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandle.java +++ b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandle.java @@ -1,7 +1,8 @@ package datadog.trace.util.queue; -import java.util.concurrent.locks.Condition; -import java.util.concurrent.locks.ReentrantLock; +import 
java.lang.invoke.MethodHandles; +import java.lang.invoke.VarHandle; +import java.util.concurrent.locks.LockSupport; /** * JCtools-like MpscBlockingConsumerArrayQueue implemented without Unsafe. @@ -9,10 +10,21 @@ *

It features nonblocking offer/poll methods and blocking (condition based) take/put. */ public class MpscBlockingConsumerArrayQueueVarHandle extends MpscArrayQueueVarHandle { - // Blocking controls - private final ReentrantLock lock = new ReentrantLock(); - private final Condition notEmpty = lock.newCondition(); - private final Condition notFull = lock.newCondition(); + /** Consumer thread reference for wake-up. */ + private volatile Thread consumerThread; + + private static final VarHandle CONSUMER_THREAD_HANDLE; + + static { + try { + MethodHandles.Lookup l = MethodHandles.lookup(); + CONSUMER_THREAD_HANDLE = + l.findVarHandle( + MpscBlockingConsumerArrayQueueVarHandle.class, "consumerThread", Thread.class); + } catch (Throwable t) { + throw new IllegalStateException(t); + } + } public MpscBlockingConsumerArrayQueueVarHandle(int capacity) { super(capacity); @@ -22,71 +34,34 @@ public MpscBlockingConsumerArrayQueueVarHandle(int capacity) { public boolean offer(E e) { final boolean success = super.offer(e); if (success) { - signalNotEmpty(); + Thread c = (Thread) CONSUMER_THREAD_HANDLE.getVolatile(this); + if (c != null) LockSupport.unpark(c); } return success; } public void put(E e) throws InterruptedException { - while (!offer(e)) { - awaitNotFull(); - } - } - - @Override - public E poll() { - final E ret = super.poll(); - if (ret != null) { - signalNotFull(); - } - return ret; + // in this variant we should not use a blocking put since we do not support blocking producers + throw new UnsupportedOperationException(); } + /** + * Retrieves and removes the head element, waiting if necessary until one becomes available. 
+ * + * @return the next element (never null) + * @throws InterruptedException if interrupted while waiting + */ public E take() throws InterruptedException { - E e; - while ((e = poll()) == null) { - awaitNotEmpty(); - } - return e; - } - - private void signalNotEmpty() { - lock.lock(); - try { - notEmpty.signal(); - } finally { - lock.unlock(); - } - } - - private void signalNotFull() { - lock.lock(); - try { - notFull.signal(); - } finally { - lock.unlock(); - } - } + consumerThread = Thread.currentThread(); + while (true) { + E e = poll(); + if (e != null) return e; - private void awaitNotEmpty() throws InterruptedException { - lock.lockInterruptibly(); - try { - while (isEmpty()) { - notEmpty.await(); - } - } finally { - lock.unlock(); - } - } - - private void awaitNotFull() throws InterruptedException { - lock.lockInterruptibly(); - try { - while (size() == capacity) { - notFull.await(); + if (Thread.interrupted()) { + throw new InterruptedException(); } - } finally { - lock.unlock(); + // Block until producer unparks us + LockSupport.park(this); } } } diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpmcArrayQueueVarHandle.java b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpmcArrayQueueVarHandle.java index 8a5b0523f37..ea0c3f3d235 100644 --- a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpmcArrayQueueVarHandle.java +++ b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpmcArrayQueueVarHandle.java @@ -2,7 +2,6 @@ import java.lang.invoke.MethodHandles; import java.lang.invoke.VarHandle; -import java.util.concurrent.locks.LockSupport; /** * A Single-Producer, Multiple-Consumer (SPMC) bounded, lock-free queue based on a circular array. 
@@ -79,21 +78,16 @@ public boolean offer(E e) { throw new NullPointerException(); } - long currentTail = (long) TAIL_HANDLE.getVolatile(this); - int index = (int) (currentTail & mask); - - // Check if slot is still occupied (queue full) - Object existing = ARRAY_HANDLE.getVolatile(buffer, index); - if (existing != null) { - return false; // full + long currentTail = tail; + long wrapPoint = currentTail - capacity; + long currentHead = (long) HEAD_HANDLE.getVolatile(this); + if (wrapPoint >= currentHead) { + return false; // queue full } - // Publish the element (release ensures write is visible to consumers) + int index = (int) (currentTail & mask); ARRAY_HANDLE.setRelease(buffer, index, e); - - // Advance tail (release ensures the enqueue is visible to consumers) - TAIL_HANDLE.setRelease(this, currentTail + 1); - + tail = currentTail + 1; return true; } @@ -114,32 +108,40 @@ public E poll() { Object value = ARRAY_HANDLE.getAcquire(buffer, index); if (value == null) { - return null; // queue empty or not yet visible + // Possibly empty + long currentTail = tail; // producer thread only writes + if (currentHead >= currentTail) { + return null; // queue empty + } else { + // Not yet visible, retry + Thread.onSpinWait(); + continue; + } } - // Attempt to claim the element + // Try to claim this slot if (HEAD_HANDLE.compareAndSet(this, currentHead, currentHead + 1)) { - // CAS succeeded: this consumer owns the slot - ARRAY_HANDLE.setRelease(buffer, index, null); // mark slot free + ARRAY_HANDLE.setOpaque(buffer, index, null); return (E) value; } - // CAS failed — another consumer took it, retry - LockSupport.parkNanos(1L); + // Lost race to another consumer + Thread.onSpinWait(); } } @Override @SuppressWarnings("unchecked") public E peek() { - int index = (int) ((long) HEAD_HANDLE.getVolatile(this) & mask); + long currentHead = (long) HEAD_HANDLE.getVolatile(this); + int index = (int) (currentHead & mask); return (E) ARRAY_HANDLE.getVolatile(buffer, index); } @Override 
public int size() { - long currentTail = (long) TAIL_HANDLE.getVolatile(this); long currentHead = (long) HEAD_HANDLE.getVolatile(this); + long currentTail = tail; return (int) (currentTail - currentHead); } } diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpscArrayQueueVarHandle.java b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpscArrayQueueVarHandle.java index 2469adc3441..6a667cd6592 100644 --- a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpscArrayQueueVarHandle.java +++ b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpscArrayQueueVarHandle.java @@ -2,6 +2,8 @@ import java.lang.invoke.MethodHandles; import java.lang.invoke.VarHandle; +import java.util.concurrent.TimeUnit; +import javax.annotation.Nonnull; /** * A high-performance Single-Producer, Single-Consumer (SPSC) bounded queue using a circular buffer @@ -35,6 +37,10 @@ public class SpscArrayQueueVarHandle extends BaseQueue { @SuppressWarnings("unused") private long q10, q11, q12, q13, q14, q15, q16; + // These caches eliminate redundant volatile reads + private long cachedHead = 0L; // visible only to producer + private long cachedTail = 0L; // visible only to consumer + private static final VarHandle HEAD_HANDLE; private static final VarHandle TAIL_HANDLE; private static final VarHandle ARRAY_HANDLE; @@ -65,8 +71,8 @@ public SpscArrayQueueVarHandle(int requestedCapacity) { /** * Enqueues an element if space is available. * - *

Since only one producer exists, this method uses simple volatile semantics and never - * contends or retries. + *

Uses cached head to minimize volatile reads. Only refreshes the head when the queue looks + * full. Writes use release semantics for publication. * * @param e the element to enqueue * @return {@code true} if enqueued, {@code false} if the queue is full @@ -78,20 +84,19 @@ public boolean offer(E e) { throw new NullPointerException(); } - long currentTail = (long) TAIL_HANDLE.getVolatile(this); - int index = (int) (currentTail & mask); + final long currentTail = (long) TAIL_HANDLE.getOpaque(this); + final int index = (int) (currentTail & mask); - // Check if the next slot is still occupied - Object existing = ARRAY_HANDLE.getVolatile(buffer, index); - if (existing != null) { - return false; // queue full + if (currentTail - cachedHead >= capacity) { + // Refresh cached head (read from consumer side) + cachedHead = (long) HEAD_HANDLE.getVolatile(this); + if (currentTail - cachedHead >= capacity) { + return false; // still full + } } - // Publish element (release semantics) - ARRAY_HANDLE.setRelease(buffer, index, e); - - // Advance tail (release ensures enqueue visibility) - TAIL_HANDLE.setRelease(this, currentTail + 1); + ARRAY_HANDLE.setRelease(buffer, index, e); // publish value + TAIL_HANDLE.setOpaque(this, currentTail + 1); // relaxed tail update return true; } @@ -106,27 +111,27 @@ public boolean offer(E e) { @Override @SuppressWarnings("unchecked") public E poll() { - long currentHead = (long) HEAD_HANDLE.getVolatile(this); - int index = (int) (currentHead & mask); - - Object value = ARRAY_HANDLE.getAcquire(buffer, index); - if (value == null) { - return null; // queue empty + final long currentHead = (long) HEAD_HANDLE.getOpaque(this); + final int index = (int) (currentHead & mask); + + if (currentHead >= cachedTail) { + // refresh tail cache + cachedTail = (long) TAIL_HANDLE.getVolatile(this); + if (currentHead >= cachedTail) { + return null; // still empty + } } - // Clear slot (release to make it visible to producer) - ARRAY_HANDLE.setRelease(buffer, index, 
null); - - // Advance head (release to ensure ordering) - HEAD_HANDLE.setRelease(this, currentHead + 1); - + Object value = ARRAY_HANDLE.getAcquire(buffer, index); + ARRAY_HANDLE.setOpaque(buffer, index, null); // clear slot + HEAD_HANDLE.setOpaque(this, currentHead + 1); // relaxed head update return (E) value; } @Override @SuppressWarnings("unchecked") public E peek() { - int index = (int) ((long) HEAD_HANDLE.getVolatile(this) & mask); + final int index = (int) ((long) HEAD_HANDLE.getOpaque(this) & mask); return (E) ARRAY_HANDLE.getVolatile(buffer, index); } @@ -136,4 +141,84 @@ public int size() { long currentHead = (long) HEAD_HANDLE.getVolatile(this); return (int) (currentTail - currentHead); } + + /** + * Timed offer with progressive backoff. + * + *

Tries to insert an element into the queue within the given timeout. Uses a spin → yield → + * park backoff strategy to reduce CPU usage under contention. + * + * @param e the element to insert + * @param timeout maximum time to wait + * @param unit time unit of timeout + * @return {@code true} if inserted, {@code false} if timeout expires + * @throws InterruptedException if interrupted while waiting + */ + @Override + public boolean offer(E e, long timeout, @Nonnull TimeUnit unit) throws InterruptedException { + long deadline = System.nanoTime() + unit.toNanos(timeout); + int idle = 0; + + while (true) { + if (offer(e)) return true; + + long remaining = deadline - System.nanoTime(); + if (remaining <= 0) return false; + + // progressive spin/yield + if (idle < 100) { + // spin + } else if (idle < 1_000) { + Thread.yield(); + } else { + Thread.onSpinWait(); + } + if (Thread.interrupted()) { + throw new InterruptedException(); + } + idle++; + } + } + + /** + * Polls with a timeout using progressive backoff. 
+ * + * @param timeout max wait time + * @param unit time unit + * @return the head element, or null if timed out + * @throws InterruptedException if interrupted + */ + @Override + public E poll(long timeout, @Nonnull TimeUnit unit) throws InterruptedException { + if (timeout <= 0) { + return poll(); + } + + final long deadline = System.nanoTime() + unit.toNanos(timeout); + int idleCount = 0; + + while (true) { + E e = poll(); + if (e != null) { + return e; + } + + long remaining = deadline - System.nanoTime(); + if (remaining <= 0) { + return null; + } + + if (idleCount < 100) { + // spin + } else if (idleCount < 1_000) { + Thread.yield(); + } else { + Thread.onSpinWait(); + } + if (Thread.interrupted()) { + throw new InterruptedException(); + } + idleCount++; + } + } } diff --git a/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/AbstractQueueTest.groovy b/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/AbstractQueueTest.groovy new file mode 100644 index 00000000000..f9c8a6a1c85 --- /dev/null +++ b/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/AbstractQueueTest.groovy @@ -0,0 +1,163 @@ +package datadog.trace.util.queue + +import static java.util.concurrent.TimeUnit.NANOSECONDS + +import datadog.trace.test.util.DDSpecification +import java.util.concurrent.TimeUnit +import java.util.concurrent.atomic.AtomicBoolean +import java.util.function.Consumer + +abstract class AbstractQueueTest> extends DDSpecification { + abstract T createQueue(int capacity) + protected T queue = createQueue(8) + + def "offer and poll should preserve FIFO order"() { + when: + queue.offer(1) + queue.offer(2) + queue.offer(3) + + then: + queue.poll() == 1 + queue.poll() == 2 + queue.poll() == 3 + queue.poll() == null + } + + def "offer should return false when queue is full"() { + given: + queue.clear() + (1..8).each { queue.offer(it) } + + expect: + !queue.offer(999) + queue.size() == 8 + } + + def "peek should return head 
element without removing it"() { + given: + queue.clear() + queue.offer(10) + queue.offer(20) + + expect: + queue.peek() == 10 + queue.peek() == 10 + queue.size() == 2 + } + + def "poll should return null when empty"() { + given: + queue.clear() + + expect: + queue.poll() == null + } + + def "size should reflect current number of items"() { + when: + queue.clear() + queue.offer(1) + queue.offer(2) + + then: + queue.size() == 2 + + when: + queue.poll() + queue.poll() + + then: + queue.size() == 0 + } + + def "drain should consume all available elements"() { + given: + queue.clear() + (1..5).each { queue.offer(it) } + def drained = [] + + when: + def count = queue.drain({ drained << it } as Consumer) + + then: + count == 5 + drained == [1, 2, 3, 4, 5] + queue.isEmpty() + } + + def "drain with limit should only consume that many elements"() { + given: + queue.clear() + (1..6).each { queue.offer(it) } + def drained = [] + + when: + def count = queue.drain({ drained << it } as Consumer, 3) + + then: + count == 3 + drained == [1, 2, 3] + queue.size() == 3 + } + + def "remainingCapacity should reflect current occupancy"() { + given: + def q = new MpscArrayQueue(4) + q.offer(1) + q.offer(2) + + expect: + q.remainingCapacity() == 2 + + when: + q.poll() + + then: + q.remainingCapacity() == 3 + } + + + def "poll with timeout returns null if no element becomes available"() { + when: + def start = System.nanoTime() + def value = queue.poll(200, TimeUnit.MILLISECONDS) + def elapsedMs = NANOSECONDS.toMillis(System.nanoTime() - start) + + then: + value == null + elapsedMs >= 200 // waited approximately the timeout + } + + def "poll with zero timeout behaves like immediate poll"() { + expect: + queue.poll(0, TimeUnit.MILLISECONDS) == null + + when: + queue.offer(99) + + then: + queue.poll(0, TimeUnit.MILLISECONDS) == 99 + } + + def "poll throws InterruptedException if interrupted"() { + given: + def thrown = new AtomicBoolean() + def thread = Thread.start { + try { + 
queue.poll(500, TimeUnit.MILLISECONDS) + } catch (InterruptedException ie) { + thrown.set(true) + Thread.currentThread().interrupt() + } + } + + when: + Thread.sleep(50) + thread.interrupt() + thread.join() + + then: + thrown.get() + } +} diff --git a/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/MpscArrayQueueVarHandleTest.groovy b/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/MpscArrayQueueVarHandleTest.groovy new file mode 100644 index 00000000000..7f8bbcc1d0b --- /dev/null +++ b/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/MpscArrayQueueVarHandleTest.groovy @@ -0,0 +1,61 @@ +package datadog.trace.util.queue + + +import java.util.concurrent.CountDownLatch +import java.util.concurrent.Executors +import spock.lang.Timeout + +class MpscArrayQueueVarHandleTest extends AbstractQueueTest { + + @Timeout(10) + def "multiple producers single consumer should consume all elements without duplication or loss"() { + given: + int total = 1000 + int producers = 4 + queue = new MpscArrayQueueVarHandle<>(1024) + def results = Collections.synchronizedList([]) + def executor = Executors.newFixedThreadPool(producers) + def latch = new CountDownLatch(producers) + def consumerDone = new CountDownLatch(1) + + when: "multiple producers enqueue concurrently" + (1..producers).each { id -> + executor.submit { + for (int i = 0; i < total / producers; i++) { + int value = (id * 10000) + i + while (!queue.offer(value)) { + Thread.yield() + } + } + latch.countDown() + } + } + + and: "a single consumer drains all elements" + Thread consumer = new Thread({ + while (results.size() < total) { + def v = queue.poll() + if (v != null) { + results << v + } else { + Thread.yield() + } + } + consumerDone.countDown() + }) + consumer.start() + + latch.await() + consumerDone.await() + executor.shutdown() + + then: + results.size() == total + results.toSet().size() == total // all unique + } + + @Override + MpscArrayQueueVarHandle 
createQueue(int capacity) { + return new MpscArrayQueueVarHandle(capacity) + } +} diff --git a/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandleTest.groovy b/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandleTest.groovy new file mode 100644 index 00000000000..fe02e6bd297 --- /dev/null +++ b/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandleTest.groovy @@ -0,0 +1,124 @@ +package datadog.trace.util.queue + + +import java.util.concurrent.CountDownLatch +import java.util.concurrent.atomic.AtomicReference +import java.util.function.Consumer +import java.util.function.Supplier +import spock.lang.Timeout + +class MpscBlockingConsumerArrayQueueVarHandleTest extends AbstractQueueTest> { + + @Override + MpscBlockingConsumerArrayQueueVarHandle createQueue(int capacity) { + return new MpscBlockingConsumerArrayQueueVarHandle(capacity) + } + + def "drain should consume all elements in order"() { + given: + queue.clear() + (1..5).each { queue.offer(it) } + def drained = [] + + when: + def count = queue.drain({ drained << it } as Consumer) + + then: + count == 5 + drained == [1, 2, 3, 4, 5] + queue.isEmpty() + } + + def "drain with limit should consume only limited number"() { + given: + queue.clear() + (1..6).each { queue.offer(it) } + def drained = [] + + when: + def count = queue.drain({ drained << it } as Consumer, 3) + + then: + count == 3 + drained == [1, 2, 3] + queue.size() == 3 + } + + @Timeout(10) + def "multiple producers single consumer should consume all elements without duplicates"() { + given: + int total = 1000 + int producers = 4 + queue = new MpscBlockingConsumerArrayQueueVarHandle<>(1024) + def results = Collections.synchronizedList([]) + def latch = new CountDownLatch(producers) + + when: + // Multiple producers + (1..producers).each { id -> + Thread.start { + for (int i = 0; i < total / 
producers; i++) { + int val = id * 10_000 + i + while (!queue.offer(val)) { + Thread.yield() + } + } + latch.countDown() + } + } + + // Single consumer + Thread consumer = Thread.start { + while (results.size() < total) { + def v = queue.poll() + if (v != null) { + results << v + } + else { + Thread.yield() + } + } + } + + latch.await() + consumer.join() + + then: + results.size() == total + results.toSet().size() == total // all unique + } + + def "blocking take should wake up when producer offers"() { + given: + queue = new MpscBlockingConsumerArrayQueueVarHandle<>(4) + def result = new AtomicReference<>() + + when: + Thread consumer = Thread.start { + try { + result.set(queue.take()) + } catch (InterruptedException ignored) { + } + } + Thread.sleep(100) + queue.offer(123) + consumer.join(1000) + + then: + result.get() == 123 + queue.isEmpty() + } + + def "fill inserts up to capacity"() { + given: + def counter = 0 + def supplier = { counter < 10 ? counter++ : null } as Supplier + + when: + def filled = queue.fill(supplier, 10) + + then: + filled == 8 + queue.size() == 8 + } +} diff --git a/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/SpmcArrayQueueVarHandleTest.groovy b/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/SpmcArrayQueueVarHandleTest.groovy new file mode 100644 index 00000000000..9038ab058a1 --- /dev/null +++ b/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/SpmcArrayQueueVarHandleTest.groovy @@ -0,0 +1,59 @@ +package datadog.trace.util.queue + + +import java.util.concurrent.CountDownLatch +import java.util.concurrent.Executors +import spock.lang.Timeout + +class SpmcArrayQueueVarHandleTest extends AbstractQueueTest> { + + @Override + SpmcArrayQueueVarHandle createQueue(int capacity) { + return new SpmcArrayQueueVarHandle(capacity) + } + + @Timeout(10) + def "single producer multiple consumers should consume all elements without duplication or loss"() { + given: + int total = 1000 + int 
consumers = 4 + queue = new SpmcArrayQueueVarHandle<>(1024) + def results = Collections.synchronizedList([]) + def executor = Executors.newFixedThreadPool(consumers) + def latch = new CountDownLatch(consumers) + + when: "one producer fills the queue" + Thread producer = new Thread({ + for (int i = 0; i < total; i++) { + while (!queue.offer(i)) { + Thread.yield() + } + } + }) + producer.start() + + and: "multiple consumers drain concurrently" + (1..consumers).each { + executor.submit { + while (results.size() < total) { + def v = queue.poll() + if (v != null) { + results << v + } else { + Thread.yield() + } + } + latch.countDown() + } + } + + latch.await() + producer.join() + executor.shutdown() + + then: + results.size() == total + results.toSet().size() == total // no duplicates + results.containsAll((0..> { + + def "single producer single consumer concurrency"() { + given: + def queue = new SpscArrayQueueVarHandle(1024) + def producerCount = 1000 + def consumed = new AtomicInteger(0) + def consumedValues = [] + + def producer = Thread.start { + (1..producerCount).each { queue.offer(it) } + } + + def consumer = Thread.start { + while (consumed.get() < producerCount) { + def v = queue.poll() + if (v != null) { + consumedValues << v + consumed.incrementAndGet() + } + } + } + + when: + producer.join() + consumer.join() + + then: + consumed.get() == producerCount + consumedValues.toSet().size() == producerCount // all values unique + } + + @Override + SpscArrayQueueVarHandle createQueue(int capacity) { + return new SpscArrayQueueVarHandle(capacity) + } +} diff --git a/internal-api/src/test/groovy/datadog/trace/util/queue/MpscArrayQueueTest.groovy b/internal-api/src/test/groovy/datadog/trace/util/queue/MpscArrayQueueTest.groovy index e0cda0dc21d..ef7d1e25094 100644 --- a/internal-api/src/test/groovy/datadog/trace/util/queue/MpscArrayQueueTest.groovy +++ b/internal-api/src/test/groovy/datadog/trace/util/queue/MpscArrayQueueTest.groovy @@ -12,7 +12,7 @@ class 
MpscArrayQueueTest extends AbstractQueueTest { given: int total = 1000 int producers = 4 - queue = datadog.trace.util.queue.Queues. + queue = new MpscArrayQueue<>(1024) def results = Collections.synchronizedList([]) def executor = Executors.newFixedThreadPool(producers) def latch = new CountDownLatch(producers) From 29c8fca6043162d3f15ee31e7fd0c43302c4ae83 Mon Sep 17 00:00:00 2001 From: Andrea Marziali Date: Wed, 5 Nov 2025 14:39:15 +0100 Subject: [PATCH 05/18] Try to optimise varhandle version further --- .../stacktrace/queue/MPSCQueueBenchmark.java | 15 ++- .../stacktrace/queue/SPMCQueueBenchmark.java | 92 ++++++------- .../util/queue/MpscArrayQueueVarHandle.java | 125 +++++++++++++----- ...scBlockingConsumerArrayQueueVarHandle.java | 17 +-- .../util/queue/SpmcArrayQueueVarHandle.java | 116 +++++++++------- .../util/queue/SpscArrayQueueVarHandle.java | 97 +------------- .../util/queue/JcToolsMPSCQueueBenchmark.java | 6 +- .../util/queue/JcToolsSPMCQueueBenchmark.java | 94 ++++++------- 8 files changed, 257 insertions(+), 305 deletions(-) diff --git a/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/stacktrace/queue/MPSCQueueBenchmark.java b/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/stacktrace/queue/MPSCQueueBenchmark.java index 1c504a8302b..f0e15c4350d 100644 --- a/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/stacktrace/queue/MPSCQueueBenchmark.java +++ b/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/stacktrace/queue/MPSCQueueBenchmark.java @@ -20,12 +20,15 @@ import org.openjdk.jmh.infra.Blackhole; /* -MPSCQueueBenchmark.queueTest 1024 thrpt 145.261 ops/us -MPSCQueueBenchmark.queueTest:consume 1024 thrpt 84.185 ops/us -MPSCQueueBenchmark.queueTest:produce 1024 thrpt 61.076 ops/us -MPSCQueueBenchmark.queueTest 65536 thrpt 187.609 ops/us -MPSCQueueBenchmark.queueTest:consume 65536 thrpt 117.097 ops/us -MPSCQueueBenchmark.queueTest:produce 65536 thrpt 70.512 ops/us +Benchmark (capacity) Mode Cnt Score Error 
Units +MPSCQueueBenchmark.queueTest 65536 thrpt 208.469 ops/us +MPSCQueueBenchmark.queueTest:async 65536 thrpt NaN --- +MPSCQueueBenchmark.queueTest:consume 65536 thrpt 199.309 ops/us +MPSCQueueBenchmark.queueTest:produce 65536 thrpt 9.161 ops/us +MPSCQueueBenchmark.queueTest 1024 thrpt 195.200 ops/us +MPSCQueueBenchmark.queueTest:async 1024 thrpt NaN --- +MPSCQueueBenchmark.queueTest:consume 1024 thrpt 185.929 ops/us +MPSCQueueBenchmark.queueTest:produce 1024 thrpt 9.272 ops/us */ @BenchmarkMode(Mode.Throughput) @Warmup(iterations = 1, time = 30) diff --git a/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/stacktrace/queue/SPMCQueueBenchmark.java b/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/stacktrace/queue/SPMCQueueBenchmark.java index a88f17a7a48..29a84b43501 100644 --- a/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/stacktrace/queue/SPMCQueueBenchmark.java +++ b/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/stacktrace/queue/SPMCQueueBenchmark.java @@ -1,83 +1,77 @@ package datadog.trace.util.stacktrace.queue; import datadog.trace.util.queue.SpmcArrayQueueVarHandle; +import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.locks.LockSupport; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; import org.openjdk.jmh.annotations.Group; import org.openjdk.jmh.annotations.GroupThreads; import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; import org.openjdk.jmh.annotations.Mode; import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; import org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; /* 
-SPMCQueueBenchmark.spmc N/A thrpt 5 484.103 ± 64.709 ops/us -SPMCQueueBenchmark.spmc:consumer N/A thrpt 5 466.954 ± 65.712 ops/us -SPMCQueueBenchmark.spmc:producer N/A thrpt 5 17.149 ± 1.541 ops/us +MPSCQueueBenchmark.queueTest 1024 thrpt 145.261 ops/us +MPSCQueueBenchmark.queueTest:consume 1024 thrpt 84.185 ops/us +MPSCQueueBenchmark.queueTest:produce 1024 thrpt 61.076 ops/us +MPSCQueueBenchmark.queueTest 65536 thrpt 187.609 ops/us +MPSCQueueBenchmark.queueTest:consume 65536 thrpt 117.097 ops/us +MPSCQueueBenchmark.queueTest:produce 65536 thrpt 70.512 ops/us */ @BenchmarkMode(Mode.Throughput) -@State(Scope.Group) -@Fork(value = 1, warmups = 0) +@Warmup(iterations = 1, time = 30) +@Measurement(iterations = 1, time = 30) +@Fork(1) @OutputTimeUnit(TimeUnit.MICROSECONDS) +@State(Scope.Benchmark) public class SPMCQueueBenchmark { + @State(Scope.Group) + public static class QueueState { + SpmcArrayQueueVarHandle queue; + CountDownLatch producerReady; - private static final int QUEUE_CAPACITY = 1024; - private static final int ITEMS_TO_PRODUCE = 100_000; + @Param({"1024", "65536"}) + int capacity; - private SpmcArrayQueueVarHandle queue; - private AtomicInteger produced; - private AtomicInteger consumed; - - @Setup(Level.Iteration) - public void setup() { - queue = new SpmcArrayQueueVarHandle<>(QUEUE_CAPACITY); - produced = new AtomicInteger(0); - consumed = new AtomicInteger(0); - - // Pre-fill queue for warmup safety - int warmupFill = Math.min(QUEUE_CAPACITY / 2, ITEMS_TO_PRODUCE); - for (int i = 0; i < warmupFill; i++) { - queue.offer(i); - produced.incrementAndGet(); + @Setup(Level.Iteration) + public void setup() { + queue = new SpmcArrayQueueVarHandle<>(capacity); + producerReady = new CountDownLatch(1); } } - // Single producer in the group @Benchmark - @Group("spmc") - @GroupThreads(1) - public void producer() { - int i = produced.getAndIncrement(); - if (i < ITEMS_TO_PRODUCE) { - while (!queue.offer(i)) { - LockSupport.parkNanos(1L); - } + @Group("queueTest") 
+ @GroupThreads(4) + public void consume(QueueState state, Blackhole bh) { + try { + state.producerReady.await(); // wait until the producer is ready + } catch (InterruptedException ignored) { + } + Integer v = state.queue.poll(); + if (v != null) { + bh.consume(v); } } - // Multiple consumers in the group @Benchmark - @Group("spmc") - @GroupThreads(4) // adjust number of consumers - public int consumer() { - while (true) { - Integer val = queue.poll(); - if (val != null) { - consumed.incrementAndGet(); - return val; - } - - if (produced.get() >= ITEMS_TO_PRODUCE && queue.isEmpty()) { - return 0; - } - - LockSupport.parkNanos(1L); + @Group("queueTest") + @GroupThreads(1) + public void produce(QueueState state) { + state.producerReady.countDown(); // signal consumers can start + // bounded attempt: try once, then yield if full + boolean offered = state.queue.offer(0); + if (!offered) { + Thread.yield(); } } } diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscArrayQueueVarHandle.java b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscArrayQueueVarHandle.java index aa1471b4ad9..759e82ae97b 100644 --- a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscArrayQueueVarHandle.java +++ b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscArrayQueueVarHandle.java @@ -3,6 +3,7 @@ import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles.Lookup; import java.lang.invoke.VarHandle; +import java.util.Objects; import java.util.concurrent.locks.LockSupport; /** @@ -17,6 +18,7 @@ public class MpscArrayQueueVarHandle extends BaseQueue { private static final VarHandle ARRAY_HANDLE; private static final VarHandle HEAD_HANDLE; private static final VarHandle TAIL_HANDLE; + private static final VarHandle PRODUCER_LIMIT_HANDLE; static { try { @@ -24,6 +26,8 @@ public class MpscArrayQueueVarHandle extends BaseQueue { TAIL_HANDLE = 
lookup.findVarHandle(MpscArrayQueueVarHandle.class, "tail", long.class); HEAD_HANDLE = lookup.findVarHandle(MpscArrayQueueVarHandle.class, "head", long.class); ARRAY_HANDLE = MethodHandles.arrayElementVarHandle(Object[].class); + PRODUCER_LIMIT_HANDLE = + lookup.findVarHandle(MpscArrayQueueVarHandle.class, "producerLimit", long.class); } catch (Throwable t) { throw new IllegalStateException(t); } @@ -32,97 +36,139 @@ public class MpscArrayQueueVarHandle extends BaseQueue { /** The backing array (plain Java array for VarHandle access) */ private final Object[] buffer; - // Padding + // Padding to prevent false sharing @SuppressWarnings("unused") private long p0, p1, p2, p3, p4, p5, p6; - /** The next free slot for producers */ + /** Next free slot for producers (multi-threaded) */ private volatile long tail = 0L; - // Padding + // Padding around tail @SuppressWarnings("unused") private long q0, q1, q2, q3, q4, q5, q6; - // Padding + /** Cached producer limit to reduce volatile head reads */ + private volatile long producerLimit = 0L; + + // Padding around producerLimit @SuppressWarnings("unused") - private long p10, p11, p12, p13, p14, p15, p16; + private long r0, r1, r2, r3, r4, r5, r6; - /** The next slot to consume (single-threaded) */ + /** Next slot to consume (single-threaded) */ private volatile long head = 0L; - // Padding - private long q10, q11, q12, q13, q14, q15, q16; + // Padding around head + @SuppressWarnings("unused") + private long s0, s1, s2, s3, s4, s5, s6; /** * Creates a new MPSC queue. * - * @param requestedCapacity the desired capacity, rounded up to the next power of two if needed + * @param requestedCapacity the desired capacity, rounded up to next power of two */ public MpscArrayQueueVarHandle(int requestedCapacity) { super(requestedCapacity); this.buffer = new Object[capacity]; + this.producerLimit = capacity; } /** * Attempts to add an element to the queue. * - *

This method uses a CAS loop on {@code tail} to allow multiple producers to safely claim - * distinct slots. The producer then performs a release-store into the buffer using {@code - * ARRAY_HANDLE.setRelease()}. - * * @param e the element to add (must be non-null) - * @return {@code true} if the element was enqueued, {@code false} if the queue is full + * @return true if element was enqueued, false if queue is full */ @Override public boolean offer(E e) { - if (e == null) { - throw new NullPointerException(); + Objects.requireNonNull(e); + + // jctools does the same local copy to have the jitter optimise the accesses + final Object[] localBuffer = this.buffer; + + // depending on the thread id, choose a different backoff strategy. + // Note: it reduces fairness but also the contention on the cas. + boolean s0 = false, s1 = false, s2 = false; + switch ((int) (Thread.currentThread().getId() & 3)) { + case 0: + s0 = true; + break; + case 1: + s1 = true; + break; + case 2: + s2 = true; + break; + default: + break; } + long localProducerLimit = (long) PRODUCER_LIMIT_HANDLE.getVolatile(this); + long cachedHead = 0L; // Local cache of head to reduce volatile reads + while (true) { - final long currentTail = (long) TAIL_HANDLE.getVolatile(this); - final long wrapPoint = currentTail - capacity; - final long currentHead = (long) HEAD_HANDLE.getVolatile(this); + long currentTail = (long) TAIL_HANDLE.getVolatile(this); + + // Check if producer limit exceeded + if (currentTail >= localProducerLimit) { + // Refresh head only when necessary + cachedHead = (long) HEAD_HANDLE.getVolatile(this); + localProducerLimit = cachedHead + capacity; - if (wrapPoint >= currentHead) { - return false; // full + if (currentTail >= localProducerLimit) return false; // queue full + + // Update producerLimit so other producers also benefit + PRODUCER_LIMIT_HANDLE.setVolatile(this, localProducerLimit); } + // Attempt to claim a slot if (TAIL_HANDLE.compareAndSet(this, currentTail, currentTail + 1)) 
{ final int index = (int) (currentTail & mask); - ARRAY_HANDLE.setRelease(buffer, index, e); + + // Release-store ensures producer's write is visible to consumer + ARRAY_HANDLE.setRelease(localBuffer, index, e); return true; } - // Backoff on contention - LockSupport.parkNanos(1L); + // Backoff to reduce contention + if (s0) Thread.onSpinWait(); + else if (s1) Thread.yield(); + else if (s2) LockSupport.parkNanos(1); } } /** - * Removes and returns the next element, or {@code null} if the queue is empty. - * - *

This method is single-threaded (one consumer). It performs a volatile read of the buffer, - * and then uses {@code setRelease(null)} to free the slot. + * Removes and returns the next element, or null if empty. * - * @return the dequeued element, or null if the queue is empty + * @return dequeued element, or null if queue empty */ @Override @SuppressWarnings("unchecked") public E poll() { - final long currentHead = (long) HEAD_HANDLE.getOpaque(this); + final Object[] localBuffer = this.buffer; + + long currentHead = (long) HEAD_HANDLE.getOpaque(this); final int index = (int) (currentHead & mask); - Object value = ARRAY_HANDLE.getAcquire(buffer, index); - if (value == null) { - return null; - } + // Acquire-load ensures visibility of producer write + Object value = ARRAY_HANDLE.getAcquire(localBuffer, index); + if (value == null) return null; - ARRAY_HANDLE.setOpaque(buffer, index, null); // clear slot + // Clear the slot without additional fence + ARRAY_HANDLE.setOpaque(localBuffer, index, null); + + // Advance head using opaque write (consumer-only) HEAD_HANDLE.setOpaque(this, currentHead + 1); + return (E) value; } + /** + * Returns next element without removing it. + * + *

Memory visibility is only guaranteed when this method is called from the consumer thread. + * + * @return next element or null if empty + */ @Override @SuppressWarnings("unchecked") public E peek() { @@ -130,9 +176,16 @@ public E peek() { return (E) ARRAY_HANDLE.getVolatile(buffer, index); } + /** + * Returns the number of elements in the queue. + * + *

Tail and head are read with two separate volatile loads, so the result is up to date but only approximate while other threads are concurrently offering or polling. + * + * @return current size + */ @Override public int size() { - long currentHead = head; // non-volatile read + long currentHead = (long) HEAD_HANDLE.getVolatile(this); long currentTail = (long) TAIL_HANDLE.getVolatile(this); return (int) (currentTail - currentHead); } diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandle.java b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandle.java index a5b104c9909..cc407c98585 100644 --- a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandle.java +++ b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandle.java @@ -1,7 +1,5 @@ package datadog.trace.util.queue; -import java.lang.invoke.MethodHandles; -import java.lang.invoke.VarHandle; import java.util.concurrent.locks.LockSupport; /** @@ -13,19 +11,6 @@ public class MpscBlockingConsumerArrayQueueVarHandle extends MpscArrayQueueVa /** Consumer thread reference for wake-up. 
*/ private volatile Thread consumerThread; - private static final VarHandle CONSUMER_THREAD_HANDLE; - - static { - try { - MethodHandles.Lookup l = MethodHandles.lookup(); - CONSUMER_THREAD_HANDLE = - l.findVarHandle( - MpscBlockingConsumerArrayQueueVarHandle.class, "consumerThread", Thread.class); - } catch (Throwable t) { - throw new IllegalStateException(t); - } - } - public MpscBlockingConsumerArrayQueueVarHandle(int capacity) { super(capacity); } @@ -34,7 +19,7 @@ public MpscBlockingConsumerArrayQueueVarHandle(int capacity) { public boolean offer(E e) { final boolean success = super.offer(e); if (success) { - Thread c = (Thread) CONSUMER_THREAD_HANDLE.getVolatile(this); + Thread c = consumerThread; if (c != null) LockSupport.unpark(c); } return success; diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpmcArrayQueueVarHandle.java b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpmcArrayQueueVarHandle.java index ea0c3f3d235..b5ac23abec0 100644 --- a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpmcArrayQueueVarHandle.java +++ b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpmcArrayQueueVarHandle.java @@ -2,6 +2,7 @@ import java.lang.invoke.MethodHandles; import java.lang.invoke.VarHandle; +import java.util.Objects; /** * A Single-Producer, Multiple-Consumer (SPMC) bounded, lock-free queue based on a circular array. @@ -27,35 +28,34 @@ public class SpmcArrayQueueVarHandle extends BaseQueue { } } - /** Backing array buffer. */ + /** The backing array (plain Java array for VarHandle access) */ private final Object[] buffer; - // Padding + // Padding to avoid false sharing @SuppressWarnings("unused") private long p0, p1, p2, p3, p4, p5, p6; - /** Tail index (producer-only). 
*/ + /** Next free slot for producer (single-threaded) */ private volatile long tail = 0L; - // Padding + // Padding around tail @SuppressWarnings("unused") private long q0, q1, q2, q3, q4, q5, q6; - // Padding - @SuppressWarnings("unused") - private long p10, p11, p12, p13, p14, p15, p16; - - /** Head index (claimed atomically by multiple consumers). */ + /** Next slot to consume (multi-threaded) */ private volatile long head = 0L; - // Padding + /** Cached consumer limit to avoid repeated volatile tail reads */ + private volatile long consumerLimit = 0L; + + // Padding around head @SuppressWarnings("unused") - private long q10, q11, q12, q13, q14, q15, q16; + private long r0, r1, r2, r3, r4, r5, r6; /** - * Creates a new SPMC queue with the given capacity. + * Creates a new SPMC queue. * - * @param requestedCapacity the desired capacity, rounded up to the next power of two if needed + * @param requestedCapacity the desired capacity, rounded up to next power of two */ public SpmcArrayQueueVarHandle(int requestedCapacity) { super(requestedCapacity); @@ -63,85 +63,101 @@ public SpmcArrayQueueVarHandle(int requestedCapacity) { } /** - * Attempts to enqueue the given element. + * Adds an element to the queue. * - *

This method is called by a single producer, so no CAS is required. It uses {@code - * setRelease} to publish the element and the new tail value. + *

Single-producer: no CAS needed. Uses a release-store to ensure consumers see the write. * - * @param e the element to add - * @return {@code true} if added, {@code false} if the queue is full - * @throws NullPointerException if {@code e} is null + * @param e element to enqueue (must be non-null) + * @return true if element was added, false if queue is full */ @Override public boolean offer(E e) { - if (e == null) { - throw new NullPointerException(); - } + Objects.requireNonNull(e); long currentTail = tail; long wrapPoint = currentTail - capacity; long currentHead = (long) HEAD_HANDLE.getVolatile(this); - if (wrapPoint >= currentHead) { - return false; // queue full - } + + if (wrapPoint >= currentHead) return false; // queue full int index = (int) (currentTail & mask); - ARRAY_HANDLE.setRelease(buffer, index, e); - tail = currentTail + 1; + + // Release-store ensures that the element is visible to consumers + ARRAY_HANDLE.setRelease(this.buffer, index, e); + + // Single-producer: simple volatile write to advance tail + TAIL_HANDLE.setVolatile(this, currentTail + 1); return true; } /** - * Removes and returns the next element, or {@code null} if the queue is empty. - * - *

Consumers compete via CAS on {@code head}. The successful thread claims the index and clears - * the slot with release semantics. + * Removes and returns the next element, or null if empty. * - * @return the dequeued element, or {@code null} if empty + * @return dequeued element, or null if queue is empty */ @Override @SuppressWarnings("unchecked") public E poll() { + final Object[] localBuffer = this.buffer; + while (true) { long currentHead = (long) HEAD_HANDLE.getVolatile(this); - int index = (int) (currentHead & mask); - - Object value = ARRAY_HANDLE.getAcquire(buffer, index); - if (value == null) { - // Possibly empty - long currentTail = tail; // producer thread only writes - if (currentHead >= currentTail) { - return null; // queue empty - } else { - // Not yet visible, retry - Thread.onSpinWait(); - continue; + long limit = consumerLimit; // local cached tail + + if (currentHead >= limit) { + limit = (long) TAIL_HANDLE.getVolatile(this); + if (currentHead >= limit) { + return null; // empty } + consumerLimit = limit; // update local view } - // Try to claim this slot + // Attempt to claim this slot if (HEAD_HANDLE.compareAndSet(this, currentHead, currentHead + 1)) { - ARRAY_HANDLE.setOpaque(buffer, index, null); + int index = (int) (currentHead & mask); + Object value; + + // Wait for the producer to publish the value + while ((value = ARRAY_HANDLE.getAcquire(localBuffer, index)) == null) { + Thread.onSpinWait(); + } + + // Clear slot + ARRAY_HANDLE.setOpaque(localBuffer, index, null); return (E) value; } - // Lost race to another consumer - Thread.onSpinWait(); + // CAS failed, retry loop } } + /** + * Returns the next element without removing it. 
+ * + * @return next element or null if queue empty + */ @Override @SuppressWarnings("unchecked") public E peek() { + final Object[] localBuffer = this.buffer; long currentHead = (long) HEAD_HANDLE.getVolatile(this); + long currentTail = (long) TAIL_HANDLE.getVolatile(this); + + if (currentHead >= currentTail) return null; + int index = (int) (currentHead & mask); - return (E) ARRAY_HANDLE.getVolatile(buffer, index); + return (E) ARRAY_HANDLE.getAcquire(localBuffer, index); // acquire-load ensures visibility } + /** + * Returns the approximate number of elements in the queue. + * + * @return current queue size + */ @Override public int size() { + long currentTail = (long) TAIL_HANDLE.getVolatile(this); long currentHead = (long) HEAD_HANDLE.getVolatile(this); - long currentTail = tail; return (int) (currentTail - currentHead); } } diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpscArrayQueueVarHandle.java b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpscArrayQueueVarHandle.java index 6a667cd6592..e1fe55e0300 100644 --- a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpscArrayQueueVarHandle.java +++ b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpscArrayQueueVarHandle.java @@ -2,24 +2,18 @@ import java.lang.invoke.MethodHandles; import java.lang.invoke.VarHandle; -import java.util.concurrent.TimeUnit; -import javax.annotation.Nonnull; +import java.util.Objects; /** * A high-performance Single-Producer, Single-Consumer (SPSC) bounded queue using a circular buffer * and VarHandle-based release/acquire memory semantics. * - *

It is completely lock-free and wait-free, relying solely on release/acquire ordering for - * correctness and visibility. - * * @param the type of elements held in this queue */ public class SpscArrayQueueVarHandle extends BaseQueue { /** Backing array storing elements. */ private final Object[] buffer; - // ===================== Padding to avoid false sharing ===================== - @SuppressWarnings("unused") private long p0, p1, p2, p3, p4, p5, p6; @@ -66,23 +60,16 @@ public SpscArrayQueueVarHandle(int requestedCapacity) { this.buffer = new Object[capacity]; } - // ===================== OFFER (Producer only) ===================== - /** * Enqueues an element if space is available. * - *

Uses cached head to minimize volatile reads. Only refreshes the head when the queue looks - * full. Writes use release semantics for publication. - * * @param e the element to enqueue * @return {@code true} if enqueued, {@code false} if the queue is full * @throws NullPointerException if {@code e} is null */ @Override public boolean offer(E e) { - if (e == null) { - throw new NullPointerException(); - } + Objects.requireNonNull(e); final long currentTail = (long) TAIL_HANDLE.getOpaque(this); final int index = (int) (currentTail & mask); @@ -141,84 +128,4 @@ public int size() { long currentHead = (long) HEAD_HANDLE.getVolatile(this); return (int) (currentTail - currentHead); } - - /** - * Timed offer with progressive backoff. - * - *

Tries to insert an element into the queue within the given timeout. Uses a spin → yield → - * park backoff strategy to reduce CPU usage under contention. - * - * @param e the element to insert - * @param timeout maximum time to wait - * @param unit time unit of timeout - * @return {@code true} if inserted, {@code false} if timeout expires - * @throws InterruptedException if interrupted while waiting - */ - @Override - public boolean offer(E e, long timeout, @Nonnull TimeUnit unit) throws InterruptedException { - long deadline = System.nanoTime() + unit.toNanos(timeout); - int idle = 0; - - while (true) { - if (offer(e)) return true; - - long remaining = deadline - System.nanoTime(); - if (remaining <= 0) return false; - - // progressive spin/yield - if (idle < 100) { - // spin - } else if (idle < 1_000) { - Thread.yield(); - } else { - Thread.onSpinWait(); - } - if (Thread.interrupted()) { - throw new InterruptedException(); - } - idle++; - } - } - - /** - * Polls with a timeout using progressive backoff. 
- * - * @param timeout max wait time - * @param unit time unit - * @return the head element, or null if timed out - * @throws InterruptedException if interrupted - */ - @Override - public E poll(long timeout, @Nonnull TimeUnit unit) throws InterruptedException { - if (timeout <= 0) { - return poll(); - } - - final long deadline = System.nanoTime() + unit.toNanos(timeout); - int idleCount = 0; - - while (true) { - E e = poll(); - if (e != null) { - return e; - } - - long remaining = deadline - System.nanoTime(); - if (remaining <= 0) { - return null; - } - - if (idleCount < 100) { - // spin - } else if (idleCount < 1_000) { - Thread.yield(); - } else { - Thread.onSpinWait(); - } - if (Thread.interrupted()) { - throw new InterruptedException(); - } - idleCount++; - } - } } diff --git a/internal-api/src/jmh/java/datadog/trace/util/queue/JcToolsMPSCQueueBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/queue/JcToolsMPSCQueueBenchmark.java index 895618efbb6..da0da85d96a 100644 --- a/internal-api/src/jmh/java/datadog/trace/util/queue/JcToolsMPSCQueueBenchmark.java +++ b/internal-api/src/jmh/java/datadog/trace/util/queue/JcToolsMPSCQueueBenchmark.java @@ -61,9 +61,7 @@ public void produce(QueueState state) { } catch (InterruptedException ignored) { } - // bounded attempt: try once, then yield if full - boolean offered = state.queue.offer(0); - if (!offered) { + while (!state.queue.offer(0)) { Thread.yield(); } } @@ -76,6 +74,8 @@ public void consume(QueueState state, Blackhole bh) { Integer v = state.queue.poll(); if (v != null) { bh.consume(v); + } else { + Thread.yield(); } } } diff --git a/internal-api/src/jmh/java/datadog/trace/util/queue/JcToolsSPMCQueueBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/queue/JcToolsSPMCQueueBenchmark.java index 97eb3b0a875..9b9ef7ace18 100644 --- a/internal-api/src/jmh/java/datadog/trace/util/queue/JcToolsSPMCQueueBenchmark.java +++ 
b/internal-api/src/jmh/java/datadog/trace/util/queue/JcToolsSPMCQueueBenchmark.java @@ -1,84 +1,78 @@ package datadog.trace.util.queue; +import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.locks.LockSupport; -import org.jctools.queues.SpmcArrayQueue; +import org.jctools.queues.MpscArrayQueue; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; import org.openjdk.jmh.annotations.Group; import org.openjdk.jmh.annotations.GroupThreads; import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; import org.openjdk.jmh.annotations.Mode; import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; import org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; /* -Benchmark Mode Cnt Score Error Units -JcToolsSPMCQueueBenchmark.spmc thrpt 5 324.804 ± 15.512 ops/us -JcToolsSPMCQueueBenchmark.spmc:consumer thrpt 5 309.039 ± 15.960 ops/us -JcToolsSPMCQueueBenchmark.spmc:producer thrpt 5 15.765 ± 0.464 ops/us +Benchmark (capacity) Mode Cnt Score Error Units +JcToolsdMPSCQueueBenchmark.queueTest 1024 thrpt 75.207 ops/us +JcToolsdMPSCQueueBenchmark.queueTest:consume 1024 thrpt 62.553 ops/us +JcToolsdMPSCQueueBenchmark.queueTest:produce 1024 thrpt 12.654 ops/us +JcToolsdMPSCQueueBenchmark.queueTest 65536 thrpt 36.381 ops/us +JcToolsdMPSCQueueBenchmark.queueTest:consume 65536 thrpt 22.665 ops/us +JcToolsdMPSCQueueBenchmark.queueTest:produce 65536 thrpt 13.717 ops/us */ @BenchmarkMode(Mode.Throughput) -@State(Scope.Group) -@Fork(value = 1, warmups = 0) +@Warmup(iterations = 1, time = 30) +@Measurement(iterations = 1, time = 30) +@Fork(1) @OutputTimeUnit(TimeUnit.MICROSECONDS) 
+@State(Scope.Benchmark) public class JcToolsSPMCQueueBenchmark { + @State(Scope.Group) + public static class QueueState { + MpscArrayQueue queue; + CountDownLatch producerReady; - private static final int QUEUE_CAPACITY = 1024; - private static final int ITEMS_TO_PRODUCE = 100_000; + @Param({"1024", "65536"}) + int capacity; - private SpmcArrayQueue queue; - private AtomicInteger produced; - private AtomicInteger consumed; - - @Setup(Level.Iteration) - public void setup() { - queue = new SpmcArrayQueue<>(QUEUE_CAPACITY); - produced = new AtomicInteger(0); - consumed = new AtomicInteger(0); - - // Pre-fill queue for warmup safety - int warmupFill = Math.min(QUEUE_CAPACITY / 2, ITEMS_TO_PRODUCE); - for (int i = 0; i < warmupFill; i++) { - queue.offer(i); - produced.incrementAndGet(); + @Setup(Level.Iteration) + public void setup() { + queue = new MpscArrayQueue<>(capacity); + producerReady = new CountDownLatch(1); } } - // Single producer in the group @Benchmark - @Group("spmc") - @GroupThreads(1) - public void producer() { - int i = produced.getAndIncrement(); - if (i < ITEMS_TO_PRODUCE) { - while (!queue.offer(i)) { - LockSupport.parkNanos(1L); - } + @Group("queueTest") + @GroupThreads(4) + public void consume(QueueState state, Blackhole bh) throws InterruptedException { + state.producerReady.await(); // wait until consumer is ready + Integer v = state.queue.poll(); + if (v == null) { + LockSupport.parkNanos(1); + } else { + bh.consume(v); } } - // Multiple consumers in the group @Benchmark - @Group("spmc") - @GroupThreads(4) // adjust number of consumers - public int consumer() { - while (true) { - Integer val = queue.poll(); - if (val != null) { - consumed.incrementAndGet(); - return val; - } - - if (produced.get() >= ITEMS_TO_PRODUCE && queue.isEmpty()) { - return 0; - } - - LockSupport.parkNanos(1L); + @Group("queueTest") + @GroupThreads(1) + public void produce(QueueState state) { + state.producerReady.countDown(); // signal consumers can start + // bounded 
attempt: try once, then yield if full + boolean offered = state.queue.offer(0); + if (!offered) { + Thread.yield(); } } } From 1cf7748edee2098c7d1d594add6717f2ae3a5302 Mon Sep 17 00:00:00 2001 From: Andrea Marziali Date: Wed, 5 Nov 2025 17:33:40 +0100 Subject: [PATCH 06/18] Remove old java 8 implementation and reintroduce jctools for 1.8 --- dd-java-agent/agent-builder/gradle.lockfile | 1 + dd-java-agent/agent-debugger/gradle.lockfile | 1 + dd-java-agent/agent-llmobs/build.gradle | 1 + .../trace/llmobs/EvalProcessingWorker.java | 10 +- .../profiling-controller-jfr/build.gradle | 1 + ...nSubstitutionProcessorInstrumentation.java | 3 + ...dog_jctools_util_UnsafeRefArrayAccess.java | 12 + dd-trace-core/build.gradle | 3 +- .../trace/common/metrics/Aggregator.java | 8 +- .../metrics/ConflatingMetricsAggregator.java | 4 +- .../trace/common/metrics/OkHttpSink.java | 8 +- .../common/writer/SpanSamplingWorker.java | 7 +- .../common/writer/TraceProcessingWorker.java | 21 +- .../trace/core/PendingTraceBuffer.java | 4 +- .../DefaultDataStreamsMonitoring.java | 4 +- gradle/dependencies.gradle | 1 + internal-api/internal-api-9/build.gradle.kts | 1 + .../datadog/trace/util/queue/BaseQueue.java | 117 ++++++++++ .../trace/util/queue/BaseQueueVarHandle.java | 204 ---------------- .../BlockingConsumerNonBlockingQueue.java | 14 ++ ...toolsMpscBlockingConsumerWrappedQueue.java | 114 +++++++++ .../trace/util/queue/JctoolsWrappedQueue.java | 68 ++++++ ...scBlockingConsumerArrayQueueVarHandle.java | 88 ++++++- .../trace/util/queue/NonBlockingQueue.java | 18 ++ .../java/datadog/trace/util/queue/Queues.java | 22 +- .../util/queue/JcToolsMPSCQueueBenchmark.java | 81 ------- .../util/queue/JcToolsSPMCQueueBenchmark.java | 78 ------- .../util/queue/JcToolsSPSCQueueBenchmark.java | 71 ------ .../trace/util/queue/MPSCQueueBenchmark.java | 78 ------- .../trace/util/queue/SPMCQueueBenchmark.java | 83 ------- .../trace/util/queue/SPSCQueueBenchmark.java | 70 ------ 
.../datadog/trace/util/queue/BaseQueue.java | 219 ------------------ .../trace/util/queue/MpscArrayQueue.java | 135 ----------- .../queue/MpscBlockingConsumerArrayQueue.java | 92 -------- .../trace/util/queue/SpmcArrayQueue.java | 137 ----------- .../trace/util/queue/SpscArrayQueue.java | 111 --------- .../trace/util/queue/AbstractQueueTest.groovy | 163 ------------- .../util/queue/MpscArrayQueueTest.groovy | 61 ----- .../MpscBlockingConsumerArrayQueueTest.groovy | 197 ---------------- .../util/queue/SpmcArrayQueueTest.groovy | 59 ----- .../util/queue/SpscArrayQueueTest.groovy | 42 ---- 41 files changed, 488 insertions(+), 1924 deletions(-) create mode 100644 dd-java-agent/instrumentation/graal/native-image/src/main/java/datadog/trace/instrumentation/graal/nativeimage/Target_datadog_jctools_util_UnsafeRefArrayAccess.java create mode 100644 internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/BaseQueue.java delete mode 100644 internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/BaseQueueVarHandle.java create mode 100644 internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/BlockingConsumerNonBlockingQueue.java create mode 100644 internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/JctoolsMpscBlockingConsumerWrappedQueue.java create mode 100644 internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/JctoolsWrappedQueue.java create mode 100644 internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/NonBlockingQueue.java delete mode 100644 internal-api/src/jmh/java/datadog/trace/util/queue/JcToolsMPSCQueueBenchmark.java delete mode 100644 internal-api/src/jmh/java/datadog/trace/util/queue/JcToolsSPMCQueueBenchmark.java delete mode 100644 internal-api/src/jmh/java/datadog/trace/util/queue/JcToolsSPSCQueueBenchmark.java delete mode 100644 internal-api/src/jmh/java/datadog/trace/util/queue/MPSCQueueBenchmark.java delete mode 100644 
internal-api/src/jmh/java/datadog/trace/util/queue/SPMCQueueBenchmark.java delete mode 100644 internal-api/src/jmh/java/datadog/trace/util/queue/SPSCQueueBenchmark.java delete mode 100644 internal-api/src/main/java/datadog/trace/util/queue/BaseQueue.java delete mode 100644 internal-api/src/main/java/datadog/trace/util/queue/MpscArrayQueue.java delete mode 100644 internal-api/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueue.java delete mode 100644 internal-api/src/main/java/datadog/trace/util/queue/SpmcArrayQueue.java delete mode 100644 internal-api/src/main/java/datadog/trace/util/queue/SpscArrayQueue.java delete mode 100644 internal-api/src/test/groovy/datadog/trace/util/queue/AbstractQueueTest.groovy delete mode 100644 internal-api/src/test/groovy/datadog/trace/util/queue/MpscArrayQueueTest.groovy delete mode 100644 internal-api/src/test/groovy/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueTest.groovy delete mode 100644 internal-api/src/test/groovy/datadog/trace/util/queue/SpmcArrayQueueTest.groovy delete mode 100644 internal-api/src/test/groovy/datadog/trace/util/queue/SpscArrayQueueTest.groovy diff --git a/dd-java-agent/agent-builder/gradle.lockfile b/dd-java-agent/agent-builder/gradle.lockfile index 642dfd9a2fd..d1e26b993e8 100644 --- a/dd-java-agent/agent-builder/gradle.lockfile +++ b/dd-java-agent/agent-builder/gradle.lockfile @@ -109,6 +109,7 @@ org.jacoco:org.jacoco.agent:0.8.14=jacocoAgent,jacocoAnt org.jacoco:org.jacoco.ant:0.8.14=jacocoAnt org.jacoco:org.jacoco.core:0.8.14=jacocoAnt org.jacoco:org.jacoco.report:0.8.14=jacocoAnt +org.jctools:jctools-core:3.3.0=runtimeClasspath,testRuntimeClasspath org.junit.jupiter:junit-jupiter-api:5.12.2=testCompileClasspath,testRuntimeClasspath org.junit.jupiter:junit-jupiter-engine:5.12.2=testRuntimeClasspath org.junit.jupiter:junit-jupiter-params:5.12.2=testCompileClasspath,testRuntimeClasspath diff --git a/dd-java-agent/agent-debugger/gradle.lockfile 
b/dd-java-agent/agent-debugger/gradle.lockfile index 44bbe2e0df8..5281b239dcd 100644 --- a/dd-java-agent/agent-debugger/gradle.lockfile +++ b/dd-java-agent/agent-debugger/gradle.lockfile @@ -123,6 +123,7 @@ org.jacoco:org.jacoco.agent:0.8.14=jacocoAgent,jacocoAnt org.jacoco:org.jacoco.ant:0.8.14=jacocoAnt org.jacoco:org.jacoco.core:0.8.14=jacocoAnt org.jacoco:org.jacoco.report:0.8.14=jacocoAnt +org.jctools:jctools-core:3.3.0=testRuntimeClasspath org.jetbrains.intellij.deps:trove4j:1.0.20200330=testRuntimeClasspath org.jetbrains.kotlin:kotlin-compiler-embeddable:2.1.21=testCompileClasspath,testRuntimeClasspath org.jetbrains.kotlin:kotlin-daemon-embeddable:2.1.21=testRuntimeClasspath diff --git a/dd-java-agent/agent-llmobs/build.gradle b/dd-java-agent/agent-llmobs/build.gradle index bb119d21fab..0edfdbad404 100644 --- a/dd-java-agent/agent-llmobs/build.gradle +++ b/dd-java-agent/agent-llmobs/build.gradle @@ -24,6 +24,7 @@ minimumInstructionCoverage = 0.0 dependencies { api libs.slf4j + implementation libs.jctools implementation project(':communication') implementation project(':components:json') diff --git a/dd-java-agent/agent-llmobs/src/main/java/datadog/trace/llmobs/EvalProcessingWorker.java b/dd-java-agent/agent-llmobs/src/main/java/datadog/trace/llmobs/EvalProcessingWorker.java index aaabb2017a6..0c8b3cc5487 100644 --- a/dd-java-agent/agent-llmobs/src/main/java/datadog/trace/llmobs/EvalProcessingWorker.java +++ b/dd-java-agent/agent-llmobs/src/main/java/datadog/trace/llmobs/EvalProcessingWorker.java @@ -12,7 +12,7 @@ import datadog.communication.http.OkHttpUtils; import datadog.trace.api.Config; import datadog.trace.llmobs.domain.LLMObsEval; -import datadog.trace.util.queue.BaseQueue; +import datadog.trace.util.queue.BlockingConsumerNonBlockingQueue; import datadog.trace.util.queue.Queues; import java.util.ArrayList; import java.util.List; @@ -35,7 +35,7 @@ public class EvalProcessingWorker implements AutoCloseable { private static final Logger log = 
LoggerFactory.getLogger(EvalProcessingWorker.class); - private final BaseQueue queue; + private final BlockingConsumerNonBlockingQueue queue; private final Thread serializerThread; public EvalProcessingWorker( @@ -44,7 +44,7 @@ public EvalProcessingWorker( final TimeUnit timeUnit, final SharedCommunicationObjects sco, Config config) { - this.queue = Queues.mpscArrayQueue(capacity); + this.queue = Queues.mpscBlockingConsumerArrayQueue(capacity); boolean isAgentless = config.isLlmObsAgentlessEnabled(); if (isAgentless && (config.getApiKey() == null || config.getApiKey().isEmpty())) { @@ -99,7 +99,7 @@ public static class EvalSerializingHandler implements Runnable { private static final Logger log = LoggerFactory.getLogger(EvalSerializingHandler.class); private static final int FLUSH_THRESHOLD = 50; - private final BaseQueue queue; + private final BlockingConsumerNonBlockingQueue queue; private final long ticksRequiredToFlush; private long lastTicks; @@ -112,7 +112,7 @@ public static class EvalSerializingHandler implements Runnable { private final List buffer = new ArrayList<>(); public EvalSerializingHandler( - final BaseQueue queue, + final BlockingConsumerNonBlockingQueue queue, final long flushInterval, final TimeUnit timeUnit, final HttpUrl submissionUrl, diff --git a/dd-java-agent/agent-profiling/profiling-controller-jfr/build.gradle b/dd-java-agent/agent-profiling/profiling-controller-jfr/build.gradle index 85af5ff6b12..4e72e59a468 100644 --- a/dd-java-agent/agent-profiling/profiling-controller-jfr/build.gradle +++ b/dd-java-agent/agent-profiling/profiling-controller-jfr/build.gradle @@ -14,6 +14,7 @@ testJvmConstraints { dependencies { api project(':dd-java-agent:agent-profiling:profiling-controller') + implementation libs.jctools implementation libs.slf4j annotationProcessor libs.autoservice.processor diff --git 
a/dd-java-agent/instrumentation/graal/native-image/src/main/java/datadog/trace/instrumentation/graal/nativeimage/AnnotationSubstitutionProcessorInstrumentation.java b/dd-java-agent/instrumentation/graal/native-image/src/main/java/datadog/trace/instrumentation/graal/nativeimage/AnnotationSubstitutionProcessorInstrumentation.java index 8ac20dbe71a..18d41331a00 100644 --- a/dd-java-agent/instrumentation/graal/native-image/src/main/java/datadog/trace/instrumentation/graal/nativeimage/AnnotationSubstitutionProcessorInstrumentation.java +++ b/dd-java-agent/instrumentation/graal/native-image/src/main/java/datadog/trace/instrumentation/graal/nativeimage/AnnotationSubstitutionProcessorInstrumentation.java @@ -37,6 +37,7 @@ public void methodAdvice(MethodTransformer transformer) { public String[] helperClassNames() { return new String[] { packageName + ".Target_com_datadog_profiling_agent_ProcessContext", + packageName + ".Target_datadog_jctools_util_UnsafeRefArrayAccess", packageName + ".Target_org_datadog_jmxfetch_App", packageName + ".Target_org_datadog_jmxfetch_Status", packageName + ".Target_org_datadog_jmxfetch_reporter_JsonReporter", @@ -51,6 +52,7 @@ public String[] muzzleIgnoredClassNames() { "jdk.vm.ci.meta.ResolvedJavaField", // ignore helper class names as usual packageName + ".Target_com_datadog_profiling_agent_ProcessContext", + packageName + ".Target_datadog_jctools_util_UnsafeRefArrayAccess", packageName + ".Target_org_datadog_jmxfetch_App", packageName + ".Target_org_datadog_jmxfetch_Status", packageName + ".Target_org_datadog_jmxfetch_reporter_JsonReporter", @@ -61,6 +63,7 @@ public static class FindTargetClassesAdvice { @Advice.OnMethodExit(suppress = Throwable.class) public static void onExit(@Advice.Return(readOnly = false) List> result) { result.add(Target_com_datadog_profiling_agent_ProcessContext.class); + result.add(Target_datadog_jctools_util_UnsafeRefArrayAccess.class); result.add(Target_org_datadog_jmxfetch_App.class); 
result.add(Target_org_datadog_jmxfetch_Status.class); result.add(Target_org_datadog_jmxfetch_reporter_JsonReporter.class); diff --git a/dd-java-agent/instrumentation/graal/native-image/src/main/java/datadog/trace/instrumentation/graal/nativeimage/Target_datadog_jctools_util_UnsafeRefArrayAccess.java b/dd-java-agent/instrumentation/graal/native-image/src/main/java/datadog/trace/instrumentation/graal/nativeimage/Target_datadog_jctools_util_UnsafeRefArrayAccess.java new file mode 100644 index 00000000000..e00ce7b1387 --- /dev/null +++ b/dd-java-agent/instrumentation/graal/native-image/src/main/java/datadog/trace/instrumentation/graal/nativeimage/Target_datadog_jctools_util_UnsafeRefArrayAccess.java @@ -0,0 +1,12 @@ +package datadog.trace.instrumentation.graal.nativeimage; + +import com.oracle.svm.core.annotate.Alias; +import com.oracle.svm.core.annotate.RecomputeFieldValue; +import com.oracle.svm.core.annotate.TargetClass; + +@TargetClass(className = "datadog.jctools.util.UnsafeRefArrayAccess") +public final class Target_datadog_jctools_util_UnsafeRefArrayAccess { + @Alias + @RecomputeFieldValue(kind = RecomputeFieldValue.Kind.ArrayIndexShift, declClass = Object[].class) + public static int REF_ELEMENT_SHIFT; +} diff --git a/dd-trace-core/build.gradle b/dd-trace-core/build.gradle index a7a7613280f..6f992e45ec3 100644 --- a/dd-trace-core/build.gradle +++ b/dd-trace-core/build.gradle @@ -75,6 +75,7 @@ dependencies { implementation libs.slf4j implementation libs.moshi + implementation libs.jctools implementation group: 'com.datadoghq', name: 'sketches-java', version: '0.8.3' @@ -82,8 +83,6 @@ dependencies { compileOnly group: 'com.github.spotbugs', name: 'spotbugs-annotations', version: '4.2.0' - jmhImplementation(libs.jctools) - // We have autoservices defined in test subtree, looks like we need this to be able to properly rebuild this testAnnotationProcessor libs.autoservice.processor testCompileOnly libs.autoservice.annotation diff --git 
a/dd-trace-core/src/main/java/datadog/trace/common/metrics/Aggregator.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/Aggregator.java index 6c868019b21..e043ea5dfe2 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/Aggregator.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/Aggregator.java @@ -4,7 +4,7 @@ import datadog.trace.common.metrics.SignalItem.StopSignal; import datadog.trace.core.util.LRUCache; -import datadog.trace.util.queue.BaseQueue; +import datadog.trace.util.queue.NonBlockingQueue; import java.util.Iterator; import java.util.Map; import java.util.Queue; @@ -22,7 +22,7 @@ final class Aggregator implements Runnable { private static final Logger log = LoggerFactory.getLogger(Aggregator.class); private final Queue batchPool; - private final BaseQueue inbox; + private final NonBlockingQueue inbox; private final LRUCache aggregates; private final ConcurrentMap pending; private final Set commonKeys; @@ -39,7 +39,7 @@ final class Aggregator implements Runnable { Aggregator( MetricWriter writer, Queue batchPool, - BaseQueue inbox, + NonBlockingQueue inbox, ConcurrentMap pending, final Set commonKeys, int maxAggregates, @@ -60,7 +60,7 @@ final class Aggregator implements Runnable { Aggregator( MetricWriter writer, Queue batchPool, - BaseQueue inbox, + NonBlockingQueue inbox, ConcurrentMap pending, final Set commonKeys, int maxAggregates, diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/ConflatingMetricsAggregator.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/ConflatingMetricsAggregator.java index fef5e2e3d5b..5d39995c7f4 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/ConflatingMetricsAggregator.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/ConflatingMetricsAggregator.java @@ -33,7 +33,7 @@ import datadog.trace.core.DDTraceCoreInfo; import datadog.trace.core.monitor.HealthMetrics; import 
datadog.trace.util.AgentTaskScheduler; -import datadog.trace.util.queue.BaseQueue; +import datadog.trace.util.queue.NonBlockingQueue; import datadog.trace.util.queue.Queues; import java.util.ArrayList; import java.util.Arrays; @@ -93,7 +93,7 @@ public final class ConflatingMetricsAggregator implements MetricsAggregator, Eve private final ConcurrentHashMap pending; private final ConcurrentHashMap keys; private final Thread thread; - private final BaseQueue inbox; + private final NonBlockingQueue inbox; private final Sink sink; private final Aggregator aggregator; private final long reportingInterval; diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/OkHttpSink.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/OkHttpSink.java index 45a39f94daf..b91994617d4 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/OkHttpSink.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/OkHttpSink.java @@ -10,7 +10,7 @@ import static java.util.concurrent.TimeUnit.SECONDS; import datadog.trace.util.AgentTaskScheduler; -import datadog.trace.util.queue.BaseQueue; +import datadog.trace.util.queue.NonBlockingQueue; import datadog.trace.util.queue.Queues; import java.io.IOException; import java.nio.ByteBuffer; @@ -37,7 +37,7 @@ public final class OkHttpSink implements Sink, EventListener { private final OkHttpClient client; private final HttpUrl metricsUrl; private final List listeners; - private final BaseQueue enqueuedRequests = Queues.spscArrayQueue(16); + private final NonBlockingQueue enqueuedRequests = Queues.spscArrayQueue(16); private final AtomicLong lastRequestTime = new AtomicLong(); private final AtomicLong asyncRequestCounter = new AtomicLong(); private final boolean bufferingEnabled; @@ -158,9 +158,9 @@ private void handleFailure(okhttp3.Response response) throws IOException { private static final class Sender implements AgentTaskScheduler.Task { - private final BaseQueue inbox; + private final 
NonBlockingQueue inbox; - private Sender(BaseQueue inbox) { + private Sender(NonBlockingQueue inbox) { this.inbox = inbox; } diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/SpanSamplingWorker.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/SpanSamplingWorker.java index b5a9c3074d1..ef8b5f479aa 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/SpanSamplingWorker.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/SpanSamplingWorker.java @@ -9,7 +9,8 @@ import datadog.trace.common.sampling.SingleSpanSampler; import datadog.trace.core.DDSpan; import datadog.trace.core.monitor.HealthMetrics; -import datadog.trace.util.queue.BaseQueue; +import datadog.trace.util.queue.BlockingConsumerNonBlockingQueue; +import datadog.trace.util.queue.NonBlockingQueue; import datadog.trace.util.queue.Queues; import java.util.ArrayList; import java.util.List; @@ -45,7 +46,7 @@ class DefaultSpanSamplingWorker implements SpanSamplingWorker { private final Thread spanSamplingThread; private final SamplingHandler samplingHandler; - private final BaseQueue spanSamplingQueue; + private final BlockingConsumerNonBlockingQueue spanSamplingQueue; private final Queue primaryQueue; private final Queue secondaryQueue; private final SingleSpanSampler singleSpanSampler; @@ -172,7 +173,7 @@ public void onEvent(Object event) { } } - private void consumeBatch(BaseQueue queue) { + private void consumeBatch(NonBlockingQueue queue) { queue.drain(this::onEvent, queue.size()); } } diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/TraceProcessingWorker.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/TraceProcessingWorker.java index 6464653e784..77e8624a36c 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/TraceProcessingWorker.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/TraceProcessingWorker.java @@ -15,7 +15,8 @@ import datadog.trace.core.CoreSpan; import 
datadog.trace.core.DDSpan; import datadog.trace.core.monitor.HealthMetrics; -import datadog.trace.util.queue.BaseQueue; +import datadog.trace.util.queue.BlockingConsumerNonBlockingQueue; +import datadog.trace.util.queue.NonBlockingQueue; import datadog.trace.util.queue.Queues; import java.util.List; import java.util.concurrent.CountDownLatch; @@ -36,8 +37,8 @@ public class TraceProcessingWorker implements AutoCloseable { private static final Logger log = LoggerFactory.getLogger(TraceProcessingWorker.class); private final PrioritizationStrategy prioritizationStrategy; - private final BaseQueue primaryQueue; - private final BaseQueue secondaryQueue; + private final BlockingConsumerNonBlockingQueue primaryQueue; + private final BlockingConsumerNonBlockingQueue secondaryQueue; private final TraceSerializingHandler serializingHandler; private final Thread serializerThread; private final int capacity; @@ -121,14 +122,14 @@ public long getRemainingCapacity() { return primaryQueue.remainingCapacity(); } - private static BaseQueue createQueue(int capacity) { - return Queues.mpscArrayQueue(capacity); + private static BlockingConsumerNonBlockingQueue createQueue(int capacity) { + return Queues.mpscBlockingConsumerArrayQueue(capacity); } public static class TraceSerializingHandler implements Runnable { - private final BaseQueue primaryQueue; - private final BaseQueue secondaryQueue; + private final BlockingConsumerNonBlockingQueue primaryQueue; + private final BlockingConsumerNonBlockingQueue secondaryQueue; private final HealthMetrics healthMetrics; private final long ticksRequiredToFlush; private final boolean doTimeFlush; @@ -136,8 +137,8 @@ public static class TraceSerializingHandler implements Runnable { private long lastTicks; public TraceSerializingHandler( - final BaseQueue primaryQueue, - final BaseQueue secondaryQueue, + final BlockingConsumerNonBlockingQueue primaryQueue, + final BlockingConsumerNonBlockingQueue secondaryQueue, final HealthMetrics healthMetrics, 
final PayloadDispatcher payloadDispatcher, final long flushInterval, @@ -238,7 +239,7 @@ private boolean shouldFlush() { return false; } - private void consumeBatch(BaseQueue queue) { + private void consumeBatch(NonBlockingQueue queue) { queue.drain(this::onEvent, queue.size()); } diff --git a/dd-trace-core/src/main/java/datadog/trace/core/PendingTraceBuffer.java b/dd-trace-core/src/main/java/datadog/trace/core/PendingTraceBuffer.java index d00cc73ee53..9d89a3ae895 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/PendingTraceBuffer.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/PendingTraceBuffer.java @@ -12,7 +12,7 @@ import datadog.trace.api.time.TimeSource; import datadog.trace.common.writer.TraceDumpJsonExporter; import datadog.trace.core.monitor.HealthMetrics; -import datadog.trace.util.queue.BaseQueue; +import datadog.trace.util.queue.BlockingConsumerNonBlockingQueue; import datadog.trace.util.queue.Queues; import java.io.IOException; import java.util.ArrayList; @@ -63,7 +63,7 @@ private static class DelayingPendingTraceBuffer extends PendingTraceBuffer { private static final CommandElement DUMP_ELEMENT = new CommandElement(); private static final CommandElement STAND_IN_ELEMENT = new CommandElement(); - private final BaseQueue queue; + private final BlockingConsumerNonBlockingQueue queue; private final Thread worker; private final TimeSource timeSource; diff --git a/dd-trace-core/src/main/java/datadog/trace/core/datastreams/DefaultDataStreamsMonitoring.java b/dd-trace-core/src/main/java/datadog/trace/core/datastreams/DefaultDataStreamsMonitoring.java index 5ac444169f7..ab27407f357 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/datastreams/DefaultDataStreamsMonitoring.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/datastreams/DefaultDataStreamsMonitoring.java @@ -28,7 +28,7 @@ import datadog.trace.core.DDSpan; import datadog.trace.core.DDTraceCoreInfo; import datadog.trace.util.AgentTaskScheduler; -import 
datadog.trace.util.queue.BaseQueue; +import datadog.trace.util.queue.NonBlockingQueue; import datadog.trace.util.queue.Queues; import java.util.Collections; import java.util.HashMap; @@ -53,7 +53,7 @@ public class DefaultDataStreamsMonitoring implements DataStreamsMonitoring, Even new StatsPoint(DataStreamsTags.EMPTY, 0, 0, 0, 0, 0, 0, 0, null); private final Map> timeToBucket = new HashMap<>(); - private final BaseQueue inbox = Queues.mpscArrayQueue(1024); + private final NonBlockingQueue inbox = Queues.mpscArrayQueue(1024); private final DatastreamsPayloadWriter payloadWriter; private final DDAgentFeaturesDiscovery features; private final TimeSource timeSource; diff --git a/gradle/dependencies.gradle b/gradle/dependencies.gradle index 250d595bb4b..4c18ee280e7 100644 --- a/gradle/dependencies.gradle +++ b/gradle/dependencies.gradle @@ -73,6 +73,7 @@ CachedData.deps.shared = [ libs.dogstatsd, libs.jnr.unixsocket, libs.moshi, + libs.jctools, libs.lz4, libs.aircompressor ] diff --git a/internal-api/internal-api-9/build.gradle.kts b/internal-api/internal-api-9/build.gradle.kts index fa799f17919..374b343d34a 100644 --- a/internal-api/internal-api-9/build.gradle.kts +++ b/internal-api/internal-api-9/build.gradle.kts @@ -39,6 +39,7 @@ val minimumInstructionCoverage by extra(0.8) dependencies { api(project(":internal-api")) + api(libs.jctools) // probably the Queues factory should be moved away from there testImplementation(project(":dd-java-agent:testing")) testImplementation(libs.slf4j) diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/BaseQueue.java b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/BaseQueue.java new file mode 100644 index 00000000000..f9b2cbada49 --- /dev/null +++ b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/BaseQueue.java @@ -0,0 +1,117 @@ +package datadog.trace.util.queue; + +import static datadog.trace.util.BitUtils.nextPowerOfTwo; + +import java.util.AbstractQueue; +import 
java.util.Iterator; +import java.util.function.Consumer; +import java.util.function.Supplier; +import javax.annotation.Nonnull; + +public abstract class BaseQueue extends AbstractQueue implements NonBlockingQueue { + /** The capacity of the queue (must be a power of two) */ + protected final int capacity; + + /** Mask for fast modulo operation (index = pos & mask) */ + protected final int mask; + + public BaseQueue(int capacity) { + this.capacity = nextPowerOfTwo(capacity); + this.mask = this.capacity - 1; + } + + /** + * Drains all available elements from the queue to a consumer. + * + *

This is efficient since it avoids repeated size() checks and returns immediately when empty. + * + * @param consumer a consumer to accept elements + * @return number of elements drained + */ + @Override + public int drain(Consumer consumer) { + return drain(consumer, Integer.MAX_VALUE); + } + + /** + * Drains up to {@code limit} elements from the queue to a consumer. + * + *

This method is useful for batch processing. + * + *

Each element is removed atomically using poll() and passed to the consumer. + * + * @param consumer a consumer to accept elements + * @param limit maximum number of elements to drain + * @return number of elements drained + */ + @Override + public int drain(Consumer consumer, int limit) { + int count = 0; + E e; + while (count < limit && (e = poll()) != null) { + consumer.accept(e); + count++; + } + return count; + } + + /** + * Fills the queue with elements provided by the supplier until either: - the queue is full, or - + * the supplier runs out of elements (returns null) + * + * @param supplier a supplier of elements + * @param limit maximum number of elements to attempt to insert + * @return number of elements successfully enqueued + */ + @Override + public int fill(@Nonnull Supplier supplier, int limit) { + if (limit <= 0) { + return 0; + } + + int added = 0; + while (added < limit) { + E e = supplier.get(); + if (e == null) { + break; // stop if supplier exhausted + } + + if (offer(e)) { + added++; + } else { + break; // queue is full + } + } + return added; + } + + /** + * Iterator is not supported. + * + * @throws UnsupportedOperationException always + */ + @Override + public Iterator iterator() { + throw new UnsupportedOperationException(); + } + + /** + * Returns the remaining capacity. + * + * @return number of additional elements this queue can accept + */ + @Override + public int remainingCapacity() { + return capacity - size(); + } + + /** + * Returns the maximum queue capacity. 
+ * + * @return number of total elements this queue can accept + */ + @Override + public int capacity() { + return capacity; + } +} diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/BaseQueueVarHandle.java b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/BaseQueueVarHandle.java deleted file mode 100644 index 4fc54b5140e..00000000000 --- a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/BaseQueueVarHandle.java +++ /dev/null @@ -1,204 +0,0 @@ -package datadog.trace.util.queue; - -import java.util.Collection; -import java.util.Iterator; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.locks.LockSupport; -import java.util.function.Consumer; -import java.util.function.Supplier; -import javax.annotation.Nonnull; - -public abstract class BaseQueueVarHandle extends BaseQueue { - public BaseQueueVarHandle(int capacity) { - super(capacity); - } - - /** - * Timed offer with progressive backoff. - * - *

Tries to insert an element into the queue within the given timeout. Uses a spin → yield → - * park backoff strategy to reduce CPU usage under contention. - * - * @param e the element to insert - * @param timeout maximum time to wait - * @param unit time unit of timeout - * @return {@code true} if inserted, {@code false} if timeout expires - * @throws InterruptedException if interrupted while waiting - */ - public boolean offer(E e, long timeout, @Nonnull TimeUnit unit) throws InterruptedException { - if (e == null) { - throw new NullPointerException(); - } - final long deadline = System.nanoTime() + unit.toNanos(timeout); - int idle = 0; - - while (true) { - if (offer(e)) return true; - - long remaining = deadline - System.nanoTime(); - if (remaining <= 0) return false; - - // Progressive backoff - if (idle < 100) { - // spin - } else if (idle < 1_000) { - Thread.yield(); - } else { - LockSupport.parkNanos(1_000L); - } - - if (Thread.interrupted()) { - throw new InterruptedException(); - } - idle++; - } - } - - /** - * Polls with a timeout using progressive backoff. - * - * @param timeout max wait time - * @param unit time unit - * @return the head element, or null if timed out - * @throws InterruptedException if interrupted - */ - public E poll(long timeout, @Nonnull TimeUnit unit) throws InterruptedException { - if (timeout <= 0) { - return poll(); - } - - final long deadline = System.nanoTime() + unit.toNanos(timeout); - int idleCount = 0; - - while (true) { - E e = poll(); - if (e != null) return e; - - long remaining = deadline - System.nanoTime(); - if (remaining <= 0) return null; - - if (idleCount < 100) { - // spin - } else if (idleCount < 1_000) { - Thread.yield(); - } else { - LockSupport.parkNanos(1_000L); - } - - if (Thread.interrupted()) { - throw new InterruptedException(); - } - idleCount++; - } - } - - /** - * Drains all available elements from the queue to a consumer. - * - *

This is efficient since it avoids repeated size() checks and returns immediately when empty. - * - * @param consumer a consumer to accept elements - * @return number of elements drained - */ - public int drain(Consumer consumer) { - return drain(consumer, Integer.MAX_VALUE); - } - - /** - * Drains up to {@code limit} elements from the queue to a consumer. - * - *

This method is useful for batch processing. - * - *

Each element is removed atomically using poll() and passed to the consumer. - * - * @param consumer a consumer to accept elements - * @param limit maximum number of elements to drain - * @return number of elements drained - */ - public int drain(Consumer consumer, int limit) { - int count = 0; - E e; - while (count < limit && (e = poll()) != null) { - consumer.accept(e); - count++; - } - return count; - } - - /** - * Fills the queue with elements provided by the supplier until either: - the queue is full, or - - * the supplier runs out of elements (returns null) - * - * @param supplier a supplier of elements - * @param limit maximum number of elements to attempt to insert - * @return number of elements successfully enqueued - */ - public int fill(@Nonnull Supplier supplier, int limit) { - if (limit <= 0) { - return 0; - } - - int added = 0; - while (added < limit) { - E e = supplier.get(); - if (e == null) { - break; // stop if supplier exhausted - } - - if (offer(e)) { - added++; - } else { - break; // queue is full - } - } - return added; - } - - /** - * Iterator is not supported. - * - * @throws UnsupportedOperationException always - */ - @Override - public Iterator iterator() { - throw new UnsupportedOperationException(); - } - - /** - * Returns the remaining capacity. - * - * @return number of additional elements this queue can accept - */ - public int remainingCapacity() { - return capacity - size(); - } - - /** - * Returns the maximum queue capacity. 
- * - * @return number of total elements this queue can accept - */ - public int capacity() { - return capacity; - } - - @Override - public void put(E e) throws InterruptedException { - throw new UnsupportedOperationException("Not implementing blocking operations for producers"); - } - - @Override - public E take() throws InterruptedException { - throw new UnsupportedOperationException("Not implementing blocking operations for consumers"); - } - - @Override - public int drainTo(Collection c) { - return drainTo(c, Integer.MAX_VALUE); - } - - @Override - public int drainTo(Collection c, int maxElements) { - return drain(c::add, maxElements); - } -} diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/BlockingConsumerNonBlockingQueue.java b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/BlockingConsumerNonBlockingQueue.java new file mode 100644 index 00000000000..ae16a042f20 --- /dev/null +++ b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/BlockingConsumerNonBlockingQueue.java @@ -0,0 +1,14 @@ +package datadog.trace.util.queue; + +import java.util.concurrent.TimeUnit; +import javax.annotation.Nonnull; + +public interface BlockingConsumerNonBlockingQueue extends NonBlockingQueue { + boolean offer(E e, long timeout, @Nonnull TimeUnit unit) throws InterruptedException; + + E poll(long timeout, @Nonnull TimeUnit unit) throws InterruptedException; + + void put(E e) throws InterruptedException; + + E take() throws InterruptedException; +} diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/JctoolsMpscBlockingConsumerWrappedQueue.java b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/JctoolsMpscBlockingConsumerWrappedQueue.java new file mode 100644 index 00000000000..6c69615200d --- /dev/null +++ b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/JctoolsMpscBlockingConsumerWrappedQueue.java @@ -0,0 +1,114 @@ +package datadog.trace.util.queue; + 
+import java.util.concurrent.BlockingQueue; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.LockSupport; +import javax.annotation.Nonnull; +import org.jctools.queues.MpscBlockingConsumerArrayQueue; + +public class JctoolsMpscBlockingConsumerWrappedQueue extends JctoolsWrappedQueue + implements BlockingConsumerNonBlockingQueue { + + private final BlockingQueue blockingQueueDelegate; + + public JctoolsMpscBlockingConsumerWrappedQueue( + @Nonnull MpscBlockingConsumerArrayQueue delegate) { + super(delegate); + this.blockingQueueDelegate = delegate; + } + + @Override + public void put(E e) throws InterruptedException { + blockingQueueDelegate.put(e); + } + + @Override + public E take() throws InterruptedException { + return blockingQueueDelegate.take(); + } + + /** + * Timed offer with progressive backoff. + * + *

Tries to insert an element into the queue within the given timeout. Uses a spin → yield → + * park backoff strategy to reduce CPU usage under contention. + * + * @param e the element to insert + * @param timeout maximum time to wait + * @param unit time unit of timeout + * @return {@code true} if inserted, {@code false} if timeout expires + * @throws InterruptedException if interrupted while waiting + */ + @Override + public boolean offer(E e, long timeout, @Nonnull TimeUnit unit) throws InterruptedException { + final long deadline = System.nanoTime() + unit.toNanos(timeout); + int idleCount = 0; + + while (true) { + if (offer(e)) { + return true; // successfully inserted + } + + long remaining = deadline - System.nanoTime(); + if (remaining <= 0) { + return false; // timeout + } + + // Progressive backoff + if (idleCount < 100) { + // spin (busy-wait) + } else if (idleCount < 1_000) { + Thread.yield(); // give up CPU to other threads + } else { + // park for a short duration, up to 1 ms + LockSupport.parkNanos(Math.min(remaining, 1_000_000L)); + } + + idleCount++; + + if (Thread.interrupted()) { + throw new InterruptedException(); + } + } + } + + /** + * Polls with a timeout using progressive backoff. 
+ * + * @param timeout max wait time + * @param unit time unit + * @return the head element, or null if timed out + * @throws InterruptedException if interrupted + */ + @Override + public E poll(long timeout, @Nonnull TimeUnit unit) throws InterruptedException { + final long deadline = System.nanoTime() + unit.toNanos(timeout); + int idleCount = 0; + + while (true) { + E value = poll(); + if (value != null) { + return value; + } + + long remaining = deadline - System.nanoTime(); + if (remaining <= 0) { + return null; + } + + // Progressive backoff + if (idleCount < 100) { + // spin + } else if (idleCount < 1_000) { + Thread.yield(); + } else { + LockSupport.parkNanos(Math.min(remaining, 1_000_000L)); + } + idleCount++; + + if (Thread.interrupted()) { + throw new InterruptedException(); + } + } + } +} diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/JctoolsWrappedQueue.java b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/JctoolsWrappedQueue.java new file mode 100644 index 00000000000..32b456129bd --- /dev/null +++ b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/JctoolsWrappedQueue.java @@ -0,0 +1,68 @@ +package datadog.trace.util.queue; + +import static java.lang.Math.E; + +import java.util.AbstractQueue; +import java.util.Iterator; +import java.util.function.Consumer; +import java.util.function.Supplier; +import javax.annotation.Nonnull; +import org.jctools.queues.MessagePassingQueue; + +public class JctoolsWrappedQueue extends AbstractQueue implements NonBlockingQueue { + private final MessagePassingQueue delegate; + + public JctoolsWrappedQueue(@Nonnull MessagePassingQueue delegate) { + this.delegate = delegate; + } + + @Override + public int drain(Consumer consumer) { + return delegate.drain(consumer::accept); + } + + @Override + public int drain(Consumer consumer, int limit) { + return delegate.drain(consumer::accept, limit); + } + + @Override + public int fill(@Nonnull Supplier supplier, 
int limit) { + return delegate.fill(supplier::get, limit); + } + + @Override + public int remainingCapacity() { + return capacity() - size(); + } + + @Override + public int capacity() { + return delegate.capacity(); + } + + @Override + public Iterator iterator() { + throw new UnsupportedOperationException(); + } + + @Override + public int size() { + return delegate.size(); + } + + @Override + public boolean offer(E e) { + return delegate.offer(e); + } + + @Override + public E poll() { + return delegate.poll(); + } + + @Override + public E peek() { + return delegate.peek(); + } +} diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandle.java b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandle.java index cc407c98585..f0f783f6f5d 100644 --- a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandle.java +++ b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandle.java @@ -1,13 +1,16 @@ package datadog.trace.util.queue; +import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.LockSupport; +import javax.annotation.Nonnull; /** * JCtools-like MpscBlockingConsumerArrayQueue implemented without Unsafe. * *

It features nonblocking offer/poll methods and blocking (condition based) take/put. */ -public class MpscBlockingConsumerArrayQueueVarHandle extends MpscArrayQueueVarHandle { +public class MpscBlockingConsumerArrayQueueVarHandle extends MpscArrayQueueVarHandle + implements BlockingConsumerNonBlockingQueue { /** Consumer thread reference for wake-up. */ private volatile Thread consumerThread; @@ -49,4 +52,87 @@ public E take() throws InterruptedException { LockSupport.park(this); } } + + /** + * Timed offer with progressive backoff. + * + *

Tries to insert an element into the queue within the given timeout. Uses a spin → yield → + * park backoff strategy to reduce CPU usage under contention. + * + * @param e the element to insert + * @param timeout maximum time to wait + * @param unit time unit of timeout + * @return {@code true} if inserted, {@code false} if timeout expires + * @throws InterruptedException if interrupted while waiting + */ + public boolean offer(E e, long timeout, @Nonnull TimeUnit unit) throws InterruptedException { + final long deadline = System.nanoTime() + unit.toNanos(timeout); + int idleCount = 0; + + while (true) { + if (offer(e)) { + return true; // successfully inserted + } + + long remaining = deadline - System.nanoTime(); + if (remaining <= 0) { + return false; // timeout + } + + // Progressive backoff + if (idleCount < 100) { + // spin (busy-wait) + } else if (idleCount < 1_000) { + Thread.yield(); // give up CPU to other threads + } else { + // park for a short duration, up to 1 ms + LockSupport.parkNanos(Math.min(remaining, 1_000_000L)); + } + + idleCount++; + + if (Thread.interrupted()) { + throw new InterruptedException(); + } + } + } + + /** + * Polls with a timeout using progressive backoff. 
+ * + * @param timeout max wait time + * @param unit time unit + * @return the head element, or null if timed out + * @throws InterruptedException if interrupted + */ + public E poll(long timeout, @Nonnull TimeUnit unit) throws InterruptedException { + final long deadline = System.nanoTime() + unit.toNanos(timeout); + int idleCount = 0; + + while (true) { + E value = poll(); + if (value != null) { + return value; + } + + long remaining = deadline - System.nanoTime(); + if (remaining <= 0) { + return null; + } + + // Progressive backoff + if (idleCount < 100) { + // spin + } else if (idleCount < 1_000) { + Thread.yield(); + } else { + LockSupport.parkNanos(Math.min(remaining, 1_000_000L)); + } + idleCount++; + + if (Thread.interrupted()) { + throw new InterruptedException(); + } + } + } } diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/NonBlockingQueue.java b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/NonBlockingQueue.java new file mode 100644 index 00000000000..489ccdb9295 --- /dev/null +++ b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/NonBlockingQueue.java @@ -0,0 +1,18 @@ +package datadog.trace.util.queue; + +import java.util.Queue; +import java.util.function.Consumer; +import java.util.function.Supplier; +import javax.annotation.Nonnull; + +public interface NonBlockingQueue extends Queue { + int drain(Consumer consumer); + + int drain(Consumer consumer, int limit); + + int fill(@Nonnull Supplier supplier, int limit); + + int remainingCapacity(); + + int capacity(); +} diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/Queues.java b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/Queues.java index 78cc3fcae70..3ea29ae17fb 100644 --- a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/Queues.java +++ b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/Queues.java @@ -1,37 +1,43 @@ package 
datadog.trace.util.queue; import datadog.environment.JavaVirtualMachine; +import org.jctools.queues.MpscArrayQueue; +import org.jctools.queues.MpscBlockingConsumerArrayQueue; +import org.jctools.queues.SpmcArrayQueue; +import org.jctools.queues.SpscArrayQueue; public final class Queues { private static final boolean CAN_USE_VARHANDLES = JavaVirtualMachine.isJavaVersionAtLeast(9); private Queues() {} - public static BaseQueue mpscArrayQueue(int requestedCapacity) { + public static NonBlockingQueue mpscArrayQueue(int requestedCapacity) { if (CAN_USE_VARHANDLES) { return new MpscArrayQueueVarHandle<>(requestedCapacity); } - return new MpscArrayQueue<>(requestedCapacity); + return new JctoolsWrappedQueue<>(new MpscArrayQueue<>(requestedCapacity)); } - public static BaseQueue spmcArrayQueue(int requestedCapacity) { + public static NonBlockingQueue spmcArrayQueue(int requestedCapacity) { if (CAN_USE_VARHANDLES) { return new SpmcArrayQueueVarHandle<>(requestedCapacity); } - return new SpmcArrayQueue<>(requestedCapacity); + return new JctoolsWrappedQueue<>(new SpmcArrayQueue<>(requestedCapacity)); } - public static BaseQueue mpscBlockingConsumerArrayQueue(int requestedCapacity) { + public static BlockingConsumerNonBlockingQueue mpscBlockingConsumerArrayQueue( + int requestedCapacity) { if (CAN_USE_VARHANDLES) { return new MpscBlockingConsumerArrayQueueVarHandle<>(requestedCapacity); } - return new MpscBlockingConsumerArrayQueue<>(requestedCapacity); + return new JctoolsMpscBlockingConsumerWrappedQueue<>( + new MpscBlockingConsumerArrayQueue<>(requestedCapacity)); } - public static BaseQueue spscArrayQueue(int requestedCapacity) { + public static NonBlockingQueue spscArrayQueue(int requestedCapacity) { if (CAN_USE_VARHANDLES) { return new SpscArrayQueueVarHandle<>(requestedCapacity); } - return new SpscArrayQueue<>(requestedCapacity); + return new JctoolsWrappedQueue<>(new SpscArrayQueue<>(requestedCapacity)); } } diff --git 
a/internal-api/src/jmh/java/datadog/trace/util/queue/JcToolsMPSCQueueBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/queue/JcToolsMPSCQueueBenchmark.java deleted file mode 100644 index da0da85d96a..00000000000 --- a/internal-api/src/jmh/java/datadog/trace/util/queue/JcToolsMPSCQueueBenchmark.java +++ /dev/null @@ -1,81 +0,0 @@ -package datadog.trace.util.queue; - -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.TimeUnit; -import org.jctools.queues.MpscArrayQueue; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Fork; -import org.openjdk.jmh.annotations.Group; -import org.openjdk.jmh.annotations.GroupThreads; -import org.openjdk.jmh.annotations.Level; -import org.openjdk.jmh.annotations.Measurement; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Param; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.Threads; -import org.openjdk.jmh.annotations.Warmup; -import org.openjdk.jmh.infra.Blackhole; - -/* -Benchmark (capacity) Mode Cnt Score Error Units -JcToolsdMPSCQueueBenchmark.queueTest 1024 thrpt 75.207 ops/us -JcToolsdMPSCQueueBenchmark.queueTest:consume 1024 thrpt 62.553 ops/us -JcToolsdMPSCQueueBenchmark.queueTest:produce 1024 thrpt 12.654 ops/us -JcToolsdMPSCQueueBenchmark.queueTest 65536 thrpt 36.381 ops/us -JcToolsdMPSCQueueBenchmark.queueTest:consume 65536 thrpt 22.665 ops/us -JcToolsdMPSCQueueBenchmark.queueTest:produce 65536 thrpt 13.717 ops/us - */ -@BenchmarkMode(Mode.Throughput) -@Warmup(iterations = 1, time = 30) -@Measurement(iterations = 1, time = 30) -@Threads(Threads.MAX) -@Fork(1) -@OutputTimeUnit(TimeUnit.MICROSECONDS) -@State(Scope.Benchmark) -public class JcToolsMPSCQueueBenchmark { - @State(Scope.Group) - public static class 
QueueState { - MpscArrayQueue queue; - CountDownLatch consumerReady; - - @Param({"1024", "65536"}) - int capacity; - - @Setup(Level.Iteration) - public void setup() { - queue = new MpscArrayQueue<>(capacity); - consumerReady = new CountDownLatch(1); - } - } - - @Benchmark - @Group("queueTest") - @GroupThreads(4) - public void produce(QueueState state) { - try { - state.consumerReady.await(); // wait until consumer is ready - } catch (InterruptedException ignored) { - } - - while (!state.queue.offer(0)) { - Thread.yield(); - } - } - - @Benchmark - @Group("queueTest") - @GroupThreads(1) - public void consume(QueueState state, Blackhole bh) { - state.consumerReady.countDown(); // signal producers can start - Integer v = state.queue.poll(); - if (v != null) { - bh.consume(v); - } else { - Thread.yield(); - } - } -} diff --git a/internal-api/src/jmh/java/datadog/trace/util/queue/JcToolsSPMCQueueBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/queue/JcToolsSPMCQueueBenchmark.java deleted file mode 100644 index 9b9ef7ace18..00000000000 --- a/internal-api/src/jmh/java/datadog/trace/util/queue/JcToolsSPMCQueueBenchmark.java +++ /dev/null @@ -1,78 +0,0 @@ -package datadog.trace.util.queue; - -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.locks.LockSupport; -import org.jctools.queues.MpscArrayQueue; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Fork; -import org.openjdk.jmh.annotations.Group; -import org.openjdk.jmh.annotations.GroupThreads; -import org.openjdk.jmh.annotations.Level; -import org.openjdk.jmh.annotations.Measurement; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Param; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import 
org.openjdk.jmh.annotations.Warmup; -import org.openjdk.jmh.infra.Blackhole; - -/* -Benchmark (capacity) Mode Cnt Score Error Units -JcToolsdMPSCQueueBenchmark.queueTest 1024 thrpt 75.207 ops/us -JcToolsdMPSCQueueBenchmark.queueTest:consume 1024 thrpt 62.553 ops/us -JcToolsdMPSCQueueBenchmark.queueTest:produce 1024 thrpt 12.654 ops/us -JcToolsdMPSCQueueBenchmark.queueTest 65536 thrpt 36.381 ops/us -JcToolsdMPSCQueueBenchmark.queueTest:consume 65536 thrpt 22.665 ops/us -JcToolsdMPSCQueueBenchmark.queueTest:produce 65536 thrpt 13.717 ops/us - */ -@BenchmarkMode(Mode.Throughput) -@Warmup(iterations = 1, time = 30) -@Measurement(iterations = 1, time = 30) -@Fork(1) -@OutputTimeUnit(TimeUnit.MICROSECONDS) -@State(Scope.Benchmark) -public class JcToolsSPMCQueueBenchmark { - @State(Scope.Group) - public static class QueueState { - MpscArrayQueue queue; - CountDownLatch producerReady; - - @Param({"1024", "65536"}) - int capacity; - - @Setup(Level.Iteration) - public void setup() { - queue = new MpscArrayQueue<>(capacity); - producerReady = new CountDownLatch(1); - } - } - - @Benchmark - @Group("queueTest") - @GroupThreads(4) - public void consume(QueueState state, Blackhole bh) throws InterruptedException { - state.producerReady.await(); // wait until consumer is ready - Integer v = state.queue.poll(); - if (v == null) { - LockSupport.parkNanos(1); - } else { - bh.consume(v); - } - } - - @Benchmark - @Group("queueTest") - @GroupThreads(1) - public void produce(QueueState state) { - state.producerReady.countDown(); // signal consumers can start - // bounded attempt: try once, then yield if full - boolean offered = state.queue.offer(0); - if (!offered) { - Thread.yield(); - } - } -} diff --git a/internal-api/src/jmh/java/datadog/trace/util/queue/JcToolsSPSCQueueBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/queue/JcToolsSPSCQueueBenchmark.java deleted file mode 100644 index 801ee2e964c..00000000000 --- 
a/internal-api/src/jmh/java/datadog/trace/util/queue/JcToolsSPSCQueueBenchmark.java +++ /dev/null @@ -1,71 +0,0 @@ -package datadog.trace.util.queue; - -import java.util.concurrent.TimeUnit; -import org.jctools.queues.SpscArrayQueue; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Fork; -import org.openjdk.jmh.annotations.Group; -import org.openjdk.jmh.annotations.GroupThreads; -import org.openjdk.jmh.annotations.Level; -import org.openjdk.jmh.annotations.Measurement; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Param; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.Warmup; -import org.openjdk.jmh.infra.Blackhole; - -/* -Benchmark (capacity) Mode Cnt Score Error Units -SPSCQueueBenchmark.queueTest 1024 thrpt 136.138 ops/us -SPSCQueueBenchmark.queueTest:consume 1024 thrpt 68.767 ops/us -SPSCQueueBenchmark.queueTest:produce 1024 thrpt 67.371 ops/us -SPSCQueueBenchmark.queueTest 65536 thrpt 127.357 ops/us -SPSCQueueBenchmark.queueTest:consume 65536 thrpt 65.933 ops/us -SPSCQueueBenchmark.queueTest:produce 65536 thrpt 61.424 ops/us - */ -@BenchmarkMode(Mode.Throughput) -@Warmup(iterations = 1, time = 30) -@Measurement(iterations = 1, time = 30) -@Fork(1) -@OutputTimeUnit(TimeUnit.MICROSECONDS) -@State(Scope.Benchmark) -public class JcToolsSPSCQueueBenchmark { - @State(Scope.Group) - public static class QueueState { - SpscArrayQueue queue; - - @Param({"1024", "65536"}) - int capacity; - - @Setup(Level.Iteration) - public void setup() { - queue = new SpscArrayQueue<>(capacity); - } - } - - @Benchmark - @Group("queueTest") - @GroupThreads(1) - public void produce(QueueState state) { - - // bounded attempt: try once, then yield if full - boolean offered = state.queue.offer(0); - if 
(!offered) { - Thread.yield(); - } - } - - @Benchmark - @Group("queueTest") - @GroupThreads(1) - public void consume(QueueState state, Blackhole bh) { - Integer v = state.queue.poll(); - if (v != null) { - bh.consume(v); - } - } -} diff --git a/internal-api/src/jmh/java/datadog/trace/util/queue/MPSCQueueBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/queue/MPSCQueueBenchmark.java deleted file mode 100644 index 55046f41ec2..00000000000 --- a/internal-api/src/jmh/java/datadog/trace/util/queue/MPSCQueueBenchmark.java +++ /dev/null @@ -1,78 +0,0 @@ -package datadog.trace.util.queue; - -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.TimeUnit; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Fork; -import org.openjdk.jmh.annotations.Group; -import org.openjdk.jmh.annotations.GroupThreads; -import org.openjdk.jmh.annotations.Level; -import org.openjdk.jmh.annotations.Measurement; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Param; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.Warmup; -import org.openjdk.jmh.infra.Blackhole; - -/* -Benchmark (capacity) Mode Cnt Score Error Units -MPSCQueueBenchmark.queueTest 1024 thrpt 165.379 ops/us -MPSCQueueBenchmark.queueTest:consume 1024 thrpt 102.258 ops/us -MPSCQueueBenchmark.queueTest:produce 1024 thrpt 63.121 ops/us -MPSCQueueBenchmark.queueTest 65536 thrpt 135.953 ops/us -MPSCQueueBenchmark.queueTest:consume 65536 thrpt 69.384 ops/us -MPSCQueueBenchmark.queueTest:produce 65536 thrpt 66.569 ops/us - */ -@BenchmarkMode(Mode.Throughput) -@Warmup(iterations = 1, time = 30) -@Measurement(iterations = 1, time = 30) -@Fork(1) -@OutputTimeUnit(TimeUnit.MICROSECONDS) -@State(Scope.Benchmark) -public class 
MPSCQueueBenchmark { - @State(Scope.Group) - public static class QueueState { - MpscArrayQueue queue; - CountDownLatch consumerReady; - - @Param({"1024", "65536"}) - int capacity; - - @Setup(Level.Iteration) - public void setup() { - queue = new MpscArrayQueue<>(capacity); - consumerReady = new CountDownLatch(1); - } - } - - @Benchmark - @Group("queueTest") - @GroupThreads(4) - public void produce(QueueState state) { - try { - state.consumerReady.await(); // wait until consumer is ready - } catch (InterruptedException ignored) { - } - - // bounded attempt: try once, then yield if full - boolean offered = state.queue.offer(0); - if (!offered) { - Thread.yield(); - } - } - - @Benchmark - @Group("queueTest") - @GroupThreads(1) - public void consume(QueueState state, Blackhole bh) { - state.consumerReady.countDown(); // signal producers can start - Integer v = state.queue.poll(); - if (v != null) { - bh.consume(v); - } - } -} diff --git a/internal-api/src/jmh/java/datadog/trace/util/queue/SPMCQueueBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/queue/SPMCQueueBenchmark.java deleted file mode 100644 index 3273bbf351d..00000000000 --- a/internal-api/src/jmh/java/datadog/trace/util/queue/SPMCQueueBenchmark.java +++ /dev/null @@ -1,83 +0,0 @@ -package datadog.trace.util.queue; - -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.locks.LockSupport; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Fork; -import org.openjdk.jmh.annotations.Group; -import org.openjdk.jmh.annotations.GroupThreads; -import org.openjdk.jmh.annotations.Level; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; - -/* -Benchmark Mode Cnt Score Error Units 
-SPMCQueueBenchmark.spmc thrpt 5 266.576 ± 9.589 ops/us -SPMCQueueBenchmark.spmc:consumer thrpt 5 250.901 ± 9.383 ops/us -SPMCQueueBenchmark.spmc:producer thrpt 5 15.675 ± 0.507 ops/us - */ -@BenchmarkMode(Mode.Throughput) -@State(Scope.Group) -@Fork(value = 1, warmups = 0) -@OutputTimeUnit(TimeUnit.MICROSECONDS) -public class SPMCQueueBenchmark { - - private static final int QUEUE_CAPACITY = 1024; - private static final int ITEMS_TO_PRODUCE = 100_000; - - private SpmcArrayQueue queue; - private AtomicInteger produced; - private AtomicInteger consumed; - - @Setup(Level.Iteration) - public void setup() { - queue = new SpmcArrayQueue<>(QUEUE_CAPACITY); - produced = new AtomicInteger(0); - consumed = new AtomicInteger(0); - - // Pre-fill queue for warmup safety - int warmupFill = Math.min(QUEUE_CAPACITY / 2, ITEMS_TO_PRODUCE); - for (int i = 0; i < warmupFill; i++) { - queue.offer(i); - produced.incrementAndGet(); - } - } - - // Single producer in the group - @Benchmark - @Group("spmc") - @GroupThreads(1) - public void producer() { - int i = produced.getAndIncrement(); - if (i < ITEMS_TO_PRODUCE) { - while (!queue.offer(i)) { - LockSupport.parkNanos(1L); - } - } - } - - // Multiple consumers in the group - @Benchmark - @Group("spmc") - @GroupThreads(4) // adjust number of consumers - public int consumer() { - while (true) { - Integer val = queue.poll(); - if (val != null) { - consumed.incrementAndGet(); - return val; - } - - if (produced.get() >= ITEMS_TO_PRODUCE && queue.isEmpty()) { - return 0; - } - - LockSupport.parkNanos(1L); - } - } -} diff --git a/internal-api/src/jmh/java/datadog/trace/util/queue/SPSCQueueBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/queue/SPSCQueueBenchmark.java deleted file mode 100644 index 1ab0e7c982c..00000000000 --- a/internal-api/src/jmh/java/datadog/trace/util/queue/SPSCQueueBenchmark.java +++ /dev/null @@ -1,70 +0,0 @@ -package datadog.trace.util.queue; - -import java.util.concurrent.TimeUnit; -import 
org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Fork; -import org.openjdk.jmh.annotations.Group; -import org.openjdk.jmh.annotations.GroupThreads; -import org.openjdk.jmh.annotations.Level; -import org.openjdk.jmh.annotations.Measurement; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Param; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.Warmup; -import org.openjdk.jmh.infra.Blackhole; - -/* -Benchmark (capacity) Mode Cnt Score Error Units -SPSCQueueBenchmark.queueTest 1024 thrpt 141.400 ops/us -SPSCQueueBenchmark.queueTest:consume 1024 thrpt 73.414 ops/us -SPSCQueueBenchmark.queueTest:produce 1024 thrpt 67.986 ops/us -SPSCQueueBenchmark.queueTest 65536 thrpt 153.123 ops/us -SPSCQueueBenchmark.queueTest:consume 65536 thrpt 79.838 ops/us -SPSCQueueBenchmark.queueTest:produce 65536 thrpt 73.286 ops/us - */ -@BenchmarkMode(Mode.Throughput) -@Warmup(iterations = 1, time = 30) -@Measurement(iterations = 1, time = 30) -@Fork(1) -@OutputTimeUnit(TimeUnit.MICROSECONDS) -@State(Scope.Benchmark) -public class SPSCQueueBenchmark { - @State(Scope.Group) - public static class QueueState { - SpscArrayQueue queue; - - @Param({"1024", "65536"}) - int capacity; - - @Setup(Level.Iteration) - public void setup() { - queue = new SpscArrayQueue<>(capacity); - } - } - - @Benchmark - @Group("queueTest") - @GroupThreads(1) - public void produce(QueueState state) { - - // bounded attempt: try once, then yield if full - boolean offered = state.queue.offer(0); - if (!offered) { - Thread.yield(); - } - } - - @Benchmark - @Group("queueTest") - @GroupThreads(1) - public void consume(QueueState state, Blackhole bh) { - Integer v = state.queue.poll(); - if (v != null) { - bh.consume(v); - } - } -} diff --git 
a/internal-api/src/main/java/datadog/trace/util/queue/BaseQueue.java b/internal-api/src/main/java/datadog/trace/util/queue/BaseQueue.java deleted file mode 100644 index 7cbec5f5794..00000000000 --- a/internal-api/src/main/java/datadog/trace/util/queue/BaseQueue.java +++ /dev/null @@ -1,219 +0,0 @@ -package datadog.trace.util.queue; - -import static datadog.trace.util.BitUtils.nextPowerOfTwo; - -import java.util.AbstractQueue; -import java.util.Collection; -import java.util.Iterator; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.locks.LockSupport; -import java.util.function.Consumer; -import java.util.function.Supplier; -import javax.annotation.Nonnull; - -public abstract class BaseQueue extends AbstractQueue implements BlockingQueue { - /** The capacity of the queue (must be a power of two) */ - protected final int capacity; - - /** Mask for fast modulo operation (index = pos & mask) */ - protected final int mask; - - public BaseQueue(int capacity) { - this.capacity = nextPowerOfTwo(capacity); - this.mask = this.capacity - 1; - } - - /** - * Timed offer with progressive backoff. - * - *

Tries to insert an element into the queue within the given timeout. Uses a spin → yield → - * park backoff strategy to reduce CPU usage under contention. - * - * @param e the element to insert - * @param timeout maximum time to wait - * @param unit time unit of timeout - * @return {@code true} if inserted, {@code false} if timeout expires - * @throws InterruptedException if interrupted while waiting - */ - public boolean offer(E e, long timeout, @Nonnull TimeUnit unit) throws InterruptedException { - final long deadline = System.nanoTime() + unit.toNanos(timeout); - int idleCount = 0; - - while (true) { - if (offer(e)) { - return true; // successfully inserted - } - - long remaining = deadline - System.nanoTime(); - if (remaining <= 0) { - return false; // timeout - } - - // Progressive backoff - if (idleCount < 100) { - // spin (busy-wait) - } else if (idleCount < 1_000) { - Thread.yield(); // give up CPU to other threads - } else { - // park for a short duration, up to 1 ms - LockSupport.parkNanos(Math.min(remaining, 1_000_000L)); - } - - idleCount++; - - if (Thread.interrupted()) { - throw new InterruptedException(); - } - } - } - - /** - * Polls with a timeout using progressive backoff. 
- * - * @param timeout max wait time - * @param unit time unit - * @return the head element, or null if timed out - * @throws InterruptedException if interrupted - */ - public E poll(long timeout, @Nonnull TimeUnit unit) throws InterruptedException { - final long deadline = System.nanoTime() + unit.toNanos(timeout); - int idleCount = 0; - - while (true) { - E value = poll(); - if (value != null) { - return value; - } - - long remaining = deadline - System.nanoTime(); - if (remaining <= 0) { - return null; - } - - // Progressive backoff - if (idleCount < 100) { - // spin - } else if (idleCount < 1_000) { - Thread.yield(); - } else { - LockSupport.parkNanos(Math.min(remaining, 1_000_000L)); - } - idleCount++; - - if (Thread.interrupted()) { - throw new InterruptedException(); - } - } - } - - /** - * Drains all available elements from the queue to a consumer. - * - *

This is efficient since it avoids repeated size() checks and returns immediately when empty. - * - * @param consumer a consumer to accept elements - * @return number of elements drained - */ - public int drain(Consumer consumer) { - return drain(consumer, Integer.MAX_VALUE); - } - - /** - * Drains up to {@code limit} elements from the queue to a consumer. - * - *

This method is useful for batch processing. - * - *

Each element is removed atomically using poll() and passed to the consumer. - * - * @param consumer a consumer to accept elements - * @param limit maximum number of elements to drain - * @return number of elements drained - */ - public int drain(Consumer consumer, int limit) { - int count = 0; - E e; - while (count < limit && (e = poll()) != null) { - consumer.accept(e); - count++; - } - return count; - } - - /** - * Fills the queue with elements provided by the supplier until either: - the queue is full, or - - * the supplier runs out of elements (returns null) - * - * @param supplier a supplier of elements - * @param limit maximum number of elements to attempt to insert - * @return number of elements successfully enqueued - */ - public int fill(@Nonnull Supplier supplier, int limit) { - if (limit <= 0) { - return 0; - } - - int added = 0; - while (added < limit) { - E e = supplier.get(); - if (e == null) { - break; // stop if supplier exhausted - } - - if (offer(e)) { - added++; - } else { - break; // queue is full - } - } - return added; - } - - /** - * Iterator is not supported. - * - * @throws UnsupportedOperationException always - */ - @Override - public Iterator iterator() { - throw new UnsupportedOperationException(); - } - - /** - * Returns the remaining capacity. - * - * @return number of additional elements this queue can accept - */ - public int remainingCapacity() { - return capacity - size(); - } - - /** - * Returns the maximum queue capacity. 
- * - * @return number of total elements this queue can accept - */ - public int capacity() { - return capacity; - } - - @Override - public void put(E e) throws InterruptedException { - throw new UnsupportedOperationException("Not implementing blocking operations for producers"); - } - - @Override - public E take() throws InterruptedException { - throw new UnsupportedOperationException("Not implementing blocking operations for consumers"); - } - - @Override - public int drainTo(Collection c) { - return drainTo(c, Integer.MAX_VALUE); - } - - @Override - public int drainTo(Collection c, int maxElements) { - return drain(c::add, maxElements); - } -} diff --git a/internal-api/src/main/java/datadog/trace/util/queue/MpscArrayQueue.java b/internal-api/src/main/java/datadog/trace/util/queue/MpscArrayQueue.java deleted file mode 100644 index ba4da512c6f..00000000000 --- a/internal-api/src/main/java/datadog/trace/util/queue/MpscArrayQueue.java +++ /dev/null @@ -1,135 +0,0 @@ -package datadog.trace.util.queue; - -import java.util.concurrent.atomic.AtomicLongFieldUpdater; -import java.util.concurrent.atomic.AtomicReferenceArray; -import java.util.concurrent.locks.LockSupport; - -/** - * Multiple-Producer, Single-Consumer (MPSC) bounded queue based on a circular array. - * - *

This queue is optimized for high-performance concurrent access where multiple threads - * (producers) can safely enqueue items concurrently, while a single thread (consumer) dequeues - * them. - * - *

Producers leverage a lock free CAS loop to win the race. Fields are padded to minimize cache - * line false sharing. - * - * @param the type of elements held in this queue - */ -public class MpscArrayQueue extends BaseQueue { - /** Array buffer to store the elements; uses AtomicReferenceArray for atomic slot updates */ - private final AtomicReferenceArray buffer; - - // Padding - @SuppressWarnings("unused") - private long p0, p1, p2, p3, p4, p5, p6; - - /** Tail index: the next slot to insert for producers */ - private volatile long tail = 0L; - - // Padding - @SuppressWarnings("unused") - private long q0, q1, q2, q3, q4, q5, q6; - - /** Atomic updater to perform lock-free CAS on tail */ - private static final AtomicLongFieldUpdater TAIL_UPDATER = - AtomicLongFieldUpdater.newUpdater(MpscArrayQueue.class, "tail"); - - // Padding - @SuppressWarnings("unused") - private long p10, p11, p12, p13, p14, p15, p16; - - /** Head index: the next slot to consume for the single consumer */ - private volatile long head = 0L; - - // Padding - @SuppressWarnings("unused") - private long q10, q11, q12, q13, q14, q15, q16; - - /** - * Creates a new MPSC queue with the specified capacity. Capacity will be rounded up to the next - * power of two for efficient modulo operations. - * - * @param capacity the desired maximum number of elements - */ - public MpscArrayQueue(int capacity) { - super(capacity); - this.buffer = new AtomicReferenceArray<>(this.capacity); - } - - /** - * Adds the specified element to the queue if space is available. - * - *

Multiple producers may safely call this concurrently. Uses a CAS loop to claim a slot and - * {@link AtomicReferenceArray#lazySet(Object)} to publish the element. If the queue is full, - * returns {@code false}. - * - * @param e the element to add - * @return {@code true} if successful, {@code false} if queue is full - * @throws NullPointerException if {@code e} is null - */ - @Override - public boolean offer(E e) { - if (e == null) { - throw new NullPointerException(); - } - - while (true) { - long currentTail = tail; - int index = (int) (currentTail & mask); - - // Check if slot is free - if (buffer.get(index) != null) { - return false; // queue full - } - - // Attempt to claim slot using CAS - if (TAIL_UPDATER.compareAndSet(this, currentTail, currentTail + 1)) { - // Use lazySet for release semantics (avoids full volatile write) - buffer.lazySet(index, e); - return true; - } - - // CAS failed, brief backoff to reduce contention - // Note: I found parkNanos more CPU friendly than Thread.yields - LockSupport.parkNanos(1); - } - } - - /** - * Removes and returns the head of the queue, or null if empty. - * - *

Only a single consumer may call this. Advances the head and frees the slot. - * - * @return the head element, or null if empty - */ - @Override - public E poll() { - long currentHead = head; - int index = (int) (currentHead & mask); - E value = buffer.get(index); - - if (value == null) { - return null; - } - - // Mark slot free with lazySet (release semantics) - buffer.lazySet(index, null); - head = currentHead + 1; // advance head - return value; - } - - /** Returns but does not remove the head element. */ - @Override - public E peek() { - int index = (int) (head & mask); - return buffer.get(index); - } - - @Override - public int size() { - long currentTail = tail; - long currentHead = head; - return (int) (currentTail - currentHead); - } -} diff --git a/internal-api/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueue.java b/internal-api/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueue.java deleted file mode 100644 index 5ad007b000b..00000000000 --- a/internal-api/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueue.java +++ /dev/null @@ -1,92 +0,0 @@ -package datadog.trace.util.queue; - -import java.util.concurrent.locks.Condition; -import java.util.concurrent.locks.ReentrantLock; - -/** - * JCtools-like MpscBlockingConsumerArrayQueue implemented without Unsafe. - * - *

It features nonblocking offer/poll methods and blocking (condition based) take/put. - */ -public class MpscBlockingConsumerArrayQueue extends MpscArrayQueue { - // Blocking controls - private final ReentrantLock lock = new ReentrantLock(); - private final Condition notEmpty = lock.newCondition(); - private final Condition notFull = lock.newCondition(); - - public MpscBlockingConsumerArrayQueue(int capacity) { - super(capacity); - } - - @Override - public boolean offer(E e) { - final boolean success = super.offer(e); - if (success) { - signalNotEmpty(); - } - return success; - } - - public void put(E e) throws InterruptedException { - while (!offer(e)) { - awaitNotFull(); - } - } - - @Override - public E poll() { - final E ret = super.poll(); - if (ret != null) { - signalNotFull(); - } - return ret; - } - - public E take() throws InterruptedException { - E e; - while ((e = poll()) == null) { - awaitNotEmpty(); - } - return e; - } - - private void signalNotEmpty() { - lock.lock(); - try { - notEmpty.signal(); - } finally { - lock.unlock(); - } - } - - private void signalNotFull() { - lock.lock(); - try { - notFull.signal(); - } finally { - lock.unlock(); - } - } - - private void awaitNotEmpty() throws InterruptedException { - lock.lockInterruptibly(); - try { - while (isEmpty()) { - notEmpty.await(); - } - } finally { - lock.unlock(); - } - } - - private void awaitNotFull() throws InterruptedException { - lock.lockInterruptibly(); - try { - while (size() == capacity) { - notFull.await(); - } - } finally { - lock.unlock(); - } - } -} diff --git a/internal-api/src/main/java/datadog/trace/util/queue/SpmcArrayQueue.java b/internal-api/src/main/java/datadog/trace/util/queue/SpmcArrayQueue.java deleted file mode 100644 index f48ed1b6ff6..00000000000 --- a/internal-api/src/main/java/datadog/trace/util/queue/SpmcArrayQueue.java +++ /dev/null @@ -1,137 +0,0 @@ -package datadog.trace.util.queue; - -import java.util.concurrent.atomic.AtomicLongFieldUpdater; -import 
java.util.concurrent.atomic.AtomicReferenceArray; -import java.util.concurrent.locks.LockSupport; - -/** - * A Single-Producer, Multiple-Consumer (SPMC) bounded queue based on a circular array. - * - *

This queue allows one producer to enqueue items concurrently with multiple consumers dequeuing - * them. It is lock-free for the producer, and uses CAS on the consumer side to allow multiple - * consumers safely. - * - *

Internally, the queue maintains a padded head and tail index to minimize false sharing, and - * uses {@link AtomicReferenceArray} to store elements safely across threads. - * - * @param the type of elements held in this queue - */ -public class SpmcArrayQueue extends BaseQueue { - /** Array buffer storing elements */ - private final AtomicReferenceArray buffer; - - @SuppressWarnings("unused") - private long p0, p1, p2, p3, p4, p5, p6; - - /** Tail index: next slot to be written by producer */ - private volatile long tail = 0L; - - @SuppressWarnings("unused") - private long q0, q1, q2, q3, q4, q5, q6; - @SuppressWarnings("unused") - private long p10, p11, p12, p13, p14, p15, p16; - - /** Head index: next slot to be claimed by any consumer */ - private volatile long head = 0L; - - @SuppressWarnings("unused") - private long q10, q11, q12, q13, q14, q15, q16; - - /** CAS updater for head to allow multiple consumers to claim elements safely */ - private static final AtomicLongFieldUpdater HEAD_UPDATER = - AtomicLongFieldUpdater.newUpdater(SpmcArrayQueue.class, "head"); - - /** - * Constructs a new SPMC queue with the given capacity.. Capacity will be rounded up to the next - * power of two for efficient modulo operations. - * - * @param capacity the desired maximum number of elements - */ - public SpmcArrayQueue(int capacity) { - super(capacity); - this.buffer = new AtomicReferenceArray<>(this.capacity); - } - - /** - * Adds the specified element to the queue if space is available. - * - *

Only one producer is allowed. The producer uses simple volatile writes (lazySet) to publish - * elements, ensuring memory visibility for consumers. - * - * @param e the element to add - * @return true if the element was added, false if the queue is full - * @throws NullPointerException if the element is null - */ - @Override - public boolean offer(E e) { - if (e == null) throw new NullPointerException(); - - long currentTail = tail; - int index = (int) (currentTail & mask); - - if (buffer.get(index) != null) { - return false; // queue full - } - - // Producer increments tail first to claim the slot - tail = currentTail + 1; - - // Use lazySet to publish the element without forcing a full memory fence - buffer.lazySet(index, e); - return true; - } - - /** - * Removes and returns the head element of the queue, or {@code null} if empty. - * - *

Multiple consumers can safely call this concurrently. Each consumer uses CAS on the head - * index to claim a slot. Only the successful consumer sets the element to null. - * - * @return the head element, or {@code null} if the queue is empty - */ - @Override - public E poll() { - while (true) { - long currentHead = head; - int index = (int) (currentHead & mask); - E value = buffer.get(index); - - if (value == null) { - return null; // empty - } - - // CAS ensures only one consumer claims this slot - if (HEAD_UPDATER.compareAndSet(this, currentHead, currentHead + 1)) { - // mark slot free after claiming it - buffer.lazySet(index, null); - return value; - } - - // CAS failed: another consumer claimed it; retry - LockSupport.parkNanos(1); - } - } - - /** - * Returns, but does not remove, the head of the queue. - * - * @return the head element, or {@code null} if empty - */ - @Override - public E peek() { - int index = (int) (head & mask); - return buffer.get(index); - } - - /** - * Returns the number of elements in the queue. - * - *

Approximate: may not be exact under concurrent access. - * - * @return current size of the queue - */ - @Override - public int size() { - return (int) (tail - head); - } -} diff --git a/internal-api/src/main/java/datadog/trace/util/queue/SpscArrayQueue.java b/internal-api/src/main/java/datadog/trace/util/queue/SpscArrayQueue.java deleted file mode 100644 index 149d267c5b8..00000000000 --- a/internal-api/src/main/java/datadog/trace/util/queue/SpscArrayQueue.java +++ /dev/null @@ -1,111 +0,0 @@ -package datadog.trace.util.queue; - -/** - * A high-performance Single-Producer Single-Consumer (SPSC) bounded queue based on a circular - * array. - * - *

This queue is designed for scenarios where exactly one producer thread offers elements and one - * consumer thread polls elements. It uses a plain {@code Object[]} buffer with volatile head and - * tail indices and padded fields to minimize false sharing. - * - * @param element type - */ -public final class SpscArrayQueue extends BaseQueue { - private final Object[] buffer; - - // padding - @SuppressWarnings("unused") - private long p0, p1, p2, p3, p4, p5, p6; - - /** Producer index */ - private volatile long tail = 0L; - - // padding - @SuppressWarnings("unused") - private long q0, q1, q2, q3, q4, q5, q6; - - // padding - @SuppressWarnings("unused") - private long r0, r1, r2, r3, r4, r5, r6; - - /** Consumer index */ - private volatile long head = 0L; - - // padding - @SuppressWarnings("unused") - private long s0, s1, s2, s3, s4, s5, s6; - - /** - * Constructs a new bounded Single producer single consumer queue. - * - * @param capacity the maximum number of elements the queue can hold. Will be rounded to the next - * power of two if not yet. - */ - public SpscArrayQueue(int capacity) { - super(capacity); - this.buffer = new Object[this.capacity]; - } - - /** - * Attempts to add the specified element to this queue. Returns {@code false} if the queue is - * full. - * - * @param e element to add (must not be null) - * @return {@code true} if successfully added; {@code false} if full - */ - @Override - public boolean offer(E e) { - if (e == null) { - throw new NullPointerException(); - } - - final long currentTail = tail; - final int index = (int) (currentTail & mask); - - // Check if slot is still occupied — if so, queue is full - if (buffer[index] != null) { - return false; - } - - buffer[index] = e; // plain write (safe for SPSC) - tail = currentTail + 1; // volatile write to publish - return true; - } - - /** Retrieves and removes the head of this queue, or {@code null} if empty. 
*/ - @Override - @SuppressWarnings("unchecked") - public E poll() { - final long currentHead = head; - final int index = (int) (currentHead & mask); - - final E e = (E) buffer[index]; - if (e == null) { - return null; - } - - buffer[index] = null; // mark slot as free (safe since we only have 1 consumer and 1 producer) - head = currentHead + 1; // volatile write to publish - return e; - } - - /** - * Retrieves, but does not remove, the head of this queue. - * - * @return the head element or {@code null} if empty - */ - @Override - @SuppressWarnings("unchecked") - public E peek() { - return (E) buffer[(int) (head & mask)]; - } - - /** - * Returns an approximation of the number of elements in this queue. This value may be imprecise - * due to concurrent updates. - */ - @Override - public int size() { - return (int) (tail - head); - } -} diff --git a/internal-api/src/test/groovy/datadog/trace/util/queue/AbstractQueueTest.groovy b/internal-api/src/test/groovy/datadog/trace/util/queue/AbstractQueueTest.groovy deleted file mode 100644 index f9c8a6a1c85..00000000000 --- a/internal-api/src/test/groovy/datadog/trace/util/queue/AbstractQueueTest.groovy +++ /dev/null @@ -1,163 +0,0 @@ -package datadog.trace.util.queue - -import static java.util.concurrent.TimeUnit.NANOSECONDS - -import datadog.trace.test.util.DDSpecification -import java.util.concurrent.TimeUnit -import java.util.concurrent.atomic.AtomicBoolean -import java.util.function.Consumer - -abstract class AbstractQueueTest> extends DDSpecification { - abstract T createQueue(int capacity) - protected T queue = createQueue(8) - - def "offer and poll should preserve FIFO order"() { - when: - queue.offer(1) - queue.offer(2) - queue.offer(3) - - then: - queue.poll() == 1 - queue.poll() == 2 - queue.poll() == 3 - queue.poll() == null - } - - def "offer should return false when queue is full"() { - given: - queue.clear() - (1..8).each { queue.offer(it) } - - expect: - !queue.offer(999) - queue.size() == 8 - } - - def "peek 
should return head element without removing it"() { - given: - queue.clear() - queue.offer(10) - queue.offer(20) - - expect: - queue.peek() == 10 - queue.peek() == 10 - queue.size() == 2 - } - - def "poll should return null when empty"() { - given: - queue.clear() - - expect: - queue.poll() == null - } - - def "size should reflect current number of items"() { - when: - queue.clear() - queue.offer(1) - queue.offer(2) - - then: - queue.size() == 2 - - when: - queue.poll() - queue.poll() - - then: - queue.size() == 0 - } - - def "drain should consume all available elements"() { - given: - queue.clear() - (1..5).each { queue.offer(it) } - def drained = [] - - when: - def count = queue.drain({ drained << it } as Consumer) - - then: - count == 5 - drained == [1, 2, 3, 4, 5] - queue.isEmpty() - } - - def "drain with limit should only consume that many elements"() { - given: - queue.clear() - (1..6).each { queue.offer(it) } - def drained = [] - - when: - def count = queue.drain({ drained << it } as Consumer, 3) - - then: - count == 3 - drained == [1, 2, 3] - queue.size() == 3 - } - - def "remainingCapacity should reflect current occupancy"() { - given: - def q = new MpscArrayQueue(4) - q.offer(1) - q.offer(2) - - expect: - q.remainingCapacity() == 2 - - when: - q.poll() - - then: - q.remainingCapacity() == 3 - } - - - def "poll with timeout returns null if no element becomes available"() { - when: - def start = System.nanoTime() - def value = queue.poll(200, TimeUnit.MILLISECONDS) - def elapsedMs = NANOSECONDS.toMillis(System.nanoTime() - start) - - then: - value == null - elapsedMs >= 200 // waited approximately the timeout - } - - def "poll with zero timeout behaves like immediate poll"() { - expect: - queue.poll(0, TimeUnit.MILLISECONDS) == null - - when: - queue.offer(99) - - then: - queue.poll(0, TimeUnit.MILLISECONDS) == 99 - } - - def "poll throws InterruptedException if interrupted"() { - given: - def thrown = new AtomicBoolean() - def thread = Thread.start { - try 
{ - queue.poll(500, TimeUnit.MILLISECONDS) - } catch (InterruptedException ie) { - thrown.set(true) - Thread.currentThread().interrupt() - } - } - - when: - Thread.sleep(50) - thread.interrupt() - thread.join() - - then: - thrown.get() - } -} diff --git a/internal-api/src/test/groovy/datadog/trace/util/queue/MpscArrayQueueTest.groovy b/internal-api/src/test/groovy/datadog/trace/util/queue/MpscArrayQueueTest.groovy deleted file mode 100644 index ef7d1e25094..00000000000 --- a/internal-api/src/test/groovy/datadog/trace/util/queue/MpscArrayQueueTest.groovy +++ /dev/null @@ -1,61 +0,0 @@ -package datadog.trace.util.queue - - -import java.util.concurrent.CountDownLatch -import java.util.concurrent.Executors -import spock.lang.Timeout - -class MpscArrayQueueTest extends AbstractQueueTest { - - @Timeout(10) - def "multiple producers single consumer should consume all elements without duplication or loss"() { - given: - int total = 1000 - int producers = 4 - queue = new MpscArrayQueue<>(1024) - def results = Collections.synchronizedList([]) - def executor = Executors.newFixedThreadPool(producers) - def latch = new CountDownLatch(producers) - def consumerDone = new CountDownLatch(1) - - when: "multiple producers enqueue concurrently" - (1..producers).each { id -> - executor.submit { - for (int i = 0; i < total / producers; i++) { - int value = (id * 10000) + i - while (!queue.offer(value)) { - Thread.yield() - } - } - latch.countDown() - } - } - - and: "a single consumer drains all elements" - Thread consumer = new Thread({ - while (results.size() < total) { - def v = queue.poll() - if (v != null) { - results << v - } else { - Thread.yield() - } - } - consumerDone.countDown() - }) - consumer.start() - - latch.await() - consumerDone.await() - executor.shutdown() - - then: - results.size() == total - results.toSet().size() == total // all unique - } - - @Override - MpscArrayQueue createQueue(int capacity) { - return new MpscArrayQueue(capacity) - } -} diff --git 
a/internal-api/src/test/groovy/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueTest.groovy b/internal-api/src/test/groovy/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueTest.groovy deleted file mode 100644 index 06ca16e3741..00000000000 --- a/internal-api/src/test/groovy/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueTest.groovy +++ /dev/null @@ -1,197 +0,0 @@ -package datadog.trace.util.queue - - -import java.util.concurrent.CountDownLatch -import java.util.concurrent.TimeUnit -import java.util.concurrent.atomic.AtomicBoolean -import java.util.concurrent.atomic.AtomicReference -import java.util.function.Consumer -import java.util.function.Supplier -import spock.lang.Timeout - -class MpscBlockingConsumerArrayQueueTest extends AbstractQueueTest> { - - @Override - MpscBlockingConsumerArrayQueue createQueue(int capacity) { - return new MpscBlockingConsumerArrayQueue(capacity) - } - - def "put and take should block and release correctly"() { - given: - queue = new MpscBlockingConsumerArrayQueue<>(2) - def taken = new AtomicReference<>() - def latch = new CountDownLatch(1) - - when: - Thread.start { - taken.set(queue.take()) - latch.countDown() - } - - Thread.sleep(100) // ensure consumer is waiting - queue.put(42) - latch.await(1, TimeUnit.SECONDS) - - then: - taken.get() == 42 - queue.isEmpty() - } - - def "put should block when full until space is available"() { - given: - queue = new MpscBlockingConsumerArrayQueue<>(2) - queue.put(1) - queue.put(2) - def added = new AtomicBoolean(false) - - when: - Thread producer = Thread.start { - try { - queue.put(3) // should block until consumer polls - added.set(true) - } catch (InterruptedException ignore) { - } - } - - Thread.sleep(100) - assert !added.get() - queue.take() // frees one slot - producer.join(1000) - - then: - added.get() - queue.size() == 2 - } - - def "drain should consume all elements in order"() { - given: - queue.clear() - (1..5).each { queue.offer(it) } - def drained = [] - - when: 
- def count = queue.drain({ drained << it } as Consumer) - - then: - count == 5 - drained == [1, 2, 3, 4, 5] - queue.isEmpty() - } - - def "drain with limit should consume only limited number"() { - given: - queue.clear() - (1..6).each { queue.offer(it) } - def drained = [] - - when: - def count = queue.drain({ drained << it } as Consumer, 3) - - then: - count == 3 - drained == [1, 2, 3] - queue.size() == 3 - } - - @Timeout(10) - def "multiple producers single consumer should consume all elements without duplicates"() { - given: - int total = 1000 - int producers = 4 - queue = new MpscBlockingConsumerArrayQueue<>(1024) - def results = Collections.synchronizedList([]) - def latch = new CountDownLatch(producers) - - when: - // Multiple producers - (1..producers).each { id -> - Thread.start { - for (int i = 0; i < total / producers; i++) { - int val = id * 10_000 + i - while (!queue.offer(val)) { - Thread.yield() - } - } - latch.countDown() - } - } - - // Single consumer - Thread consumer = Thread.start { - while (results.size() < total) { - def v = queue.poll() - if (v != null) { - results << v - } - else { - Thread.yield() - } - } - } - - latch.await() - consumer.join() - - then: - results.size() == total - results.toSet().size() == total // all unique - } - - def "blocking take should wake up when producer offers"() { - given: - queue = new MpscBlockingConsumerArrayQueue<>(4) - def result = new AtomicReference<>() - - when: - Thread consumer = Thread.start { - try { - result.set(queue.take()) - } catch (InterruptedException ignored) { - } - } - Thread.sleep(100) - queue.offer(123) - consumer.join(1000) - - then: - result.get() == 123 - queue.isEmpty() - } - - def "blocking put should wake up when consumer takes"() { - given: - queue = new MpscBlockingConsumerArrayQueue<>(1) - queue.put(1) - def done = new AtomicBoolean(false) - - when: - Thread producer = Thread.start { - try { - queue.put(2) // blocks until consumer takes - done.set(true) - } catch 
(InterruptedException ignored) { - } - } - - Thread.sleep(100) - queue.take() - producer.join(1000) - - then: - done.get() - queue.size() == 1 - } - - def "fill inserts up to capacity"() { - given: - def counter = 0 - def supplier = { counter < 10 ? counter++ : null } as Supplier - - when: - def filled = queue.fill(supplier, 10) - - then: - filled == 8 - queue.size() == 8 - } -} diff --git a/internal-api/src/test/groovy/datadog/trace/util/queue/SpmcArrayQueueTest.groovy b/internal-api/src/test/groovy/datadog/trace/util/queue/SpmcArrayQueueTest.groovy deleted file mode 100644 index 90c5661f7fa..00000000000 --- a/internal-api/src/test/groovy/datadog/trace/util/queue/SpmcArrayQueueTest.groovy +++ /dev/null @@ -1,59 +0,0 @@ -package datadog.trace.util.queue - - -import java.util.concurrent.CountDownLatch -import java.util.concurrent.Executors -import spock.lang.Timeout - -class SpmcArrayQueueTest extends AbstractQueueTest> { - - @Override - SpmcArrayQueue createQueue(int capacity) { - return new SpmcArrayQueue(capacity) - } - - @Timeout(10) - def "single producer multiple consumers should consume all elements without duplication or loss"() { - given: - int total = 1000 - int consumers = 4 - queue = new SpmcArrayQueue<>(1024) - def results = Collections.synchronizedList([]) - def executor = Executors.newFixedThreadPool(consumers) - def latch = new CountDownLatch(consumers) - - when: "one producer fills the queue" - Thread producer = new Thread({ - for (int i = 0; i < total; i++) { - while (!queue.offer(i)) { - Thread.yield() - } - } - }) - producer.start() - - and: "multiple consumers drain concurrently" - (1..consumers).each { - executor.submit { - while (results.size() < total) { - def v = queue.poll() - if (v != null) { - results << v - } else { - Thread.yield() - } - } - latch.countDown() - } - } - - latch.await() - producer.join() - executor.shutdown() - - then: - results.size() == total - results.toSet().size() == total // no duplicates - results.containsAll((0..> 
{ - - def "single producer single consumer concurrency"() { - given: - def queue = new SpscArrayQueue(1024) - def producerCount = 1000 - def consumed = new AtomicInteger(0) - def consumedValues = [] - - def producer = Thread.start { - (1..producerCount).each { queue.offer(it) } - } - - def consumer = Thread.start { - while (consumed.get() < producerCount) { - def v = queue.poll() - if (v != null) { - consumedValues << v - consumed.incrementAndGet() - } - } - } - - when: - producer.join() - consumer.join() - - then: - consumed.get() == producerCount - consumedValues.toSet().size() == producerCount // all values unique - } - - @Override - SpscArrayQueue createQueue(int capacity) { - return new SpscArrayQueue(capacity) - } -} From 291544ca84d228ab6408e5075723b1235e96c6c1 Mon Sep 17 00:00:00 2001 From: Andrea Marziali Date: Thu, 6 Nov 2025 09:41:44 +0100 Subject: [PATCH 07/18] Use a better time wait poll --- ...scBlockingConsumerArrayQueueVarHandle.java | 39 ++++++-------- .../trace/util/queue/AbstractQueueTest.groovy | 51 +------------------ ...kingConsumerArrayQueueVarHandleTest.groovy | 46 +++++++++++++++++ 3 files changed, 64 insertions(+), 72 deletions(-) diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandle.java b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandle.java index f0f783f6f5d..71485e55a48 100644 --- a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandle.java +++ b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandle.java @@ -98,7 +98,7 @@ public boolean offer(E e, long timeout, @Nonnull TimeUnit unit) throws Interrupt } /** - * Polls with a timeout using progressive backoff. + * Polls with a timeout. 
* * @param timeout max wait time * @param unit time unit @@ -107,32 +107,25 @@ public boolean offer(E e, long timeout, @Nonnull TimeUnit unit) throws Interrupt */ public E poll(long timeout, @Nonnull TimeUnit unit) throws InterruptedException { final long deadline = System.nanoTime() + unit.toNanos(timeout); - int idleCount = 0; - while (true) { - E value = poll(); - if (value != null) { - return value; - } + E e = poll(); + if (e != null) { + return e; + } - long remaining = deadline - System.nanoTime(); - if (remaining <= 0) { - return null; - } + // register this thread as the waiting consumer + consumerThread = Thread.currentThread(); + final long remaining = deadline - System.nanoTime(); - // Progressive backoff - if (idleCount < 100) { - // spin - } else if (idleCount < 1_000) { - Thread.yield(); - } else { - LockSupport.parkNanos(Math.min(remaining, 1_000_000L)); - } - idleCount++; + if (remaining <= 0) { + consumerThread = null; + return null; + } - if (Thread.interrupted()) { - throw new InterruptedException(); - } + LockSupport.parkNanos(this, remaining); + if (Thread.interrupted()) { + throw new InterruptedException(); } + return poll(); } } diff --git a/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/AbstractQueueTest.groovy b/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/AbstractQueueTest.groovy index f9c8a6a1c85..6ba053c9a6f 100644 --- a/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/AbstractQueueTest.groovy +++ b/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/AbstractQueueTest.groovy @@ -1,13 +1,10 @@ package datadog.trace.util.queue -import static java.util.concurrent.TimeUnit.NANOSECONDS import datadog.trace.test.util.DDSpecification -import java.util.concurrent.TimeUnit -import java.util.concurrent.atomic.AtomicBoolean import java.util.function.Consumer -abstract class AbstractQueueTest> extends DDSpecification { +abstract class AbstractQueueTest> extends 
DDSpecification { abstract T createQueue(int capacity) protected T queue = createQueue(8) @@ -103,7 +100,7 @@ abstract class AbstractQueueTest> extends DDSpe def "remainingCapacity should reflect current occupancy"() { given: - def q = new MpscArrayQueue(4) + def q = createQueue(4) q.offer(1) q.offer(2) @@ -116,48 +113,4 @@ abstract class AbstractQueueTest> extends DDSpe then: q.remainingCapacity() == 3 } - - - def "poll with timeout returns null if no element becomes available"() { - when: - def start = System.nanoTime() - def value = queue.poll(200, TimeUnit.MILLISECONDS) - def elapsedMs = NANOSECONDS.toMillis(System.nanoTime() - start) - - then: - value == null - elapsedMs >= 200 // waited approximately the timeout - } - - def "poll with zero timeout behaves like immediate poll"() { - expect: - queue.poll(0, TimeUnit.MILLISECONDS) == null - - when: - queue.offer(99) - - then: - queue.poll(0, TimeUnit.MILLISECONDS) == 99 - } - - def "poll throws InterruptedException if interrupted"() { - given: - def thrown = new AtomicBoolean() - def thread = Thread.start { - try { - queue.poll(500, TimeUnit.MILLISECONDS) - } catch (InterruptedException ie) { - thrown.set(true) - Thread.currentThread().interrupt() - } - } - - when: - Thread.sleep(50) - thread.interrupt() - thread.join() - - then: - thrown.get() - } } diff --git a/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandleTest.groovy b/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandleTest.groovy index fe02e6bd297..8a14534d8e3 100644 --- a/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandleTest.groovy +++ b/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandleTest.groovy @@ -1,7 +1,10 @@ package datadog.trace.util.queue +import static java.util.concurrent.TimeUnit.NANOSECONDS import 
java.util.concurrent.CountDownLatch +import java.util.concurrent.TimeUnit +import java.util.concurrent.atomic.AtomicBoolean import java.util.concurrent.atomic.AtomicReference import java.util.function.Consumer import java.util.function.Supplier @@ -121,4 +124,47 @@ class MpscBlockingConsumerArrayQueueVarHandleTest extends AbstractQueueTest= 200 // waited approximately the timeout + } + + def "poll with zero timeout behaves like immediate poll"() { + expect: + queue.poll(0, TimeUnit.MILLISECONDS) == null + + when: + queue.offer(99) + + then: + queue.poll(0, TimeUnit.MILLISECONDS) == 99 + } + + def "poll throws InterruptedException if interrupted"() { + given: + def thrown = new AtomicBoolean() + def thread = Thread.start { + try { + queue.poll(500, TimeUnit.MILLISECONDS) + } catch (InterruptedException ie) { + thrown.set(true) + Thread.currentThread().interrupt() + } + } + + when: + Thread.sleep(50) + thread.interrupt() + thread.join() + + then: + thrown.get() + } } From aaa0803de37582e7794cd64e1cb20a2cdf679306 Mon Sep 17 00:00:00 2001 From: Andrea Marziali Date: Thu, 6 Nov 2025 23:23:12 +0100 Subject: [PATCH 08/18] try to improve the fairness --- dd-java-agent/agent-llmobs/build.gradle | 2 +- internal-api/build.gradle.kts | 2 - internal-api/internal-api-9/build.gradle.kts | 2 +- .../MPSCBlockingConsumerQueueBenchmark.java} | 53 ++++---- .../queue/MPSCQueueBenchmark.java | 18 ++- .../queue/SPSCQueueBenchmark.java | 3 +- .../BlockingConsumerNonBlockingQueue.java | 4 - ...toolsMpscBlockingConsumerWrappedQueue.java | 88 +------------ .../util/queue/MpscArrayQueueVarHandle.java | 43 +++---- ...scBlockingConsumerArrayQueueVarHandle.java | 121 +++++++----------- .../util/queue/SpmcArrayQueueVarHandle.java | 5 +- .../util/queue/JctoolsWrapppersTest.groovy | 42 ++++++ 12 files changed, 153 insertions(+), 230 deletions(-) rename internal-api/internal-api-9/src/jmh/java/datadog/trace/util/{stacktrace/queue/SPMCQueueBenchmark.java => 
queue/MPSCBlockingConsumerQueueBenchmark.java} (54%) rename internal-api/internal-api-9/src/jmh/java/datadog/trace/util/{stacktrace => }/queue/MPSCQueueBenchmark.java (78%) rename internal-api/internal-api-9/src/jmh/java/datadog/trace/util/{stacktrace => }/queue/SPSCQueueBenchmark.java (95%) create mode 100644 internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/JctoolsWrapppersTest.groovy diff --git a/dd-java-agent/agent-llmobs/build.gradle b/dd-java-agent/agent-llmobs/build.gradle index 0edfdbad404..09a6f540f7f 100644 --- a/dd-java-agent/agent-llmobs/build.gradle +++ b/dd-java-agent/agent-llmobs/build.gradle @@ -29,7 +29,7 @@ dependencies { implementation project(':communication') implementation project(':components:json') implementation project(':internal-api') - api project(':internal-api:internal-api-9') + implementation project(':internal-api:internal-api-9') testImplementation project(':dd-java-agent:testing') diff --git a/internal-api/build.gradle.kts b/internal-api/build.gradle.kts index be1183f9da7..f05ab8f0b2a 100644 --- a/internal-api/build.gradle.kts +++ b/internal-api/build.gradle.kts @@ -276,8 +276,6 @@ dependencies { // it contains annotations that are also present in the instrumented application classes api("com.datadoghq:dd-javac-plugin-client:0.2.2") - jmhImplementation(libs.jctools) - testImplementation("org.snakeyaml:snakeyaml-engine:2.9") testImplementation(project(":utils:test-utils")) testImplementation(libs.bundles.junit5) diff --git a/internal-api/internal-api-9/build.gradle.kts b/internal-api/internal-api-9/build.gradle.kts index 374b343d34a..ebe6d0ab048 100644 --- a/internal-api/internal-api-9/build.gradle.kts +++ b/internal-api/internal-api-9/build.gradle.kts @@ -39,7 +39,7 @@ val minimumInstructionCoverage by extra(0.8) dependencies { api(project(":internal-api")) - api(libs.jctools) // probably the Queues factory should be moved away from there + implementation(libs.jctools) // probably the Queues factory should be 
moved away from there testImplementation(project(":dd-java-agent:testing")) testImplementation(libs.slf4j) diff --git a/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/stacktrace/queue/SPMCQueueBenchmark.java b/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/queue/MPSCBlockingConsumerQueueBenchmark.java similarity index 54% rename from internal-api/internal-api-9/src/jmh/java/datadog/trace/util/stacktrace/queue/SPMCQueueBenchmark.java rename to internal-api/internal-api-9/src/jmh/java/datadog/trace/util/queue/MPSCBlockingConsumerQueueBenchmark.java index 29a84b43501..ab71345c8cf 100644 --- a/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/stacktrace/queue/SPMCQueueBenchmark.java +++ b/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/queue/MPSCBlockingConsumerQueueBenchmark.java @@ -1,6 +1,5 @@ -package datadog.trace.util.stacktrace.queue; +package datadog.trace.util.queue; -import datadog.trace.util.queue.SpmcArrayQueueVarHandle; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import org.openjdk.jmh.annotations.Benchmark; @@ -20,58 +19,62 @@ import org.openjdk.jmh.infra.Blackhole; /* -MPSCQueueBenchmark.queueTest 1024 thrpt 145.261 ops/us -MPSCQueueBenchmark.queueTest:consume 1024 thrpt 84.185 ops/us -MPSCQueueBenchmark.queueTest:produce 1024 thrpt 61.076 ops/us -MPSCQueueBenchmark.queueTest 65536 thrpt 187.609 ops/us -MPSCQueueBenchmark.queueTest:consume 65536 thrpt 117.097 ops/us -MPSCQueueBenchmark.queueTest:produce 65536 thrpt 70.512 ops/us - */ +Benchmark (capacity) Mode Cnt Score Error Units +MPSCBlockingConsumerQueueBenchmark.queueTest 1024 thrpt 121.534 ops/us +MPSCBlockingConsumerQueueBenchmark.queueTest:async 1024 thrpt NaN --- +MPSCBlockingConsumerQueueBenchmark.queueTest:consume 1024 thrpt 110.962 ops/us +MPSCBlockingConsumerQueueBenchmark.queueTest:produce 1024 thrpt 10.572 ops/us +MPSCBlockingConsumerQueueBenchmark.queueTest 65536 thrpt 126.856 ops/us 
+MPSCBlockingConsumerQueueBenchmark.queueTest:async 65536 thrpt NaN --- +MPSCBlockingConsumerQueueBenchmark.queueTest:consume 65536 thrpt 113.213 ops/us +MPSCBlockingConsumerQueueBenchmark.queueTest:produce 65536 thrpt 13.644 ops/us +*/ @BenchmarkMode(Mode.Throughput) @Warmup(iterations = 1, time = 30) @Measurement(iterations = 1, time = 30) @Fork(1) @OutputTimeUnit(TimeUnit.MICROSECONDS) @State(Scope.Benchmark) -public class SPMCQueueBenchmark { +public class MPSCBlockingConsumerQueueBenchmark { @State(Scope.Group) public static class QueueState { - SpmcArrayQueueVarHandle queue; - CountDownLatch producerReady; + MpscBlockingConsumerArrayQueueVarHandle queue; + CountDownLatch consumerReady; @Param({"1024", "65536"}) int capacity; @Setup(Level.Iteration) public void setup() { - queue = new SpmcArrayQueueVarHandle<>(capacity); - producerReady = new CountDownLatch(1); + queue = new MpscBlockingConsumerArrayQueueVarHandle<>(capacity); + consumerReady = new CountDownLatch(1); } } @Benchmark @Group("queueTest") @GroupThreads(4) - public void consume(QueueState state, Blackhole bh) { + public void produce(QueueState state) { try { - state.producerReady.await(); // wait until consumer is ready + state.consumerReady.await(); // wait until consumer is ready } catch (InterruptedException ignored) { } - Integer v = state.queue.poll(); - if (v != null) { - bh.consume(v); + + // bounded attempt: try once, then yield if full + boolean offered = state.queue.offer(0); + if (!offered) { + Thread.yield(); } } @Benchmark @Group("queueTest") @GroupThreads(1) - public void produce(QueueState state) { - state.producerReady.countDown(); // signal consumers can start - // bounded attempt: try once, then yield if full - boolean offered = state.queue.offer(0); - if (!offered) { - Thread.yield(); + public void consume(QueueState state, Blackhole bh) { + state.consumerReady.countDown(); // signal producers can start + Integer v = state.queue.poll(); + if (v != null) { + bh.consume(v); } } } 
diff --git a/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/stacktrace/queue/MPSCQueueBenchmark.java b/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/queue/MPSCQueueBenchmark.java similarity index 78% rename from internal-api/internal-api-9/src/jmh/java/datadog/trace/util/stacktrace/queue/MPSCQueueBenchmark.java rename to internal-api/internal-api-9/src/jmh/java/datadog/trace/util/queue/MPSCQueueBenchmark.java index f0e15c4350d..b7dc3b45f76 100644 --- a/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/stacktrace/queue/MPSCQueueBenchmark.java +++ b/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/queue/MPSCQueueBenchmark.java @@ -1,6 +1,5 @@ -package datadog.trace.util.stacktrace.queue; +package datadog.trace.util.queue; -import datadog.trace.util.queue.MpscArrayQueueVarHandle; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import org.openjdk.jmh.annotations.Benchmark; @@ -21,15 +20,14 @@ /* Benchmark (capacity) Mode Cnt Score Error Units -MPSCQueueBenchmark.queueTest 65536 thrpt 208.469 ops/us -MPSCQueueBenchmark.queueTest:async 65536 thrpt NaN --- -MPSCQueueBenchmark.queueTest:consume 65536 thrpt 199.309 ops/us -MPSCQueueBenchmark.queueTest:produce 65536 thrpt 9.161 ops/us -MPSCQueueBenchmark.queueTest 1024 thrpt 195.200 ops/us +MPSCQueueBenchmark.queueTest 1024 thrpt 146.530 ops/us MPSCQueueBenchmark.queueTest:async 1024 thrpt NaN --- -MPSCQueueBenchmark.queueTest:consume 1024 thrpt 185.929 ops/us -MPSCQueueBenchmark.queueTest:produce 1024 thrpt 9.272 ops/us - */ +MPSCQueueBenchmark.queueTest:consume 1024 thrpt 108.357 ops/us +MPSCQueueBenchmark.queueTest:produce 1024 thrpt 38.172 ops/us +MPSCQueueBenchmark.queueTest 65536 thrpt 179.177 ops/us +MPSCQueueBenchmark.queueTest:async 65536 thrpt NaN --- +MPSCQueueBenchmark.queueTest:consume 65536 thrpt 140.968 ops/us +MPSCQueueBenchmark.queueTest:produce 65536 thrpt 38.209 ops/us */ @BenchmarkMode(Mode.Throughput) @Warmup(iterations = 
1, time = 30) @Measurement(iterations = 1, time = 30) diff --git a/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/stacktrace/queue/SPSCQueueBenchmark.java b/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/queue/SPSCQueueBenchmark.java similarity index 95% rename from internal-api/internal-api-9/src/jmh/java/datadog/trace/util/stacktrace/queue/SPSCQueueBenchmark.java rename to internal-api/internal-api-9/src/jmh/java/datadog/trace/util/queue/SPSCQueueBenchmark.java index 26db96ef1a0..6e829f43dc6 100644 --- a/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/stacktrace/queue/SPSCQueueBenchmark.java +++ b/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/queue/SPSCQueueBenchmark.java @@ -1,6 +1,5 @@ -package datadog.trace.util.stacktrace.queue; +package datadog.trace.util.queue; -import datadog.trace.util.queue.SpscArrayQueueVarHandle; import java.util.concurrent.TimeUnit; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/BlockingConsumerNonBlockingQueue.java b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/BlockingConsumerNonBlockingQueue.java index ae16a042f20..264453c03fa 100644 --- a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/BlockingConsumerNonBlockingQueue.java +++ b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/BlockingConsumerNonBlockingQueue.java @@ -4,11 +4,7 @@ import javax.annotation.Nonnull; public interface BlockingConsumerNonBlockingQueue extends NonBlockingQueue { - boolean offer(E e, long timeout, @Nonnull TimeUnit unit) throws InterruptedException; - E poll(long timeout, @Nonnull TimeUnit unit) throws InterruptedException; - void put(E e) throws InterruptedException; - E take() throws InterruptedException; } diff --git 
a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/JctoolsMpscBlockingConsumerWrappedQueue.java b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/JctoolsMpscBlockingConsumerWrappedQueue.java index 6c69615200d..a5381167ca7 100644 --- a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/JctoolsMpscBlockingConsumerWrappedQueue.java +++ b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/JctoolsMpscBlockingConsumerWrappedQueue.java @@ -2,7 +2,6 @@ import java.util.concurrent.BlockingQueue; import java.util.concurrent.TimeUnit; -import java.util.concurrent.locks.LockSupport; import javax.annotation.Nonnull; import org.jctools.queues.MpscBlockingConsumerArrayQueue; @@ -17,98 +16,13 @@ public JctoolsMpscBlockingConsumerWrappedQueue( this.blockingQueueDelegate = delegate; } - @Override - public void put(E e) throws InterruptedException { - blockingQueueDelegate.put(e); - } - @Override public E take() throws InterruptedException { return blockingQueueDelegate.take(); } - /** - * Timed offer with progressive backoff. - * - *

Tries to insert an element into the queue within the given timeout. Uses a spin → yield → - * park backoff strategy to reduce CPU usage under contention. - * - * @param e the element to insert - * @param timeout maximum time to wait - * @param unit time unit of timeout - * @return {@code true} if inserted, {@code false} if timeout expires - * @throws InterruptedException if interrupted while waiting - */ - @Override - public boolean offer(E e, long timeout, @Nonnull TimeUnit unit) throws InterruptedException { - final long deadline = System.nanoTime() + unit.toNanos(timeout); - int idleCount = 0; - - while (true) { - if (offer(e)) { - return true; // successfully inserted - } - - long remaining = deadline - System.nanoTime(); - if (remaining <= 0) { - return false; // timeout - } - - // Progressive backoff - if (idleCount < 100) { - // spin (busy-wait) - } else if (idleCount < 1_000) { - Thread.yield(); // give up CPU to other threads - } else { - // park for a short duration, up to 1 ms - LockSupport.parkNanos(Math.min(remaining, 1_000_000L)); - } - - idleCount++; - - if (Thread.interrupted()) { - throw new InterruptedException(); - } - } - } - - /** - * Polls with a timeout using progressive backoff. 
- * - * @param timeout max wait time - * @param unit time unit - * @return the head element, or null if timed out - * @throws InterruptedException if interrupted - */ @Override public E poll(long timeout, @Nonnull TimeUnit unit) throws InterruptedException { - final long deadline = System.nanoTime() + unit.toNanos(timeout); - int idleCount = 0; - - while (true) { - E value = poll(); - if (value != null) { - return value; - } - - long remaining = deadline - System.nanoTime(); - if (remaining <= 0) { - return null; - } - - // Progressive backoff - if (idleCount < 100) { - // spin - } else if (idleCount < 1_000) { - Thread.yield(); - } else { - LockSupport.parkNanos(Math.min(remaining, 1_000_000L)); - } - idleCount++; - - if (Thread.interrupted()) { - throw new InterruptedException(); - } - } + return blockingQueueDelegate.poll(timeout, unit); } } diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscArrayQueueVarHandle.java b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscArrayQueueVarHandle.java index 759e82ae97b..7ab57801698 100644 --- a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscArrayQueueVarHandle.java +++ b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscArrayQueueVarHandle.java @@ -4,6 +4,7 @@ import java.lang.invoke.MethodHandles.Lookup; import java.lang.invoke.VarHandle; import java.util.Objects; +import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.locks.LockSupport; /** @@ -85,23 +86,6 @@ public boolean offer(E e) { // jctools does the same local copy to have the jitter optimise the accesses final Object[] localBuffer = this.buffer; - // depending on the thread id, choose a different backoff strategy. - // Note: it reduces fairness but also the contention on the cas. 
- boolean s0 = false, s1 = false, s2 = false; - switch ((int) (Thread.currentThread().getId() & 3)) { - case 0: - s0 = true; - break; - case 1: - s1 = true; - break; - case 2: - s2 = true; - break; - default: - break; - } - long localProducerLimit = (long) PRODUCER_LIMIT_HANDLE.getVolatile(this); long cachedHead = 0L; // Local cache of head to reduce volatile reads @@ -114,7 +98,9 @@ public boolean offer(E e) { cachedHead = (long) HEAD_HANDLE.getVolatile(this); localProducerLimit = cachedHead + capacity; - if (currentTail >= localProducerLimit) return false; // queue full + if (currentTail >= localProducerLimit) { + return false; // queue full + } // Update producerLimit so other producers also benefit PRODUCER_LIMIT_HANDLE.setVolatile(this, localProducerLimit); @@ -130,9 +116,20 @@ public boolean offer(E e) { } // Backoff to reduce contention - if (s0) Thread.onSpinWait(); - else if (s1) Thread.yield(); - else if (s2) LockSupport.parkNanos(1); + switch (ThreadLocalRandom.current().nextInt(0, 4)) { + case 0: + Thread.yield(); + break; + case 1: + LockSupport.parkNanos(1); + break; + case 2: + Thread.onSpinWait(); + break; + default: + // busy spin + break; + } } } @@ -151,7 +148,9 @@ public E poll() { // Acquire-load ensures visibility of producer write Object value = ARRAY_HANDLE.getAcquire(localBuffer, index); - if (value == null) return null; + if (value == null) { + return null; + } // Clear the slot without additional fence ARRAY_HANDLE.setOpaque(localBuffer, index, null); diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandle.java b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandle.java index 71485e55a48..6b2b292469b 100644 --- a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandle.java +++ 
b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandle.java @@ -5,9 +5,9 @@ import javax.annotation.Nonnull; /** - * JCtools-like MpscBlockingConsumerArrayQueue implemented without Unsafe. + * A MPSC Array queue offering blocking methods (take and timed poll) for a single consumer. * - *

It features nonblocking offer/poll methods and blocking (condition based) take/put. + *

The wait is performed by parking/unparking the consumer thread. */ public class MpscBlockingConsumerArrayQueueVarHandle extends MpscArrayQueueVarHandle implements BlockingConsumerNonBlockingQueue { @@ -22,15 +22,15 @@ public MpscBlockingConsumerArrayQueueVarHandle(int capacity) { public boolean offer(E e) { final boolean success = super.offer(e); if (success) { - Thread c = consumerThread; - if (c != null) LockSupport.unpark(c); + try { + final Thread c = consumerThread; + LockSupport.unpark(c); // unpark is safe if the arg is null + } finally { + consumerThread = null; + } } - return success; - } - public void put(E e) throws InterruptedException { - // in this variant we should not use a blocking put since we do not support blocking producers - throw new UnsupportedOperationException(); + return success; } /** @@ -39,62 +39,14 @@ public void put(E e) throws InterruptedException { * @return the next element (never null) * @throws InterruptedException if interrupted while waiting */ + @Override public E take() throws InterruptedException { consumerThread = Thread.currentThread(); - while (true) { - E e = poll(); - if (e != null) return e; - - if (Thread.interrupted()) { - throw new InterruptedException(); - } - // Block until producer unparks us - LockSupport.park(this); - } - } - - /** - * Timed offer with progressive backoff. - * - *

Tries to insert an element into the queue within the given timeout. Uses a spin → yield → - * park backoff strategy to reduce CPU usage under contention. - * - * @param e the element to insert - * @param timeout maximum time to wait - * @param unit time unit of timeout - * @return {@code true} if inserted, {@code false} if timeout expires - * @throws InterruptedException if interrupted while waiting - */ - public boolean offer(E e, long timeout, @Nonnull TimeUnit unit) throws InterruptedException { - final long deadline = System.nanoTime() + unit.toNanos(timeout); - int idleCount = 0; - - while (true) { - if (offer(e)) { - return true; // successfully inserted - } - - long remaining = deadline - System.nanoTime(); - if (remaining <= 0) { - return false; // timeout - } - - // Progressive backoff - if (idleCount < 100) { - // spin (busy-wait) - } else if (idleCount < 1_000) { - Thread.yield(); // give up CPU to other threads - } else { - // park for a short duration, up to 1 ms - LockSupport.parkNanos(Math.min(remaining, 1_000_000L)); - } - - idleCount++; - - if (Thread.interrupted()) { - throw new InterruptedException(); - } + E e; + while ((e = poll()) != null) { + parkUntilNext(-1); } + return e; } /** @@ -105,27 +57,48 @@ public boolean offer(E e, long timeout, @Nonnull TimeUnit unit) throws Interrupt * @return the head element, or null if timed out * @throws InterruptedException if interrupted */ + @Override public E poll(long timeout, @Nonnull TimeUnit unit) throws InterruptedException { - final long deadline = System.nanoTime() + unit.toNanos(timeout); - E e = poll(); if (e != null) { return e; } - // register this thread as the waiting consumer - consumerThread = Thread.currentThread(); - final long remaining = deadline - System.nanoTime(); - - if (remaining <= 0) { - consumerThread = null; + final long parkNanos = unit.toNanos(timeout); + if (parkNanos <= 0) { return null; } - LockSupport.parkNanos(this, remaining); - if (Thread.interrupted()) { - throw new 
InterruptedException(); - } + parkUntilNext(parkNanos); + return poll(); } + + /** + * Blocks (parks) until an element becomes available or until the specified timeout elapses. + * + *

It is safe if only one thread is waiting (it's the case for this single consumer + * implementation). + * + * @param nanos max wait time in nanoseconds. If negative, it will park indefinably until waken or + * interrupted + * @throws InterruptedException if interrupted + */ + private void parkUntilNext(long nanos) throws InterruptedException { + try { + // register this thread as the waiting consumer + consumerThread = Thread.currentThread(); + if (nanos <= 0) { + LockSupport.park(this); + } else { + LockSupport.parkNanos(this, nanos); + } + if (Thread.interrupted()) { + throw new InterruptedException(); + } + } finally { + // free the variable not to reference the consumer thread anymore + consumerThread = null; + } + } } diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpmcArrayQueueVarHandle.java b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpmcArrayQueueVarHandle.java index b5ac23abec0..2f5b28fc8c9 100644 --- a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpmcArrayQueueVarHandle.java +++ b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpmcArrayQueueVarHandle.java @@ -78,7 +78,9 @@ public boolean offer(E e) { long wrapPoint = currentTail - capacity; long currentHead = (long) HEAD_HANDLE.getVolatile(this); - if (wrapPoint >= currentHead) return false; // queue full + if (wrapPoint >= currentHead) { + return false; // queue full + } int index = (int) (currentTail & mask); @@ -126,7 +128,6 @@ public E poll() { ARRAY_HANDLE.setOpaque(localBuffer, index, null); return (E) value; } - // CAS failed, retry loop } } diff --git a/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/JctoolsWrapppersTest.groovy b/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/JctoolsWrapppersTest.groovy new file mode 100644 index 00000000000..f6a82b911e0 --- /dev/null +++ 
b/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/JctoolsWrapppersTest.groovy @@ -0,0 +1,42 @@ +package datadog.trace.util.queue + +import datadog.trace.test.util.DDSpecification +import java.util.concurrent.TimeUnit +import java.util.function.Consumer +import java.util.function.Supplier +import org.jctools.queues.MpscBlockingConsumerArrayQueue + +class JctoolsWrapppersTest extends DDSpecification { + + def "should wrap the method #method to the jctools delegate #wrapperClass"() { + setup: + // will work for both wrapper classes + def delegate = Mock(MpscBlockingConsumerArrayQueue) + def queue = wrapperClass.newInstance(delegate) as NonBlockingQueue + + when: + queue.invokeMethod(method, args.toArray()) + + then: + 1 * delegate."$method"(*_) + + where: + method | args | wrapperClass + "poll" | [] | JctoolsWrappedQueue + "offer" | ["test"] | JctoolsWrappedQueue + "capacity" | [] | JctoolsWrappedQueue + "peek" | [] | JctoolsWrappedQueue + "drain" | [Mock(Consumer)] | JctoolsWrappedQueue + "drain" | [Mock(Consumer), 1] | JctoolsWrappedQueue + "fill" | [Mock(Supplier), 1] | JctoolsWrappedQueue + "poll" | [] | JctoolsMpscBlockingConsumerWrappedQueue + "offer" | ["test"] | JctoolsMpscBlockingConsumerWrappedQueue + "capacity" | [] | JctoolsMpscBlockingConsumerWrappedQueue + "peek" | [] | JctoolsMpscBlockingConsumerWrappedQueue + "drain" | [Mock(Consumer)] | JctoolsMpscBlockingConsumerWrappedQueue + "drain" | [Mock(Consumer), 1] | JctoolsMpscBlockingConsumerWrappedQueue + "fill" | [Mock(Supplier), 1] | JctoolsMpscBlockingConsumerWrappedQueue + "poll" | [1, TimeUnit.SECONDS] | JctoolsMpscBlockingConsumerWrappedQueue + "take" | [] | JctoolsMpscBlockingConsumerWrappedQueue + } +} From a69d8b29c87ee7776ee5f0164507b66279454336 Mon Sep 17 00:00:00 2001 From: Andrea Marziali Date: Fri, 7 Nov 2025 09:07:18 +0100 Subject: [PATCH 09/18] Simplify and make more fair the spin wait --- .../util/queue/MpscArrayQueueVarHandle.java | 24 +++++++++---------- 1 file 
changed, 11 insertions(+), 13 deletions(-) diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscArrayQueueVarHandle.java b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscArrayQueueVarHandle.java index 7ab57801698..fdddb91e0a2 100644 --- a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscArrayQueueVarHandle.java +++ b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscArrayQueueVarHandle.java @@ -4,7 +4,6 @@ import java.lang.invoke.MethodHandles.Lookup; import java.lang.invoke.VarHandle; import java.util.Objects; -import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.locks.LockSupport; /** @@ -89,6 +88,9 @@ public boolean offer(E e) { long localProducerLimit = (long) PRODUCER_LIMIT_HANDLE.getVolatile(this); long cachedHead = 0L; // Local cache of head to reduce volatile reads + int spinCycles = 0; + boolean parkOnSpin = (Thread.currentThread().getId() & 1) == 0; + while (true) { long currentTail = (long) TAIL_HANDLE.getVolatile(this); @@ -116,20 +118,16 @@ public boolean offer(E e) { } // Backoff to reduce contention - switch (ThreadLocalRandom.current().nextInt(0, 4)) { - case 0: - Thread.yield(); - break; - case 1: + if ((spinCycles & 1) == 0) { + Thread.onSpinWait(); + } else { + if (parkOnSpin) { LockSupport.parkNanos(1); - break; - case 2: - Thread.onSpinWait(); - break; - default: - // busy spin - break; + } else { + Thread.yield(); + } } + spinCycles++; } } From b8d0af16dbc8e1f8ba63bc7b933ff075a4e50930 Mon Sep 17 00:00:00 2001 From: Andrea Marziali Date: Fri, 7 Nov 2025 11:24:57 +0100 Subject: [PATCH 10/18] Fix the wait condition --- .../util/queue/MpscBlockingConsumerArrayQueueVarHandle.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandle.java 
b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandle.java index 6b2b292469b..bfd79d39efa 100644 --- a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandle.java +++ b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandle.java @@ -43,7 +43,7 @@ public boolean offer(E e) { public E take() throws InterruptedException { consumerThread = Thread.currentThread(); E e; - while ((e = poll()) != null) { + while ((e = poll()) == null) { parkUntilNext(-1); } return e; From 16dcf621f36d61a65ed78eece59796d3188c7001 Mon Sep 17 00:00:00 2001 From: Andrea Marziali Date: Fri, 7 Nov 2025 14:16:26 +0100 Subject: [PATCH 11/18] Use getAndAdd for the offer fast path --- .../util/queue/MpscArrayQueueVarHandle.java | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscArrayQueueVarHandle.java b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscArrayQueueVarHandle.java index fdddb91e0a2..3c5d6e0a3ae 100644 --- a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscArrayQueueVarHandle.java +++ b/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscArrayQueueVarHandle.java @@ -82,9 +82,7 @@ public MpscArrayQueueVarHandle(int requestedCapacity) { public boolean offer(E e) { Objects.requireNonNull(e); - // jctools does the same local copy to have the jitter optimise the accesses final Object[] localBuffer = this.buffer; - long localProducerLimit = (long) PRODUCER_LIMIT_HANDLE.getVolatile(this); long cachedHead = 0L; // Local cache of head to reduce volatile reads @@ -94,9 +92,8 @@ public boolean offer(E e) { while (true) { long currentTail = (long) TAIL_HANDLE.getVolatile(this); - // Check if producer limit exceeded + // Slow path: refresh producer limit when queue is near full 
if (currentTail >= localProducerLimit) { - // Refresh head only when necessary cachedHead = (long) HEAD_HANDLE.getVolatile(this); localProducerLimit = cachedHead + capacity; @@ -104,23 +101,36 @@ public boolean offer(E e) { return false; // queue full } - // Update producerLimit so other producers also benefit PRODUCER_LIMIT_HANDLE.setVolatile(this, localProducerLimit); } - // Attempt to claim a slot + long freeSlots = localProducerLimit - currentTail; + + // Fast path: getAndAdd if occupancy < 75% + if (freeSlots > (long) (capacity * 0.25)) { // more than 25% free + long slot = (long) TAIL_HANDLE.getAndAdd(this, 1L); + final int index = (int) (slot & mask); + + // Release-store ensures visibility to consumer + ARRAY_HANDLE.setRelease(localBuffer, index, e); + return true; + } + + // Slow path: CAS near limit if (TAIL_HANDLE.compareAndSet(this, currentTail, currentTail + 1)) { final int index = (int) (currentTail & mask); - // Release-store ensures producer's write is visible to consumer + // Release-store ensures visibility to consumer ARRAY_HANDLE.setRelease(localBuffer, index, e); return true; } // Backoff to reduce contention if ((spinCycles & 1) == 0) { + // spin each even cycles Thread.onSpinWait(); } else { + // use a 'random' alternate backoff on odd cycles if (parkOnSpin) { LockSupport.parkNanos(1); } else { From b7e8b60af6da1ff14b5c62b65b1bc69755aaf8b8 Mon Sep 17 00:00:00 2001 From: Andrea Marziali Date: Fri, 7 Nov 2025 15:44:54 +0100 Subject: [PATCH 12/18] Move to queue-utils --- dd-java-agent/agent-llmobs/build.gradle | 2 +- .../trace/llmobs/EvalProcessingWorker.java | 4 +- dd-java-agent/build.gradle | 3 ++ dd-trace-core/build.gradle | 2 +- .../trace/common/metrics/Aggregator.java | 2 +- .../metrics/ConflatingMetricsAggregator.java | 4 +- .../trace/common/metrics/OkHttpSink.java | 4 +- .../common/writer/SpanSamplingWorker.java | 6 +-- .../common/writer/TraceProcessingWorker.java | 6 +-- .../trace/core/PendingTraceBuffer.java | 4 +- 
.../DefaultDataStreamsMonitoring.java | 4 +- gradle/dependencies.gradle | 1 + internal-api/internal-api-9/build.gradle.kts | 1 - settings.gradle.kts | 1 + utils/queue-utils/build.gradle.kts | 51 +++++++++++++++++++ .../MPSCBlockingConsumerQueueBenchmark.java | 2 +- .../common}/queue/MPSCQueueBenchmark.java | 2 +- .../common}/queue/SPSCQueueBenchmark.java | 2 +- .../java/datadog/common}/queue/BaseQueue.java | 4 +- .../BlockingConsumerNonBlockingQueue.java | 2 +- ...toolsMpscBlockingConsumerWrappedQueue.java | 4 +- .../common}/queue/JctoolsWrappedQueue.java | 6 +-- .../queue/MpscArrayQueueVarHandle.java | 4 +- ...scBlockingConsumerArrayQueueVarHandle.java | 4 +- .../common}/queue/NonBlockingQueue.java | 2 +- .../java/datadog/common}/queue/Queues.java | 2 +- .../queue/SpmcArrayQueueVarHandle.java | 4 +- .../queue/SpscArrayQueueVarHandle.java | 4 +- .../common}/queue/AbstractQueueTest.groovy | 2 +- .../common}/queue/JctoolsWrapppersTest.groovy | 2 +- .../queue/MpscArrayQueueVarHandleTest.groovy | 2 +- ...kingConsumerArrayQueueVarHandleTest.groovy | 2 +- .../queue/SpmcArrayQueueVarHandleTest.groovy | 2 +- .../queue/SpscArrayQueueVarHandleTest.groovy | 2 +- 34 files changed, 101 insertions(+), 48 deletions(-) create mode 100644 utils/queue-utils/build.gradle.kts rename {internal-api/internal-api-9/src/jmh/java/datadog/trace/util => utils/queue-utils/src/jmh/java/datadog/common}/queue/MPSCBlockingConsumerQueueBenchmark.java (98%) rename {internal-api/internal-api-9/src/jmh/java/datadog/trace/util => utils/queue-utils/src/jmh/java/datadog/common}/queue/MPSCQueueBenchmark.java (98%) rename {internal-api/internal-api-9/src/jmh/java/datadog/trace/util => utils/queue-utils/src/jmh/java/datadog/common}/queue/SPSCQueueBenchmark.java (98%) rename {internal-api/internal-api-9/src/main/java/datadog/trace/util => utils/queue-utils/src/main/java/datadog/common}/queue/BaseQueue.java (95%) rename {internal-api/internal-api-9/src/main/java/datadog/trace/util => 
utils/queue-utils/src/main/java/datadog/common}/queue/BlockingConsumerNonBlockingQueue.java (88%) rename {internal-api/internal-api-9/src/main/java/datadog/trace/util => utils/queue-utils/src/main/java/datadog/common}/queue/JctoolsMpscBlockingConsumerWrappedQueue.java (85%) rename {internal-api/internal-api-9/src/main/java/datadog/trace/util => utils/queue-utils/src/main/java/datadog/common}/queue/JctoolsWrappedQueue.java (88%) rename {internal-api/internal-api-9/src/main/java/datadog/trace/util => utils/queue-utils/src/main/java/datadog/common}/queue/MpscArrayQueueVarHandle.java (98%) rename {internal-api/internal-api-9/src/main/java/datadog/trace/util => utils/queue-utils/src/main/java/datadog/common}/queue/MpscBlockingConsumerArrayQueueVarHandle.java (95%) rename {internal-api/internal-api-9/src/main/java/datadog/trace/util => utils/queue-utils/src/main/java/datadog/common}/queue/NonBlockingQueue.java (91%) rename {internal-api/internal-api-9/src/main/java/datadog/trace/util => utils/queue-utils/src/main/java/datadog/common}/queue/Queues.java (97%) rename {internal-api/internal-api-9/src/main/java/datadog/trace/util => utils/queue-utils/src/main/java/datadog/common}/queue/SpmcArrayQueueVarHandle.java (97%) rename {internal-api/internal-api-9/src/main/java/datadog/trace/util => utils/queue-utils/src/main/java/datadog/common}/queue/SpscArrayQueueVarHandle.java (97%) rename {internal-api/internal-api-9/src/test/groovy/datadog/trace/util => utils/queue-utils/src/test/java/datadog/common}/queue/AbstractQueueTest.groovy (98%) rename {internal-api/internal-api-9/src/test/groovy/datadog/trace/util => utils/queue-utils/src/test/java/datadog/common}/queue/JctoolsWrapppersTest.groovy (98%) rename {internal-api/internal-api-9/src/test/groovy/datadog/trace/util => utils/queue-utils/src/test/java/datadog/common}/queue/MpscArrayQueueVarHandleTest.groovy (97%) rename {internal-api/internal-api-9/src/test/groovy/datadog/trace/util => 
utils/queue-utils/src/test/java/datadog/common}/queue/MpscBlockingConsumerArrayQueueVarHandleTest.groovy (99%) rename {internal-api/internal-api-9/src/test/groovy/datadog/trace/util => utils/queue-utils/src/test/java/datadog/common}/queue/SpmcArrayQueueVarHandleTest.groovy (97%) rename {internal-api/internal-api-9/src/test/groovy/datadog/trace/util => utils/queue-utils/src/test/java/datadog/common}/queue/SpscArrayQueueVarHandleTest.groovy (96%) diff --git a/dd-java-agent/agent-llmobs/build.gradle b/dd-java-agent/agent-llmobs/build.gradle index 09a6f540f7f..95ce12c1e85 100644 --- a/dd-java-agent/agent-llmobs/build.gradle +++ b/dd-java-agent/agent-llmobs/build.gradle @@ -29,7 +29,7 @@ dependencies { implementation project(':communication') implementation project(':components:json') implementation project(':internal-api') - implementation project(':internal-api:internal-api-9') + implementation project(':utils:queue-utils') testImplementation project(':dd-java-agent:testing') diff --git a/dd-java-agent/agent-llmobs/src/main/java/datadog/trace/llmobs/EvalProcessingWorker.java b/dd-java-agent/agent-llmobs/src/main/java/datadog/trace/llmobs/EvalProcessingWorker.java index 0c8b3cc5487..20ba18b5507 100644 --- a/dd-java-agent/agent-llmobs/src/main/java/datadog/trace/llmobs/EvalProcessingWorker.java +++ b/dd-java-agent/agent-llmobs/src/main/java/datadog/trace/llmobs/EvalProcessingWorker.java @@ -6,14 +6,14 @@ import com.squareup.moshi.JsonAdapter; import com.squareup.moshi.Moshi; +import datadog.common.queue.BlockingConsumerNonBlockingQueue; +import datadog.common.queue.Queues; import datadog.communication.ddagent.DDAgentFeaturesDiscovery; import datadog.communication.ddagent.SharedCommunicationObjects; import datadog.communication.http.HttpRetryPolicy; import datadog.communication.http.OkHttpUtils; import datadog.trace.api.Config; import datadog.trace.llmobs.domain.LLMObsEval; -import datadog.trace.util.queue.BlockingConsumerNonBlockingQueue; -import 
datadog.trace.util.queue.Queues; import java.util.ArrayList; import java.util.List; import java.util.concurrent.TimeUnit; diff --git a/dd-java-agent/build.gradle b/dd-java-agent/build.gradle index 8831b322867..2f716c944da 100644 --- a/dd-java-agent/build.gradle +++ b/dd-java-agent/build.gradle @@ -351,6 +351,9 @@ dependencies { sharedShadowInclude project(':utils:socket-utils'), { transitive = false } + sharedShadowInclude project(':utils:queue-utils'), { + transitive = false + } sharedShadowInclude project(':utils:version-utils'), { transitive = false } diff --git a/dd-trace-core/build.gradle b/dd-trace-core/build.gradle index 6f992e45ec3..212125b23c3 100644 --- a/dd-trace-core/build.gradle +++ b/dd-trace-core/build.gradle @@ -66,10 +66,10 @@ dependencies { api project(':dd-trace-api') api project(':communication') api project(':internal-api') - api project(':internal-api:internal-api-9') implementation project(':components:json') implementation project(':utils:container-utils') implementation project(':utils:socket-utils') + implementation project(':utils:queue-utils') // for span exception debugging compileOnly project(':dd-java-agent:agent-debugger:debugger-bootstrap') diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/Aggregator.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/Aggregator.java index e043ea5dfe2..f72be366e7c 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/Aggregator.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/Aggregator.java @@ -2,9 +2,9 @@ import static java.util.concurrent.TimeUnit.MILLISECONDS; +import datadog.common.queue.NonBlockingQueue; import datadog.trace.common.metrics.SignalItem.StopSignal; import datadog.trace.core.util.LRUCache; -import datadog.trace.util.queue.NonBlockingQueue; import java.util.Iterator; import java.util.Map; import java.util.Queue; diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/ConflatingMetricsAggregator.java 
b/dd-trace-core/src/main/java/datadog/trace/common/metrics/ConflatingMetricsAggregator.java index 5d39995c7f4..cda4b05bb9d 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/ConflatingMetricsAggregator.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/ConflatingMetricsAggregator.java @@ -19,6 +19,8 @@ import static java.util.Collections.unmodifiableSet; import static java.util.concurrent.TimeUnit.SECONDS; +import datadog.common.queue.NonBlockingQueue; +import datadog.common.queue.Queues; import datadog.communication.ddagent.DDAgentFeaturesDiscovery; import datadog.communication.ddagent.SharedCommunicationObjects; import datadog.trace.api.Config; @@ -33,8 +35,6 @@ import datadog.trace.core.DDTraceCoreInfo; import datadog.trace.core.monitor.HealthMetrics; import datadog.trace.util.AgentTaskScheduler; -import datadog.trace.util.queue.NonBlockingQueue; -import datadog.trace.util.queue.Queues; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; diff --git a/dd-trace-core/src/main/java/datadog/trace/common/metrics/OkHttpSink.java b/dd-trace-core/src/main/java/datadog/trace/common/metrics/OkHttpSink.java index b91994617d4..0143d84cc3b 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/metrics/OkHttpSink.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/metrics/OkHttpSink.java @@ -9,9 +9,9 @@ import static datadog.trace.common.metrics.EventListener.EventType.OK; import static java.util.concurrent.TimeUnit.SECONDS; +import datadog.common.queue.NonBlockingQueue; +import datadog.common.queue.Queues; import datadog.trace.util.AgentTaskScheduler; -import datadog.trace.util.queue.NonBlockingQueue; -import datadog.trace.util.queue.Queues; import java.io.IOException; import java.nio.ByteBuffer; import java.util.Collections; diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/SpanSamplingWorker.java 
b/dd-trace-core/src/main/java/datadog/trace/common/writer/SpanSamplingWorker.java index ef8b5f479aa..36635684c3e 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/SpanSamplingWorker.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/SpanSamplingWorker.java @@ -5,13 +5,13 @@ import static datadog.trace.util.AgentThreadFactory.newAgentThread; import static java.util.concurrent.TimeUnit.MILLISECONDS; +import datadog.common.queue.BlockingConsumerNonBlockingQueue; +import datadog.common.queue.NonBlockingQueue; +import datadog.common.queue.Queues; import datadog.communication.ddagent.DroppingPolicy; import datadog.trace.common.sampling.SingleSpanSampler; import datadog.trace.core.DDSpan; import datadog.trace.core.monitor.HealthMetrics; -import datadog.trace.util.queue.BlockingConsumerNonBlockingQueue; -import datadog.trace.util.queue.NonBlockingQueue; -import datadog.trace.util.queue.Queues; import java.util.ArrayList; import java.util.List; import java.util.Queue; diff --git a/dd-trace-core/src/main/java/datadog/trace/common/writer/TraceProcessingWorker.java b/dd-trace-core/src/main/java/datadog/trace/common/writer/TraceProcessingWorker.java index 77e8624a36c..42202f04e28 100644 --- a/dd-trace-core/src/main/java/datadog/trace/common/writer/TraceProcessingWorker.java +++ b/dd-trace-core/src/main/java/datadog/trace/common/writer/TraceProcessingWorker.java @@ -5,6 +5,9 @@ import static datadog.trace.util.AgentThreadFactory.newAgentThread; import static java.util.concurrent.TimeUnit.MILLISECONDS; +import datadog.common.queue.BlockingConsumerNonBlockingQueue; +import datadog.common.queue.NonBlockingQueue; +import datadog.common.queue.Queues; import datadog.communication.ddagent.DroppingPolicy; import datadog.trace.api.Config; import datadog.trace.bootstrap.instrumentation.api.SpanPostProcessor; @@ -15,9 +18,6 @@ import datadog.trace.core.CoreSpan; import datadog.trace.core.DDSpan; import datadog.trace.core.monitor.HealthMetrics; -import 
datadog.trace.util.queue.BlockingConsumerNonBlockingQueue; -import datadog.trace.util.queue.NonBlockingQueue; -import datadog.trace.util.queue.Queues; import java.util.List; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; diff --git a/dd-trace-core/src/main/java/datadog/trace/core/PendingTraceBuffer.java b/dd-trace-core/src/main/java/datadog/trace/core/PendingTraceBuffer.java index 9d89a3ae895..8f67c6c662a 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/PendingTraceBuffer.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/PendingTraceBuffer.java @@ -6,14 +6,14 @@ import static datadog.trace.util.AgentThreadFactory.newAgentThread; import static java.util.Comparator.comparingLong; +import datadog.common.queue.BlockingConsumerNonBlockingQueue; +import datadog.common.queue.Queues; import datadog.communication.ddagent.SharedCommunicationObjects; import datadog.trace.api.Config; import datadog.trace.api.flare.TracerFlare; import datadog.trace.api.time.TimeSource; import datadog.trace.common.writer.TraceDumpJsonExporter; import datadog.trace.core.monitor.HealthMetrics; -import datadog.trace.util.queue.BlockingConsumerNonBlockingQueue; -import datadog.trace.util.queue.Queues; import java.io.IOException; import java.util.ArrayList; import java.util.Comparator; diff --git a/dd-trace-core/src/main/java/datadog/trace/core/datastreams/DefaultDataStreamsMonitoring.java b/dd-trace-core/src/main/java/datadog/trace/core/datastreams/DefaultDataStreamsMonitoring.java index ab27407f357..ae5026a492d 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/datastreams/DefaultDataStreamsMonitoring.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/datastreams/DefaultDataStreamsMonitoring.java @@ -11,6 +11,8 @@ import static datadog.trace.util.AgentThreadFactory.THREAD_JOIN_TIMOUT_MS; import static datadog.trace.util.AgentThreadFactory.newAgentThread; +import datadog.common.queue.NonBlockingQueue; +import 
datadog.common.queue.Queues; import datadog.communication.ddagent.DDAgentFeaturesDiscovery; import datadog.communication.ddagent.SharedCommunicationObjects; import datadog.context.propagation.Propagator; @@ -28,8 +30,6 @@ import datadog.trace.core.DDSpan; import datadog.trace.core.DDTraceCoreInfo; import datadog.trace.util.AgentTaskScheduler; -import datadog.trace.util.queue.NonBlockingQueue; -import datadog.trace.util.queue.Queues; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; diff --git a/gradle/dependencies.gradle b/gradle/dependencies.gradle index 4c18ee280e7..cac5b601f5e 100644 --- a/gradle/dependencies.gradle +++ b/gradle/dependencies.gradle @@ -25,6 +25,7 @@ final class CachedData { exclude(project(':telemetry')) exclude(project(':utils:config-utils')) exclude(project(':utils:container-utils')) + exclude(project(':utils:queue-utils')) exclude(project(':utils:socket-utils')) exclude(project(':utils:time-utils')) exclude(project(':utils:version-utils')) diff --git a/internal-api/internal-api-9/build.gradle.kts b/internal-api/internal-api-9/build.gradle.kts index ebe6d0ab048..fa799f17919 100644 --- a/internal-api/internal-api-9/build.gradle.kts +++ b/internal-api/internal-api-9/build.gradle.kts @@ -39,7 +39,6 @@ val minimumInstructionCoverage by extra(0.8) dependencies { api(project(":internal-api")) - implementation(libs.jctools) // probably the Queues factory should be moved away from there testImplementation(project(":dd-java-agent:testing")) testImplementation(libs.slf4j) diff --git a/settings.gradle.kts b/settings.gradle.kts index 92aba9c108e..5720d66fba9 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -147,6 +147,7 @@ include( ":utils:config-utils", ":utils:container-utils", ":utils:flare-utils", + ":utils:queue-utils", ":utils:socket-utils", ":utils:test-agent-utils:decoder", ":utils:test-utils", diff --git a/utils/queue-utils/build.gradle.kts b/utils/queue-utils/build.gradle.kts new file mode 100644 
index 00000000000..081e2cdcd47 --- /dev/null +++ b/utils/queue-utils/build.gradle.kts @@ -0,0 +1,51 @@ +import groovy.lang.Closure +import org.gradle.kotlin.dsl.extra + +plugins { + `java-library` + id("de.thetaphi.forbiddenapis") version "3.8" + id("me.champeau.jmh") + idea +} + +val minJavaVersionForTests by extra(JavaVersion.VERSION_11) + +apply(from = "$rootDir/gradle/java.gradle") + +java { + toolchain { + languageVersion = JavaLanguageVersion.of(11) + } +} + +tasks.withType().configureEach() { + javadocTool = javaToolchains.javadocToolFor(java.toolchain) +} + +fun AbstractCompile.configureCompiler(javaVersionInteger: Int, compatibilityVersion: JavaVersion? = null, unsetReleaseFlagReason: String? = null) { + (project.extra["configureCompiler"] as Closure<*>).call(this, javaVersionInteger, compatibilityVersion, unsetReleaseFlagReason) +} + +listOf(JavaCompile::class.java, GroovyCompile::class.java).forEach { compileTaskType -> + tasks.withType(compileTaskType).configureEach { + configureCompiler(11, JavaVersion.VERSION_1_8) + } +} + +dependencies { + api(project(":internal-api")) + api(libs.jctools) + + testImplementation(project(":dd-java-agent:testing")) + testImplementation(libs.slf4j) +} + +tasks.forbiddenApisMain { + failOnMissingClasses = false +} + +idea { + module { + jdkName = "11" + } +} diff --git a/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/queue/MPSCBlockingConsumerQueueBenchmark.java b/utils/queue-utils/src/jmh/java/datadog/common/queue/MPSCBlockingConsumerQueueBenchmark.java similarity index 98% rename from internal-api/internal-api-9/src/jmh/java/datadog/trace/util/queue/MPSCBlockingConsumerQueueBenchmark.java rename to utils/queue-utils/src/jmh/java/datadog/common/queue/MPSCBlockingConsumerQueueBenchmark.java index ab71345c8cf..7e20e7a1e7d 100644 --- a/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/queue/MPSCBlockingConsumerQueueBenchmark.java +++ 
b/utils/queue-utils/src/jmh/java/datadog/common/queue/MPSCBlockingConsumerQueueBenchmark.java @@ -1,4 +1,4 @@ -package datadog.trace.util.queue; +package datadog.common.queue; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; diff --git a/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/queue/MPSCQueueBenchmark.java b/utils/queue-utils/src/jmh/java/datadog/common/queue/MPSCQueueBenchmark.java similarity index 98% rename from internal-api/internal-api-9/src/jmh/java/datadog/trace/util/queue/MPSCQueueBenchmark.java rename to utils/queue-utils/src/jmh/java/datadog/common/queue/MPSCQueueBenchmark.java index b7dc3b45f76..1c450669f37 100644 --- a/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/queue/MPSCQueueBenchmark.java +++ b/utils/queue-utils/src/jmh/java/datadog/common/queue/MPSCQueueBenchmark.java @@ -1,4 +1,4 @@ -package datadog.trace.util.queue; +package datadog.common.queue; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; diff --git a/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/queue/SPSCQueueBenchmark.java b/utils/queue-utils/src/jmh/java/datadog/common/queue/SPSCQueueBenchmark.java similarity index 98% rename from internal-api/internal-api-9/src/jmh/java/datadog/trace/util/queue/SPSCQueueBenchmark.java rename to utils/queue-utils/src/jmh/java/datadog/common/queue/SPSCQueueBenchmark.java index 6e829f43dc6..b0ab1b34998 100644 --- a/internal-api/internal-api-9/src/jmh/java/datadog/trace/util/queue/SPSCQueueBenchmark.java +++ b/utils/queue-utils/src/jmh/java/datadog/common/queue/SPSCQueueBenchmark.java @@ -1,4 +1,4 @@ -package datadog.trace.util.queue; +package datadog.common.queue; import java.util.concurrent.TimeUnit; import org.openjdk.jmh.annotations.Benchmark; diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/BaseQueue.java b/utils/queue-utils/src/main/java/datadog/common/queue/BaseQueue.java similarity index 95% rename from 
internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/BaseQueue.java rename to utils/queue-utils/src/main/java/datadog/common/queue/BaseQueue.java index f9b2cbada49..5a6502abec8 100644 --- a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/BaseQueue.java +++ b/utils/queue-utils/src/main/java/datadog/common/queue/BaseQueue.java @@ -1,4 +1,4 @@ -package datadog.trace.util.queue; +package datadog.common.queue; import static datadog.trace.util.BitUtils.nextPowerOfTwo; @@ -8,7 +8,7 @@ import java.util.function.Supplier; import javax.annotation.Nonnull; -public abstract class BaseQueue extends AbstractQueue implements NonBlockingQueue { +abstract class BaseQueue extends AbstractQueue implements NonBlockingQueue { /** The capacity of the queue (must be a power of two) */ protected final int capacity; diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/BlockingConsumerNonBlockingQueue.java b/utils/queue-utils/src/main/java/datadog/common/queue/BlockingConsumerNonBlockingQueue.java similarity index 88% rename from internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/BlockingConsumerNonBlockingQueue.java rename to utils/queue-utils/src/main/java/datadog/common/queue/BlockingConsumerNonBlockingQueue.java index 264453c03fa..a6b7b336a5a 100644 --- a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/BlockingConsumerNonBlockingQueue.java +++ b/utils/queue-utils/src/main/java/datadog/common/queue/BlockingConsumerNonBlockingQueue.java @@ -1,4 +1,4 @@ -package datadog.trace.util.queue; +package datadog.common.queue; import java.util.concurrent.TimeUnit; import javax.annotation.Nonnull; diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/JctoolsMpscBlockingConsumerWrappedQueue.java b/utils/queue-utils/src/main/java/datadog/common/queue/JctoolsMpscBlockingConsumerWrappedQueue.java similarity index 85% rename from 
internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/JctoolsMpscBlockingConsumerWrappedQueue.java rename to utils/queue-utils/src/main/java/datadog/common/queue/JctoolsMpscBlockingConsumerWrappedQueue.java index a5381167ca7..3ccf4fe983f 100644 --- a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/JctoolsMpscBlockingConsumerWrappedQueue.java +++ b/utils/queue-utils/src/main/java/datadog/common/queue/JctoolsMpscBlockingConsumerWrappedQueue.java @@ -1,11 +1,11 @@ -package datadog.trace.util.queue; +package datadog.common.queue; import java.util.concurrent.BlockingQueue; import java.util.concurrent.TimeUnit; import javax.annotation.Nonnull; import org.jctools.queues.MpscBlockingConsumerArrayQueue; -public class JctoolsMpscBlockingConsumerWrappedQueue extends JctoolsWrappedQueue +class JctoolsMpscBlockingConsumerWrappedQueue extends JctoolsWrappedQueue implements BlockingConsumerNonBlockingQueue { private final BlockingQueue blockingQueueDelegate; diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/JctoolsWrappedQueue.java b/utils/queue-utils/src/main/java/datadog/common/queue/JctoolsWrappedQueue.java similarity index 88% rename from internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/JctoolsWrappedQueue.java rename to utils/queue-utils/src/main/java/datadog/common/queue/JctoolsWrappedQueue.java index 32b456129bd..ac0c559cc00 100644 --- a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/JctoolsWrappedQueue.java +++ b/utils/queue-utils/src/main/java/datadog/common/queue/JctoolsWrappedQueue.java @@ -1,6 +1,4 @@ -package datadog.trace.util.queue; - -import static java.lang.Math.E; +package datadog.common.queue; import java.util.AbstractQueue; import java.util.Iterator; @@ -9,7 +7,7 @@ import javax.annotation.Nonnull; import org.jctools.queues.MessagePassingQueue; -public class JctoolsWrappedQueue extends AbstractQueue implements NonBlockingQueue { +class JctoolsWrappedQueue 
extends AbstractQueue implements NonBlockingQueue { private final MessagePassingQueue delegate; public JctoolsWrappedQueue(@Nonnull MessagePassingQueue delegate) { diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscArrayQueueVarHandle.java b/utils/queue-utils/src/main/java/datadog/common/queue/MpscArrayQueueVarHandle.java similarity index 98% rename from internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscArrayQueueVarHandle.java rename to utils/queue-utils/src/main/java/datadog/common/queue/MpscArrayQueueVarHandle.java index 3c5d6e0a3ae..3f3dfbfd39a 100644 --- a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscArrayQueueVarHandle.java +++ b/utils/queue-utils/src/main/java/datadog/common/queue/MpscArrayQueueVarHandle.java @@ -1,4 +1,4 @@ -package datadog.trace.util.queue; +package datadog.common.queue; import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles.Lookup; @@ -14,7 +14,7 @@ * * @param the type of elements stored */ -public class MpscArrayQueueVarHandle extends BaseQueue { +class MpscArrayQueueVarHandle extends BaseQueue { private static final VarHandle ARRAY_HANDLE; private static final VarHandle HEAD_HANDLE; private static final VarHandle TAIL_HANDLE; diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandle.java b/utils/queue-utils/src/main/java/datadog/common/queue/MpscBlockingConsumerArrayQueueVarHandle.java similarity index 95% rename from internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandle.java rename to utils/queue-utils/src/main/java/datadog/common/queue/MpscBlockingConsumerArrayQueueVarHandle.java index bfd79d39efa..a5ffe531122 100644 --- a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandle.java +++ 
b/utils/queue-utils/src/main/java/datadog/common/queue/MpscBlockingConsumerArrayQueueVarHandle.java @@ -1,4 +1,4 @@ -package datadog.trace.util.queue; +package datadog.common.queue; import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.LockSupport; @@ -9,7 +9,7 @@ * *

The wait is performed by parking/unparking the consumer thread. */ -public class MpscBlockingConsumerArrayQueueVarHandle extends MpscArrayQueueVarHandle +class MpscBlockingConsumerArrayQueueVarHandle extends MpscArrayQueueVarHandle implements BlockingConsumerNonBlockingQueue { /** Consumer thread reference for wake-up. */ private volatile Thread consumerThread; diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/NonBlockingQueue.java b/utils/queue-utils/src/main/java/datadog/common/queue/NonBlockingQueue.java similarity index 91% rename from internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/NonBlockingQueue.java rename to utils/queue-utils/src/main/java/datadog/common/queue/NonBlockingQueue.java index 489ccdb9295..939577f5961 100644 --- a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/NonBlockingQueue.java +++ b/utils/queue-utils/src/main/java/datadog/common/queue/NonBlockingQueue.java @@ -1,4 +1,4 @@ -package datadog.trace.util.queue; +package datadog.common.queue; import java.util.Queue; import java.util.function.Consumer; diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/Queues.java b/utils/queue-utils/src/main/java/datadog/common/queue/Queues.java similarity index 97% rename from internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/Queues.java rename to utils/queue-utils/src/main/java/datadog/common/queue/Queues.java index 3ea29ae17fb..38370f0d216 100644 --- a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/Queues.java +++ b/utils/queue-utils/src/main/java/datadog/common/queue/Queues.java @@ -1,4 +1,4 @@ -package datadog.trace.util.queue; +package datadog.common.queue; import datadog.environment.JavaVirtualMachine; import org.jctools.queues.MpscArrayQueue; diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpmcArrayQueueVarHandle.java 
b/utils/queue-utils/src/main/java/datadog/common/queue/SpmcArrayQueueVarHandle.java similarity index 97% rename from internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpmcArrayQueueVarHandle.java rename to utils/queue-utils/src/main/java/datadog/common/queue/SpmcArrayQueueVarHandle.java index 2f5b28fc8c9..d93e8f08e07 100644 --- a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpmcArrayQueueVarHandle.java +++ b/utils/queue-utils/src/main/java/datadog/common/queue/SpmcArrayQueueVarHandle.java @@ -1,4 +1,4 @@ -package datadog.trace.util.queue; +package datadog.common.queue; import java.lang.invoke.MethodHandles; import java.lang.invoke.VarHandle; @@ -11,7 +11,7 @@ * * @param the element type */ -public class SpmcArrayQueueVarHandle extends BaseQueue { +class SpmcArrayQueueVarHandle extends BaseQueue { private static final VarHandle HEAD_HANDLE; private static final VarHandle TAIL_HANDLE; diff --git a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpscArrayQueueVarHandle.java b/utils/queue-utils/src/main/java/datadog/common/queue/SpscArrayQueueVarHandle.java similarity index 97% rename from internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpscArrayQueueVarHandle.java rename to utils/queue-utils/src/main/java/datadog/common/queue/SpscArrayQueueVarHandle.java index e1fe55e0300..9f772063822 100644 --- a/internal-api/internal-api-9/src/main/java/datadog/trace/util/queue/SpscArrayQueueVarHandle.java +++ b/utils/queue-utils/src/main/java/datadog/common/queue/SpscArrayQueueVarHandle.java @@ -1,4 +1,4 @@ -package datadog.trace.util.queue; +package datadog.common.queue; import java.lang.invoke.MethodHandles; import java.lang.invoke.VarHandle; @@ -10,7 +10,7 @@ * * @param the type of elements held in this queue */ -public class SpscArrayQueueVarHandle extends BaseQueue { +class SpscArrayQueueVarHandle extends BaseQueue { /** Backing array storing elements. 
*/ private final Object[] buffer; diff --git a/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/AbstractQueueTest.groovy b/utils/queue-utils/src/test/java/datadog/common/queue/AbstractQueueTest.groovy similarity index 98% rename from internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/AbstractQueueTest.groovy rename to utils/queue-utils/src/test/java/datadog/common/queue/AbstractQueueTest.groovy index 6ba053c9a6f..139f992ce86 100644 --- a/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/AbstractQueueTest.groovy +++ b/utils/queue-utils/src/test/java/datadog/common/queue/AbstractQueueTest.groovy @@ -1,4 +1,4 @@ -package datadog.trace.util.queue +package datadog.common.queue import datadog.trace.test.util.DDSpecification diff --git a/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/JctoolsWrapppersTest.groovy b/utils/queue-utils/src/test/java/datadog/common/queue/JctoolsWrapppersTest.groovy similarity index 98% rename from internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/JctoolsWrapppersTest.groovy rename to utils/queue-utils/src/test/java/datadog/common/queue/JctoolsWrapppersTest.groovy index f6a82b911e0..e763004c6e8 100644 --- a/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/JctoolsWrapppersTest.groovy +++ b/utils/queue-utils/src/test/java/datadog/common/queue/JctoolsWrapppersTest.groovy @@ -1,4 +1,4 @@ -package datadog.trace.util.queue +package datadog.common.queue import datadog.trace.test.util.DDSpecification import java.util.concurrent.TimeUnit diff --git a/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/MpscArrayQueueVarHandleTest.groovy b/utils/queue-utils/src/test/java/datadog/common/queue/MpscArrayQueueVarHandleTest.groovy similarity index 97% rename from internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/MpscArrayQueueVarHandleTest.groovy rename to 
utils/queue-utils/src/test/java/datadog/common/queue/MpscArrayQueueVarHandleTest.groovy index 7f8bbcc1d0b..e8484492f28 100644 --- a/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/MpscArrayQueueVarHandleTest.groovy +++ b/utils/queue-utils/src/test/java/datadog/common/queue/MpscArrayQueueVarHandleTest.groovy @@ -1,4 +1,4 @@ -package datadog.trace.util.queue +package datadog.common.queue import java.util.concurrent.CountDownLatch diff --git a/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandleTest.groovy b/utils/queue-utils/src/test/java/datadog/common/queue/MpscBlockingConsumerArrayQueueVarHandleTest.groovy similarity index 99% rename from internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandleTest.groovy rename to utils/queue-utils/src/test/java/datadog/common/queue/MpscBlockingConsumerArrayQueueVarHandleTest.groovy index 8a14534d8e3..9404a850da9 100644 --- a/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/MpscBlockingConsumerArrayQueueVarHandleTest.groovy +++ b/utils/queue-utils/src/test/java/datadog/common/queue/MpscBlockingConsumerArrayQueueVarHandleTest.groovy @@ -1,4 +1,4 @@ -package datadog.trace.util.queue +package datadog.common.queue import static java.util.concurrent.TimeUnit.NANOSECONDS diff --git a/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/SpmcArrayQueueVarHandleTest.groovy b/utils/queue-utils/src/test/java/datadog/common/queue/SpmcArrayQueueVarHandleTest.groovy similarity index 97% rename from internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/SpmcArrayQueueVarHandleTest.groovy rename to utils/queue-utils/src/test/java/datadog/common/queue/SpmcArrayQueueVarHandleTest.groovy index 9038ab058a1..996d9c42d95 100644 --- a/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/SpmcArrayQueueVarHandleTest.groovy +++ 
b/utils/queue-utils/src/test/java/datadog/common/queue/SpmcArrayQueueVarHandleTest.groovy @@ -1,4 +1,4 @@ -package datadog.trace.util.queue +package datadog.common.queue import java.util.concurrent.CountDownLatch diff --git a/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/SpscArrayQueueVarHandleTest.groovy b/utils/queue-utils/src/test/java/datadog/common/queue/SpscArrayQueueVarHandleTest.groovy similarity index 96% rename from internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/SpscArrayQueueVarHandleTest.groovy rename to utils/queue-utils/src/test/java/datadog/common/queue/SpscArrayQueueVarHandleTest.groovy index e1a04dc397b..2bccbe7cbbe 100644 --- a/internal-api/internal-api-9/src/test/groovy/datadog/trace/util/queue/SpscArrayQueueVarHandleTest.groovy +++ b/utils/queue-utils/src/test/java/datadog/common/queue/SpscArrayQueueVarHandleTest.groovy @@ -1,4 +1,4 @@ -package datadog.trace.util.queue +package datadog.common.queue import java.util.concurrent.atomic.AtomicInteger From e2b7276c6ca286d38ad579e7fd2b0d878d398672 Mon Sep 17 00:00:00 2001 From: Andrea Marziali Date: Fri, 7 Nov 2025 17:05:35 +0100 Subject: [PATCH 13/18] Revert "Use getAndAdd for the offer fast path" This reverts commit 14cc59757eb00dda40ec97b56c282ebd77b70b4e. 
--- .../common/queue/MpscArrayQueueVarHandle.java | 24 ++++++------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/utils/queue-utils/src/main/java/datadog/common/queue/MpscArrayQueueVarHandle.java b/utils/queue-utils/src/main/java/datadog/common/queue/MpscArrayQueueVarHandle.java index 3f3dfbfd39a..1ad6d079405 100644 --- a/utils/queue-utils/src/main/java/datadog/common/queue/MpscArrayQueueVarHandle.java +++ b/utils/queue-utils/src/main/java/datadog/common/queue/MpscArrayQueueVarHandle.java @@ -82,7 +82,9 @@ public MpscArrayQueueVarHandle(int requestedCapacity) { public boolean offer(E e) { Objects.requireNonNull(e); + // jctools does the same local copy to have the jitter optimise the accesses final Object[] localBuffer = this.buffer; + long localProducerLimit = (long) PRODUCER_LIMIT_HANDLE.getVolatile(this); long cachedHead = 0L; // Local cache of head to reduce volatile reads @@ -92,8 +94,9 @@ public boolean offer(E e) { while (true) { long currentTail = (long) TAIL_HANDLE.getVolatile(this); - // Slow path: refresh producer limit when queue is near full + // Check if producer limit exceeded if (currentTail >= localProducerLimit) { + // Refresh head only when necessary cachedHead = (long) HEAD_HANDLE.getVolatile(this); localProducerLimit = cachedHead + capacity; @@ -101,36 +104,23 @@ public boolean offer(E e) { return false; // queue full } + // Update producerLimit so other producers also benefit PRODUCER_LIMIT_HANDLE.setVolatile(this, localProducerLimit); } - long freeSlots = localProducerLimit - currentTail; - - // Fast path: getAndAdd if occupancy < 75% - if (freeSlots > (long) (capacity * 0.25)) { // more than 25% free - long slot = (long) TAIL_HANDLE.getAndAdd(this, 1L); - final int index = (int) (slot & mask); - - // Release-store ensures visibility to consumer - ARRAY_HANDLE.setRelease(localBuffer, index, e); - return true; - } - - // Slow path: CAS near limit + // Attempt to claim a slot if (TAIL_HANDLE.compareAndSet(this, 
currentTail, currentTail + 1)) { final int index = (int) (currentTail & mask); - // Release-store ensures visibility to consumer + // Release-store ensures producer's write is visible to consumer ARRAY_HANDLE.setRelease(localBuffer, index, e); return true; } // Backoff to reduce contention if ((spinCycles & 1) == 0) { - // spin each even cycles Thread.onSpinWait(); } else { - // use a 'random' alternate backoff on odd cycles if (parkOnSpin) { LockSupport.parkNanos(1); } else { From 152f53f1e911a97790ce490319736cfad5389f16 Mon Sep 17 00:00:00 2001 From: Andrea Marziali Date: Fri, 7 Nov 2025 18:32:50 +0100 Subject: [PATCH 14/18] let it inline --- .../MPSCBlockingConsumerQueueBenchmark.java | 15 +- .../common/queue/MPSCQueueBenchmark.java | 33 +-- ...scBlockingConsumerArrayQueueVarHandle.java | 250 +++++++++++++++--- 3 files changed, 224 insertions(+), 74 deletions(-) diff --git a/utils/queue-utils/src/jmh/java/datadog/common/queue/MPSCBlockingConsumerQueueBenchmark.java b/utils/queue-utils/src/jmh/java/datadog/common/queue/MPSCBlockingConsumerQueueBenchmark.java index 7e20e7a1e7d..f778f61c478 100644 --- a/utils/queue-utils/src/jmh/java/datadog/common/queue/MPSCBlockingConsumerQueueBenchmark.java +++ b/utils/queue-utils/src/jmh/java/datadog/common/queue/MPSCBlockingConsumerQueueBenchmark.java @@ -47,31 +47,20 @@ public static class QueueState { @Setup(Level.Iteration) public void setup() { queue = new MpscBlockingConsumerArrayQueueVarHandle<>(capacity); - consumerReady = new CountDownLatch(1); } } @Benchmark @Group("queueTest") @GroupThreads(4) - public void produce(QueueState state) { - try { - state.consumerReady.await(); // wait until consumer is ready - } catch (InterruptedException ignored) { - } - - // bounded attempt: try once, then yield if full - boolean offered = state.queue.offer(0); - if (!offered) { - Thread.yield(); - } + public void produce(QueueState state, Blackhole bh) { + bh.consume(state.queue.offer(1)); } @Benchmark @Group("queueTest") 
@GroupThreads(1) public void consume(QueueState state, Blackhole bh) { - state.consumerReady.countDown(); // signal producers can start Integer v = state.queue.poll(); if (v != null) { bh.consume(v); diff --git a/utils/queue-utils/src/jmh/java/datadog/common/queue/MPSCQueueBenchmark.java b/utils/queue-utils/src/jmh/java/datadog/common/queue/MPSCQueueBenchmark.java index 1c450669f37..44b3a58421b 100644 --- a/utils/queue-utils/src/jmh/java/datadog/common/queue/MPSCQueueBenchmark.java +++ b/utils/queue-utils/src/jmh/java/datadog/common/queue/MPSCQueueBenchmark.java @@ -1,6 +1,5 @@ package datadog.common.queue; -import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; @@ -20,14 +19,14 @@ /* Benchmark (capacity) Mode Cnt Score Error Units -MPSCQueueBenchmark.queueTest 1024 thrpt 146.530 ops/us -MPSCQueueBenchmark.queueTest:async 1024 thrpt NaN --- -MPSCQueueBenchmark.queueTest:consume 1024 thrpt 108.357 ops/us -MPSCQueueBenchmark.queueTest:produce 1024 thrpt 38.172 ops/us -MPSCQueueBenchmark.queueTest 65536 thrpt 179.177 ops/us -MPSCQueueBenchmark.queueTest:async 65536 thrpt NaN --- -MPSCQueueBenchmark.queueTest:consume 65536 thrpt 140.968 ops/us -MPSCQueueBenchmark.queueTest:produce 65536 thrpt 38.209 ops/us */ +MPSCQueueBenchmark.queueTest 1024 thrpt 272.751 ops/us +MPSCQueueBenchmark.queueTest:consume 1024 thrpt 258.737 ops/us +MPSCQueueBenchmark.queueTest:produce 1024 thrpt 14.013 ops/us +MPSCQueueBenchmark.queueTest:·async 1024 thrpt NaN --- +MPSCQueueBenchmark.queueTest 65536 thrpt 120.776 ops/us +MPSCQueueBenchmark.queueTest:consume 65536 thrpt 108.595 ops/us +MPSCQueueBenchmark.queueTest:produce 65536 thrpt 12.182 ops/us + */ @BenchmarkMode(Mode.Throughput) @Warmup(iterations = 1, time = 30) @Measurement(iterations = 1, time = 30) @@ -38,7 +37,6 @@ public class MPSCQueueBenchmark { @State(Scope.Group) public static class QueueState { 
MpscArrayQueueVarHandle queue; - CountDownLatch consumerReady; @Param({"1024", "65536"}) int capacity; @@ -46,31 +44,20 @@ public static class QueueState { @Setup(Level.Iteration) public void setup() { queue = new MpscArrayQueueVarHandle<>(capacity); - consumerReady = new CountDownLatch(1); } } @Benchmark @Group("queueTest") @GroupThreads(4) - public void produce(QueueState state) { - try { - state.consumerReady.await(); // wait until consumer is ready - } catch (InterruptedException ignored) { - } - - // bounded attempt: try once, then yield if full - boolean offered = state.queue.offer(0); - if (!offered) { - Thread.yield(); - } + public void produce(QueueState state, Blackhole blackhole) { + blackhole.consume(state.queue.offer(0)); } @Benchmark @Group("queueTest") @GroupThreads(1) public void consume(QueueState state, Blackhole bh) { - state.consumerReady.countDown(); // signal producers can start Integer v = state.queue.poll(); if (v != null) { bh.consume(v); diff --git a/utils/queue-utils/src/main/java/datadog/common/queue/MpscBlockingConsumerArrayQueueVarHandle.java b/utils/queue-utils/src/main/java/datadog/common/queue/MpscBlockingConsumerArrayQueueVarHandle.java index a5ffe531122..9fb83cbed01 100644 --- a/utils/queue-utils/src/main/java/datadog/common/queue/MpscBlockingConsumerArrayQueueVarHandle.java +++ b/utils/queue-utils/src/main/java/datadog/common/queue/MpscBlockingConsumerArrayQueueVarHandle.java @@ -1,52 +1,182 @@ package datadog.common.queue; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.MethodHandles.Lookup; +import java.lang.invoke.VarHandle; +import java.util.Objects; import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.LockSupport; import javax.annotation.Nonnull; /** - * A MPSC Array queue offering blocking methods (take and timed poll) for a single consumer. + * A Multiple-Producer, Single-Consumer (MPSC) bounded lock-free queue using a circular array and + * VarHandles. 
It adds blocking capabilities for a single consumer (take, timed poll). * - *

The wait is performed by parking/unparking the consumer thread. + *

All operations are wait-free for the consumer and lock-free for producers. + * + * @param the type of elements stored */ -class MpscBlockingConsumerArrayQueueVarHandle extends MpscArrayQueueVarHandle +class MpscBlockingConsumerArrayQueueVarHandle extends BaseQueue implements BlockingConsumerNonBlockingQueue { - /** Consumer thread reference for wake-up. */ + private static final VarHandle ARRAY_HANDLE; + private static final VarHandle HEAD_HANDLE; + private static final VarHandle TAIL_HANDLE; + private static final VarHandle PRODUCER_LIMIT_HANDLE; + private static final VarHandle CONSUMER_THREAD_HANDLE; + + static { + try { + final Lookup lookup = MethodHandles.lookup(); + TAIL_HANDLE = + lookup.findVarHandle(MpscBlockingConsumerArrayQueueVarHandle.class, "tail", long.class); + HEAD_HANDLE = + lookup.findVarHandle(MpscBlockingConsumerArrayQueueVarHandle.class, "head", long.class); + ARRAY_HANDLE = MethodHandles.arrayElementVarHandle(Object[].class); + PRODUCER_LIMIT_HANDLE = + lookup.findVarHandle( + MpscBlockingConsumerArrayQueueVarHandle.class, "producerLimit", long.class); + CONSUMER_THREAD_HANDLE = + lookup.findVarHandle( + MpscBlockingConsumerArrayQueueVarHandle.class, "consumerThread", Thread.class); + } catch (Throwable t) { + throw new IllegalStateException(t); + } + } + + /** The backing array (plain Java array for VarHandle access) */ + private final Object[] buffer; + + // Padding to prevent false sharing + @SuppressWarnings("unused") + private long p0, p1, p2, p3, p4, p5, p6; + + /** Next free slot for producers (multi-threaded) */ + private volatile long tail = 0L; + + // Padding around tail + @SuppressWarnings("unused") + private long q0, q1, q2, q3, q4, q5, q6; + + /** Cached producer limit to reduce volatile head reads */ + private volatile long producerLimit = 0L; + + // Padding around producerLimit + @SuppressWarnings("unused") + private long r0, r1, r2, r3, r4, r5, r6; + + /** Next slot to consume (single-threaded) */ + private volatile long head 
= 0L; + + // Padding around head + @SuppressWarnings("unused") + private long s0, s1, s2, s3, s4, s5, s6; + + /** Reference to the waiting consumer thread (set atomically). */ private volatile Thread consumerThread; - public MpscBlockingConsumerArrayQueueVarHandle(int capacity) { - super(capacity); + /** + * Creates a new MPSC queue. + * + * @param requestedCapacity the desired capacity, rounded up to next power of two + */ + public MpscBlockingConsumerArrayQueueVarHandle(int requestedCapacity) { + super(requestedCapacity); + this.buffer = new Object[capacity]; + this.producerLimit = capacity; } + /** + * Attempts to add an element to the queue. + * + * @param e the element to add (must be non-null) + * @return true if element was enqueued, false if queue is full + */ @Override public boolean offer(E e) { - final boolean success = super.offer(e); - if (success) { - try { - final Thread c = consumerThread; - LockSupport.unpark(c); // unpark is safe if the arg is null - } finally { - consumerThread = null; + Objects.requireNonNull(e); + + // jctools does the same local copy to have the jitter optimise the accesses + final Object[] localBuffer = this.buffer; + + long localProducerLimit = (long) PRODUCER_LIMIT_HANDLE.getVolatile(this); + long cachedHead = 0L; // Local cache of head to reduce volatile reads + + int spinCycles = 0; + boolean parkOnSpin = (Thread.currentThread().getId() & 1) == 0; + + while (true) { + long currentTail = (long) TAIL_HANDLE.getVolatile(this); + + // Check if producer limit exceeded + if (currentTail >= localProducerLimit) { + // Refresh head only when necessary + cachedHead = (long) HEAD_HANDLE.getVolatile(this); + localProducerLimit = cachedHead + capacity; + + if (currentTail >= localProducerLimit) { + return false; // queue full + } + + // Update producerLimit so other producers also benefit + PRODUCER_LIMIT_HANDLE.setVolatile(this, localProducerLimit); + } + + // Attempt to claim a slot + if (TAIL_HANDLE.compareAndSet(this, currentTail, 
currentTail + 1)) { + final int index = (int) (currentTail & mask); + + // Release-store ensures producer's write is visible to consumer + ARRAY_HANDLE.setRelease(localBuffer, index, e); + + // Atomically clear and unpark the consumer if waiting + Thread c = (Thread) CONSUMER_THREAD_HANDLE.getAndSet(this, null); + if (c != null) { + LockSupport.unpark(c); + } + + return true; } - } - return success; + // Backoff to reduce contention + if ((spinCycles & 1) == 0) { + Thread.onSpinWait(); + } else { + if (parkOnSpin) { + LockSupport.parkNanos(1); + } else { + Thread.yield(); + } + } + spinCycles++; + } } /** - * Retrieves and removes the head element, waiting if necessary until one becomes available. + * Removes and returns the next element, or null if empty. * - * @return the next element (never null) - * @throws InterruptedException if interrupted while waiting + * @return dequeued element, or null if queue empty */ @Override - public E take() throws InterruptedException { - consumerThread = Thread.currentThread(); - E e; - while ((e = poll()) == null) { - parkUntilNext(-1); + @SuppressWarnings("unchecked") + public E poll() { + final Object[] localBuffer = this.buffer; + + long currentHead = (long) HEAD_HANDLE.getOpaque(this); + final int index = (int) (currentHead & mask); + + // Acquire-load ensures visibility of producer write + Object value = ARRAY_HANDLE.getAcquire(localBuffer, index); + if (value == null) { + return null; } - return e; + + // Clear the slot without additional fence + ARRAY_HANDLE.setOpaque(localBuffer, index, null); + + // Advance head using opaque write (consumer-only) + HEAD_HANDLE.setOpaque(this, currentHead + 1); + + return (E) value; } /** @@ -74,6 +204,50 @@ public E poll(long timeout, @Nonnull TimeUnit unit) throws InterruptedException return poll(); } + /** + * Retrieves and removes the head element, waiting if necessary until one becomes available. 
+ * + * @return the next element (never null) + * @throws InterruptedException if interrupted while waiting + */ + @Override + public E take() throws InterruptedException { + consumerThread = Thread.currentThread(); + E e; + while ((e = poll()) == null) { + parkUntilNext(-1); + } + return e; + } + + /** + * Returns next element without removing it. + * + *

The memory visibility is only correct if the consumer calls it. + * + * @return next element or null if empty + */ + @Override + @SuppressWarnings("unchecked") + public E peek() { + final int index = (int) ((long) HEAD_HANDLE.getOpaque(this) & mask); + return (E) ARRAY_HANDLE.getVolatile(buffer, index); + } + + /** + * Returns number of elements in queue. + * + *

Volatile reads of tail and head ensure accurate result in multi-threaded context. + * + * @return current size + */ + @Override + public int size() { + long currentHead = (long) HEAD_HANDLE.getVolatile(this); + long currentTail = (long) TAIL_HANDLE.getVolatile(this); + return (int) (currentTail - currentHead); + } + /** * Blocks (parks) until an element becomes available or until the specified timeout elapses. * @@ -85,20 +259,20 @@ public E poll(long timeout, @Nonnull TimeUnit unit) throws InterruptedException * @throws InterruptedException if interrupted */ private void parkUntilNext(long nanos) throws InterruptedException { - try { - // register this thread as the waiting consumer - consumerThread = Thread.currentThread(); - if (nanos <= 0) { - LockSupport.park(this); - } else { - LockSupport.parkNanos(this, nanos); - } - if (Thread.interrupted()) { - throw new InterruptedException(); - } - } finally { - // free the variable not to reference the consumer thread anymore - consumerThread = null; + Thread current = Thread.currentThread(); + // Publish the consumer thread (no ordering required) + CONSUMER_THREAD_HANDLE.setOpaque(this, current); + if (nanos <= 0) { + LockSupport.park(this); + } else { + LockSupport.parkNanos(this, nanos); } + + if (Thread.interrupted()) { + throw new InterruptedException(); + } + + // Cleanup (no fence needed, single consumer) + CONSUMER_THREAD_HANDLE.setOpaque(this, null); } } From dc0acd83cb1376a7be4390fcc02b57ce1f8298d2 Mon Sep 17 00:00:00 2001 From: Andrea Marziali Date: Mon, 10 Nov 2025 09:01:43 +0100 Subject: [PATCH 15/18] Refine documentation --- .../java/datadog/common/queue/BaseQueue.java | 44 +++------------ .../BlockingConsumerNonBlockingQueue.java | 37 +++++++++++++ ...toolsMpscBlockingConsumerWrappedQueue.java | 9 ++++ .../common/queue/JctoolsWrappedQueue.java | 9 ++++ .../common/queue/MpscArrayQueueVarHandle.java | 25 --------- ...scBlockingConsumerArrayQueueVarHandle.java | 41 +------------- 
.../common/queue/NonBlockingQueue.java | 54 +++++++++++++++++++ .../java/datadog/common/queue/Queues.java | 46 ++++++++++++++++ .../common/queue/SpmcArrayQueueVarHandle.java | 23 -------- .../common/queue/SpscArrayQueueVarHandle.java | 15 ------ 10 files changed, 162 insertions(+), 141 deletions(-) diff --git a/utils/queue-utils/src/main/java/datadog/common/queue/BaseQueue.java b/utils/queue-utils/src/main/java/datadog/common/queue/BaseQueue.java index 5a6502abec8..2ee78253340 100644 --- a/utils/queue-utils/src/main/java/datadog/common/queue/BaseQueue.java +++ b/utils/queue-utils/src/main/java/datadog/common/queue/BaseQueue.java @@ -8,6 +8,11 @@ import java.util.function.Supplier; import javax.annotation.Nonnull; +/** + * Base class for non-blocking queuing operations. + * + * @param the type of elements held by this queue + */ abstract class BaseQueue extends AbstractQueue implements NonBlockingQueue { /** The capacity of the queue (must be a power of two) */ protected final int capacity; @@ -20,32 +25,13 @@ public BaseQueue(int capacity) { this.mask = this.capacity - 1; } - /** - * Drains all available elements from the queue to a consumer. - * - *

This is efficient since it avoids repeated size() checks and returns immediately when empty. - * - * @param consumer a consumer to accept elements - * @return number of elements drained - */ @Override public int drain(Consumer consumer) { return drain(consumer, Integer.MAX_VALUE); } - /** - * Drains up to {@code limit} elements from the queue to a consumer. - * - *

This method is useful for batch processing. - * - *

Each element is removed atomically using poll() and passed to the consumer. - * - * @param consumer a consumer to accept elements - * @param limit maximum number of elements to drain - * @return number of elements drained - */ @Override - public int drain(Consumer consumer, int limit) { + public int drain(@Nonnull Consumer consumer, int limit) { int count = 0; E e; while (count < limit && (e = poll()) != null) { @@ -55,14 +41,6 @@ public int drain(Consumer consumer, int limit) { return count; } - /** - * Fills the queue with elements provided by the supplier until either: - the queue is full, or - - * the supplier runs out of elements (returns null) - * - * @param supplier a supplier of elements - * @param limit maximum number of elements to attempt to insert - * @return number of elements successfully enqueued - */ @Override public int fill(@Nonnull Supplier supplier, int limit) { if (limit <= 0) { @@ -95,21 +73,11 @@ public Iterator iterator() { throw new UnsupportedOperationException(); } - /** - * Returns the remaining capacity. - * - * @return number of additional elements this queue can accept - */ @Override public int remainingCapacity() { return capacity - size(); } - /** - * Returns the maximum queue capacity. - * - * @return number of total elements this queue can accept - */ @Override public int capacity() { return capacity; diff --git a/utils/queue-utils/src/main/java/datadog/common/queue/BlockingConsumerNonBlockingQueue.java b/utils/queue-utils/src/main/java/datadog/common/queue/BlockingConsumerNonBlockingQueue.java index a6b7b336a5a..159483073bd 100644 --- a/utils/queue-utils/src/main/java/datadog/common/queue/BlockingConsumerNonBlockingQueue.java +++ b/utils/queue-utils/src/main/java/datadog/common/queue/BlockingConsumerNonBlockingQueue.java @@ -3,8 +3,45 @@ import java.util.concurrent.TimeUnit; import javax.annotation.Nonnull; +/** + * A hybrid queue interface combining non-blocking producer semantics with blocking consumer + * operations. + * + *

This interface extends {@link NonBlockingQueue} and adds methods that allow consumers to block + * while waiting for elements to become available. It is intended for use in scenarios with: + * + *

    + *
  • Multiple or single producers that enqueue elements using non-blocking operations (e.g., + * {@link #offer(Object)}). + *
  • A single consumer that may block until elements are ready (i.e., using {@link + * #take()} or {@link #poll(long, TimeUnit)}). + *
+ * + * @param the type of elements held in this queue + */ public interface BlockingConsumerNonBlockingQueue extends NonBlockingQueue { + + /** + * Retrieves and removes the head of this queue, waiting up to the specified wait time if + * necessary for an element to become available. + * + * @param timeout how long to wait before giving up, in units of {@code unit} + * @param unit the time unit of the {@code timeout} argument; must not be {@code null} + * @return the head of this queue, or {@code null} if the specified waiting time elapses before an + * element becomes available + * @throws InterruptedException if interrupted while waiting + */ E poll(long timeout, @Nonnull TimeUnit unit) throws InterruptedException; + /** + * Retrieves and removes the head of this queue, waiting if necessary until an element becomes + * available. + * + *

This operation blocks the consumer thread if the queue is empty, while producers continue to + * operate in a non-blocking manner. + * + * @return the head of this queue + * @throws InterruptedException if interrupted while waiting + */ E take() throws InterruptedException; } diff --git a/utils/queue-utils/src/main/java/datadog/common/queue/JctoolsMpscBlockingConsumerWrappedQueue.java b/utils/queue-utils/src/main/java/datadog/common/queue/JctoolsMpscBlockingConsumerWrappedQueue.java index 3ccf4fe983f..ce4e2b79cc0 100644 --- a/utils/queue-utils/src/main/java/datadog/common/queue/JctoolsMpscBlockingConsumerWrappedQueue.java +++ b/utils/queue-utils/src/main/java/datadog/common/queue/JctoolsMpscBlockingConsumerWrappedQueue.java @@ -5,6 +5,15 @@ import javax.annotation.Nonnull; import org.jctools.queues.MpscBlockingConsumerArrayQueue; +/** + * A {@link BlockingConsumerNonBlockingQueue} implementation that wraps a JCTools {@link + * MpscBlockingConsumerArrayQueue}. + * + *

All operations delegate directly to the underlying JCTools queue to preserve performance and + * memory semantics. + * + * @param the type of elements held in this queue + */ class JctoolsMpscBlockingConsumerWrappedQueue extends JctoolsWrappedQueue implements BlockingConsumerNonBlockingQueue { diff --git a/utils/queue-utils/src/main/java/datadog/common/queue/JctoolsWrappedQueue.java b/utils/queue-utils/src/main/java/datadog/common/queue/JctoolsWrappedQueue.java index ac0c559cc00..5e72a4a7331 100644 --- a/utils/queue-utils/src/main/java/datadog/common/queue/JctoolsWrappedQueue.java +++ b/utils/queue-utils/src/main/java/datadog/common/queue/JctoolsWrappedQueue.java @@ -7,6 +7,15 @@ import javax.annotation.Nonnull; import org.jctools.queues.MessagePassingQueue; +/** + * A {@link NonBlockingQueue} implementation that wraps a {@link MessagePassingQueue} from the + * JCTools library to provide a consistent, framework-independent interface. + * + *

This adapter bridges JCTools’ queue APIs with the {@link NonBlockingQueue} abstraction used by + * this library. All operations are directly delegated to the underlying {@code MessagePassingQueue} + * + * @param the type of elements held in this queue + */ class JctoolsWrappedQueue extends AbstractQueue implements NonBlockingQueue { private final MessagePassingQueue delegate; diff --git a/utils/queue-utils/src/main/java/datadog/common/queue/MpscArrayQueueVarHandle.java b/utils/queue-utils/src/main/java/datadog/common/queue/MpscArrayQueueVarHandle.java index 1ad6d079405..10fda10cb11 100644 --- a/utils/queue-utils/src/main/java/datadog/common/queue/MpscArrayQueueVarHandle.java +++ b/utils/queue-utils/src/main/java/datadog/common/queue/MpscArrayQueueVarHandle.java @@ -72,12 +72,6 @@ public MpscArrayQueueVarHandle(int requestedCapacity) { this.producerLimit = capacity; } - /** - * Attempts to add an element to the queue. - * - * @param e the element to add (must be non-null) - * @return true if element was enqueued, false if queue is full - */ @Override public boolean offer(E e) { Objects.requireNonNull(e); @@ -131,11 +125,6 @@ public boolean offer(E e) { } } - /** - * Removes and returns the next element, or null if empty. - * - * @return dequeued element, or null if queue empty - */ @Override @SuppressWarnings("unchecked") public E poll() { @@ -159,13 +148,6 @@ public E poll() { return (E) value; } - /** - * Returns next element without removing it. - * - *

The memory visibility is only correct if the consumer calls it. - * - * @return next element or null if empty - */ @Override @SuppressWarnings("unchecked") public E peek() { @@ -173,13 +155,6 @@ public E peek() { return (E) ARRAY_HANDLE.getVolatile(buffer, index); } - /** - * Returns number of elements in queue. - * - *

Volatile reads of tail and head ensure accurate result in multi-threaded context. - * - * @return current size - */ @Override public int size() { long currentHead = (long) HEAD_HANDLE.getVolatile(this); diff --git a/utils/queue-utils/src/main/java/datadog/common/queue/MpscBlockingConsumerArrayQueueVarHandle.java b/utils/queue-utils/src/main/java/datadog/common/queue/MpscBlockingConsumerArrayQueueVarHandle.java index 9fb83cbed01..76fb95adfa7 100644 --- a/utils/queue-utils/src/main/java/datadog/common/queue/MpscBlockingConsumerArrayQueueVarHandle.java +++ b/utils/queue-utils/src/main/java/datadog/common/queue/MpscBlockingConsumerArrayQueueVarHandle.java @@ -85,12 +85,6 @@ public MpscBlockingConsumerArrayQueueVarHandle(int requestedCapacity) { this.producerLimit = capacity; } - /** - * Attempts to add an element to the queue. - * - * @param e the element to add (must be non-null) - * @return true if element was enqueued, false if queue is full - */ @Override public boolean offer(E e) { Objects.requireNonNull(e); @@ -151,11 +145,6 @@ public boolean offer(E e) { } } - /** - * Removes and returns the next element, or null if empty. - * - * @return dequeued element, or null if queue empty - */ @Override @SuppressWarnings("unchecked") public E poll() { @@ -179,14 +168,6 @@ public E poll() { return (E) value; } - /** - * Polls with a timeout. - * - * @param timeout max wait time - * @param unit time unit - * @return the head element, or null if timed out - * @throws InterruptedException if interrupted - */ @Override public E poll(long timeout, @Nonnull TimeUnit unit) throws InterruptedException { E e = poll(); @@ -204,12 +185,6 @@ public E poll(long timeout, @Nonnull TimeUnit unit) throws InterruptedException return poll(); } - /** - * Retrieves and removes the head element, waiting if necessary until one becomes available. 
- * - * @return the next element (never null) - * @throws InterruptedException if interrupted while waiting - */ @Override public E take() throws InterruptedException { consumerThread = Thread.currentThread(); @@ -220,13 +195,6 @@ public E take() throws InterruptedException { return e; } - /** - * Returns next element without removing it. - * - *

The memory visibility is only correct if the consumer calls it. - * - * @return next element or null if empty - */ @Override @SuppressWarnings("unchecked") public E peek() { @@ -234,13 +202,6 @@ public E peek() { return (E) ARRAY_HANDLE.getVolatile(buffer, index); } - /** - * Returns number of elements in queue. - * - *

Volatile reads of tail and head ensure accurate result in multi-threaded context. - * - * @return current size - */ @Override public int size() { long currentHead = (long) HEAD_HANDLE.getVolatile(this); @@ -256,7 +217,7 @@ public int size() { * * @param nanos max wait time in nanoseconds. If negative, it will park indefinably until waken or * interrupted - * @throws InterruptedException if interrupted + * @throws InterruptedException if interrupted while waiting */ private void parkUntilNext(long nanos) throws InterruptedException { Thread current = Thread.currentThread(); diff --git a/utils/queue-utils/src/main/java/datadog/common/queue/NonBlockingQueue.java b/utils/queue-utils/src/main/java/datadog/common/queue/NonBlockingQueue.java index 939577f5961..52a50b3e6d6 100644 --- a/utils/queue-utils/src/main/java/datadog/common/queue/NonBlockingQueue.java +++ b/utils/queue-utils/src/main/java/datadog/common/queue/NonBlockingQueue.java @@ -5,14 +5,68 @@ import java.util.function.Supplier; import javax.annotation.Nonnull; +/** + * A non-blocking, concurrent queue supporting high-performance operations for producer-consumer + * scenarios. This interface extends {@link Queue} and adds specialized methods for bulk draining + * and filling, as well as querying the queue’s fixed capacity. + * + *

Unlike typical {@link java.util.concurrent.BlockingQueue} implementations, this interface does + * not provide blocking operations. Instead, producers and consumers are expected to retry or yield + * when the queue is full or empty, respectively. + * + *

Implementations are typically array-backed and rely on non-blocking atomic operations (such as + * VarHandles or Unsafe-based CAS) to achieve concurrent performance without locks. + * + * @param the type of elements held in this queue + * @see java.util.Queue + * @see java.util.concurrent.ConcurrentLinkedQueue + */ public interface NonBlockingQueue extends Queue { + + /** + * Drains all available elements from this queue, passing each to the given {@link Consumer}. + * + *

This method will consume as many elements as are currently available, up to the queue’s size + * at the time of the call. + * + * @param consumer the consumer that will process each element; must not be {@code null} + * @return the number of elements drained + * @throws NullPointerException if {@code consumer} is {@code null} + */ int drain(Consumer consumer); + /** + * Drains up to the specified number of elements from this queue, passing each to the given {@link + * Consumer}. + * + * @param consumer the consumer that will process each element; must not be {@code null} + * @param limit the maximum number of elements to drain + * @return the actual number of elements drained (may be less than {@code limit}) + */ int drain(Consumer consumer, int limit); + /** + * Fills the queue with elements supplied by the given {@link Supplier}, up to the specified limit + * or until the queue becomes full. + * + * @param supplier the supplier that provides elements to insert; must not be {@code null} + * @param limit the maximum number of elements to insert + * @return the number of elements successfully added (may be less than {@code limit}) + */ int fill(@Nonnull Supplier supplier, int limit); + /** + * Returns the number of additional elements that can be inserted into this queue without + * exceeding its capacity. + * + * @return the number of remaining slots available for insertion + */ int remainingCapacity(); + /** + * Returns the total fixed capacity of this queue. 
+ * + * @return the maximum number of elements this queue can hold + */ int capacity(); } diff --git a/utils/queue-utils/src/main/java/datadog/common/queue/Queues.java b/utils/queue-utils/src/main/java/datadog/common/queue/Queues.java index 38370f0d216..997a7159c95 100644 --- a/utils/queue-utils/src/main/java/datadog/common/queue/Queues.java +++ b/utils/queue-utils/src/main/java/datadog/common/queue/Queues.java @@ -6,11 +6,33 @@ import org.jctools.queues.SpmcArrayQueue; import org.jctools.queues.SpscArrayQueue; +/** + * A utility class for creating various high-performance queue implementations used for inter-thread + * communication. This class provides factory methods for creating non-blocking and + * partially-blocking queues optimized for different producer-consumer configurations. + * + *

Depending on the Java runtime version, this class will choose the most efficient + * implementation available: + * + *

    + *
  • For Java 9 and above, {@code VarHandle}-based queue implementations are used for improved + * performance without relying on {@code sun.misc.Unsafe}. + *
  • For Java 8, {@code JCTools}-based wrappers are used instead. + *
+ */ public final class Queues { + private static final boolean CAN_USE_VARHANDLES = JavaVirtualMachine.isJavaVersionAtLeast(9); private Queues() {} + /** + * Creates a Multiple Producer, Single Consumer (MPSC) array-backed queue. + * + * @param requestedCapacity the requested capacity of the queue. Will be rounded to the next power + * of two. + * @return a new {@link NonBlockingQueue} instance suitable for MPSC usage + */ public static NonBlockingQueue mpscArrayQueue(int requestedCapacity) { if (CAN_USE_VARHANDLES) { return new MpscArrayQueueVarHandle<>(requestedCapacity); @@ -18,6 +40,14 @@ public static NonBlockingQueue mpscArrayQueue(int requestedCapacity) { return new JctoolsWrappedQueue<>(new MpscArrayQueue<>(requestedCapacity)); } + /** + * Creates a Single Producer, Multiple Consumer (SPMC) array-backed queue. + * + *

\ * @param requestedCapacity the requested capacity of the queue. Will be rounded to the + * next power of two. + * + * @return a new {@link NonBlockingQueue} instance suitable for SPMC usage + */ public static NonBlockingQueue spmcArrayQueue(int requestedCapacity) { if (CAN_USE_VARHANDLES) { return new SpmcArrayQueueVarHandle<>(requestedCapacity); @@ -25,6 +55,15 @@ public static NonBlockingQueue spmcArrayQueue(int requestedCapacity) { return new JctoolsWrappedQueue<>(new SpmcArrayQueue<>(requestedCapacity)); } + /** + * Creates a Multiple Producer, Single Consumer (MPSC) array-backed queue that allows blocking + * behavior for the consumer. + * + * @param requestedCapacity the requested capacity of the queue. Will be rounded to the next power + * of two. + * @return a new {@link BlockingConsumerNonBlockingQueue} instance suitable for MPSC usage with + * blocking consumption + */ public static BlockingConsumerNonBlockingQueue mpscBlockingConsumerArrayQueue( int requestedCapacity) { if (CAN_USE_VARHANDLES) { @@ -34,6 +73,13 @@ public static BlockingConsumerNonBlockingQueue mpscBlockingConsumerArrayQ new MpscBlockingConsumerArrayQueue<>(requestedCapacity)); } + /** + * Creates a Single Producer, Single Consumer (SPSC) array-backed queue. + * + * @param requestedCapacity the requested capacity of the queue. Will be rounded to the next power + * of two. 
+ * @return a new {@link NonBlockingQueue} instance suitable for SPSC usage + */ public static NonBlockingQueue spscArrayQueue(int requestedCapacity) { if (CAN_USE_VARHANDLES) { return new SpscArrayQueueVarHandle<>(requestedCapacity); diff --git a/utils/queue-utils/src/main/java/datadog/common/queue/SpmcArrayQueueVarHandle.java b/utils/queue-utils/src/main/java/datadog/common/queue/SpmcArrayQueueVarHandle.java index d93e8f08e07..6358cbe6f91 100644 --- a/utils/queue-utils/src/main/java/datadog/common/queue/SpmcArrayQueueVarHandle.java +++ b/utils/queue-utils/src/main/java/datadog/common/queue/SpmcArrayQueueVarHandle.java @@ -62,14 +62,6 @@ public SpmcArrayQueueVarHandle(int requestedCapacity) { this.buffer = new Object[capacity]; } - /** - * Adds an element to the queue. - * - *

Single-producer: no CAS needed. Uses a release-store to ensure consumers see the write. - * - * @param e element to enqueue (must be non-null) - * @return true if element was added, false if queue is full - */ @Override public boolean offer(E e) { Objects.requireNonNull(e); @@ -92,11 +84,6 @@ public boolean offer(E e) { return true; } - /** - * Removes and returns the next element, or null if empty. - * - * @return dequeued element, or null if queue is empty - */ @Override @SuppressWarnings("unchecked") public E poll() { @@ -132,11 +119,6 @@ public E poll() { } } - /** - * Returns the next element without removing it. - * - * @return next element or null if queue empty - */ @Override @SuppressWarnings("unchecked") public E peek() { @@ -150,11 +132,6 @@ public E peek() { return (E) ARRAY_HANDLE.getAcquire(localBuffer, index); // acquire-load ensures visibility } - /** - * Returns the approximate number of elements in the queue. - * - * @return current queue size - */ @Override public int size() { long currentTail = (long) TAIL_HANDLE.getVolatile(this); diff --git a/utils/queue-utils/src/main/java/datadog/common/queue/SpscArrayQueueVarHandle.java b/utils/queue-utils/src/main/java/datadog/common/queue/SpscArrayQueueVarHandle.java index 9f772063822..9d8f6bf8fa7 100644 --- a/utils/queue-utils/src/main/java/datadog/common/queue/SpscArrayQueueVarHandle.java +++ b/utils/queue-utils/src/main/java/datadog/common/queue/SpscArrayQueueVarHandle.java @@ -60,13 +60,6 @@ public SpscArrayQueueVarHandle(int requestedCapacity) { this.buffer = new Object[capacity]; } - /** - * Enqueues an element if space is available. 
- * - * @param e the element to enqueue - * @return {@code true} if enqueued, {@code false} if the queue is full - * @throws NullPointerException if {@code e} is null - */ @Override public boolean offer(E e) { Objects.requireNonNull(e); @@ -87,14 +80,6 @@ public boolean offer(E e) { return true; } - /** - * Dequeues and returns the next element, or {@code null} if the queue is empty. - * - *

Since only one consumer exists, this method is race-free and does not need CAS. It uses - * acquire semantics to ensure the element is fully visible. - * - * @return the dequeued element, or {@code null} if empty - */ @Override @SuppressWarnings("unchecked") public E poll() { From 0a7258797dbac9e05a3c9f9e57854da497dd62a3 Mon Sep 17 00:00:00 2001 From: Andrea Marziali Date: Mon, 10 Nov 2025 13:40:56 +0100 Subject: [PATCH 16/18] Final updates --- ...olsMPSCBlockingConsumerQueueBenchmark.java | 70 ++++++++++++++ .../queue/JctoolsMPSCQueueBenchmark.java | 66 +++++++++++++ .../queue/JctoolsSPSCQueueBenchmark.java | 66 +++++++++++++ .../MPSCBlockingConsumerQueueBenchmark.java | 14 ++- .../common/queue/MPSCQueueBenchmark.java | 13 ++- .../common/queue/SPSCQueueBenchmark.java | 23 ++--- .../java/datadog/common/queue/BaseQueue.java | 52 ++++++++++- .../common/queue/MpscArrayQueueVarHandle.java | 58 ++---------- ...scBlockingConsumerArrayQueueVarHandle.java | 89 ++---------------- .../common/queue/SpmcArrayQueueVarHandle.java | 93 +++++++------------ .../common/queue/SpscArrayQueueVarHandle.java | 50 +--------- 11 files changed, 324 insertions(+), 270 deletions(-) create mode 100644 utils/queue-utils/src/jmh/java/datadog/common/queue/JctoolsMPSCBlockingConsumerQueueBenchmark.java create mode 100644 utils/queue-utils/src/jmh/java/datadog/common/queue/JctoolsMPSCQueueBenchmark.java create mode 100644 utils/queue-utils/src/jmh/java/datadog/common/queue/JctoolsSPSCQueueBenchmark.java diff --git a/utils/queue-utils/src/jmh/java/datadog/common/queue/JctoolsMPSCBlockingConsumerQueueBenchmark.java b/utils/queue-utils/src/jmh/java/datadog/common/queue/JctoolsMPSCBlockingConsumerQueueBenchmark.java new file mode 100644 index 00000000000..1007ef9b03a --- /dev/null +++ b/utils/queue-utils/src/jmh/java/datadog/common/queue/JctoolsMPSCBlockingConsumerQueueBenchmark.java @@ -0,0 +1,70 @@ +package datadog.common.queue; + +import java.util.concurrent.CountDownLatch; +import 
java.util.concurrent.TimeUnit; +import org.jctools.queues.MpscBlockingConsumerArrayQueue; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Group; +import org.openjdk.jmh.annotations.GroupThreads; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +/* +Benchmark (capacity) Mode Cnt Score Error Units +MPSCBlockingConsumerQueueBenchmark.queueTest 1024 thrpt 121.534 ops/us +MPSCBlockingConsumerQueueBenchmark.queueTest:async 1024 thrpt NaN --- +MPSCBlockingConsumerQueueBenchmark.queueTest:consume 1024 thrpt 110.962 ops/us +MPSCBlockingConsumerQueueBenchmark.queueTest:produce 1024 thrpt 10.572 ops/us +MPSCBlockingConsumerQueueBenchmark.queueTest 65536 thrpt 126.856 ops/us +MPSCBlockingConsumerQueueBenchmark.queueTest:async 65536 thrpt NaN --- +MPSCBlockingConsumerQueueBenchmark.queueTest:consume 65536 thrpt 113.213 ops/us +MPSCBlockingConsumerQueueBenchmark.queueTest:produce 65536 thrpt 13.644 ops/us +*/ +@BenchmarkMode(Mode.Throughput) +@Warmup(iterations = 1, time = 30) +@Measurement(iterations = 1, time = 30) +@Fork(1) +@OutputTimeUnit(TimeUnit.MICROSECONDS) +@State(Scope.Benchmark) +public class JctoolsMPSCBlockingConsumerQueueBenchmark { + @State(Scope.Group) + public static class QueueState { + MpscBlockingConsumerArrayQueue queue; + CountDownLatch consumerReady; + + @Param({"1024", "65536"}) + int capacity; + + @Setup(Level.Iteration) + public void setup() { + queue = new MpscBlockingConsumerArrayQueue<>(capacity); + } + } + + @Benchmark + 
@Group("queueTest") + @GroupThreads(4) + public void produce(QueueState state, Blackhole bh) { + bh.consume(state.queue.offer(1)); + } + + @Benchmark + @Group("queueTest") + @GroupThreads(1) + public void consume(QueueState state, Blackhole bh) { + Integer v = state.queue.poll(); + if (v != null) { + bh.consume(v); + } + } +} diff --git a/utils/queue-utils/src/jmh/java/datadog/common/queue/JctoolsMPSCQueueBenchmark.java b/utils/queue-utils/src/jmh/java/datadog/common/queue/JctoolsMPSCQueueBenchmark.java new file mode 100644 index 00000000000..e1c1e1cfa0b --- /dev/null +++ b/utils/queue-utils/src/jmh/java/datadog/common/queue/JctoolsMPSCQueueBenchmark.java @@ -0,0 +1,66 @@ +package datadog.common.queue; + +import java.util.concurrent.TimeUnit; +import org.jctools.queues.MpscArrayQueue; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Group; +import org.openjdk.jmh.annotations.GroupThreads; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +/* +Benchmark (capacity) Mode Cnt Score Error Units +JctoolsMPSCQueueBenchmark.queueTest 1024 thrpt 29.444 ops/us +JctoolsMPSCQueueBenchmark.queueTest:consume 1024 thrpt 21.230 ops/us +JctoolsMPSCQueueBenchmark.queueTest:produce 1024 thrpt 8.214 ops/us +JctoolsMPSCQueueBenchmark.queueTest 65536 thrpt 30.218 ops/us +JctoolsMPSCQueueBenchmark.queueTest:consume 65536 thrpt 22.846 ops/us +JctoolsMPSCQueueBenchmark.queueTest:produce 65536 thrpt 7.372 ops/us + */ +@BenchmarkMode(Mode.Throughput) +@Warmup(iterations = 1, time 
= 30) +@Measurement(iterations = 1, time = 30) +@Fork(1) +@OutputTimeUnit(TimeUnit.MICROSECONDS) +@State(Scope.Benchmark) +public class JctoolsMPSCQueueBenchmark { + @State(Scope.Group) + public static class QueueState { + MpscArrayQueue queue; + + @Param({"1024", "65536"}) + int capacity; + + @Setup(Level.Iteration) + public void setup() { + queue = new MpscArrayQueue<>(capacity); + } + } + + @Benchmark + @Group("queueTest") + @GroupThreads(4) + public void produce(QueueState state, Blackhole blackhole) { + blackhole.consume(state.queue.offer(0)); + } + + @Benchmark + @Group("queueTest") + @GroupThreads(1) + public void consume(QueueState state, Blackhole bh) { + Integer v = state.queue.poll(); + if (v != null) { + bh.consume(v); + } + } +} diff --git a/utils/queue-utils/src/jmh/java/datadog/common/queue/JctoolsSPSCQueueBenchmark.java b/utils/queue-utils/src/jmh/java/datadog/common/queue/JctoolsSPSCQueueBenchmark.java new file mode 100644 index 00000000000..7219cb12f73 --- /dev/null +++ b/utils/queue-utils/src/jmh/java/datadog/common/queue/JctoolsSPSCQueueBenchmark.java @@ -0,0 +1,66 @@ +package datadog.common.queue; + +import java.util.concurrent.TimeUnit; +import org.jctools.queues.SpscArrayQueue; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Group; +import org.openjdk.jmh.annotations.GroupThreads; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +/* +Benchmark (capacity) Mode Cnt Score Error Units +JctoolsSPSCQueueBenchmark.queueTest 1024 thrpt 
268.927 ops/us +JctoolsSPSCQueueBenchmark.queueTest:consume 1024 thrpt 135.287 ops/us +JctoolsSPSCQueueBenchmark.queueTest:produce 1024 thrpt 133.640 ops/us +JctoolsSPSCQueueBenchmark.queueTest 65536 thrpt 531.895 ops/us +JctoolsSPSCQueueBenchmark.queueTest:consume 65536 thrpt 266.084 ops/us +JctoolsSPSCQueueBenchmark.queueTest:produce 65536 thrpt 265.811 ops/us + */ +@BenchmarkMode(Mode.Throughput) +@Warmup(iterations = 3, time = 10) +@Measurement(iterations = 1, time = 30) +@Fork(1) +@OutputTimeUnit(TimeUnit.MICROSECONDS) +@State(Scope.Benchmark) +public class JctoolsSPSCQueueBenchmark { + @State(Scope.Group) + public static class QueueState { + SpscArrayQueue queue; + + @Param({"1024", "65536"}) + int capacity; + + @Setup(Level.Iteration) + public void setup() { + queue = new SpscArrayQueue<>(capacity); + } + } + + @Benchmark + @Group("queueTest") + @GroupThreads(1) + public void produce(QueueState state, Blackhole bh) { + bh.consume(state.queue.offer(0)); + } + + @Benchmark + @Group("queueTest") + @GroupThreads(1) + public void consume(QueueState state, Blackhole bh) { + Integer v = state.queue.poll(); + if (v != null) { + bh.consume(v); + } + } +} diff --git a/utils/queue-utils/src/jmh/java/datadog/common/queue/MPSCBlockingConsumerQueueBenchmark.java b/utils/queue-utils/src/jmh/java/datadog/common/queue/MPSCBlockingConsumerQueueBenchmark.java index f778f61c478..a3af314e1c7 100644 --- a/utils/queue-utils/src/jmh/java/datadog/common/queue/MPSCBlockingConsumerQueueBenchmark.java +++ b/utils/queue-utils/src/jmh/java/datadog/common/queue/MPSCBlockingConsumerQueueBenchmark.java @@ -20,14 +20,12 @@ /* Benchmark (capacity) Mode Cnt Score Error Units -MPSCBlockingConsumerQueueBenchmark.queueTest 1024 thrpt 121.534 ops/us -MPSCBlockingConsumerQueueBenchmark.queueTest:async 1024 thrpt NaN --- -MPSCBlockingConsumerQueueBenchmark.queueTest:consume 1024 thrpt 110.962 ops/us -MPSCBlockingConsumerQueueBenchmark.queueTest:produce 1024 thrpt 10.572 ops/us 
-MPSCBlockingConsumerQueueBenchmark.queueTest 65536 thrpt 126.856 ops/us -MPSCBlockingConsumerQueueBenchmark.queueTest:async 65536 thrpt NaN --- -MPSCBlockingConsumerQueueBenchmark.queueTest:consume 65536 thrpt 113.213 ops/us -MPSCBlockingConsumerQueueBenchmark.queueTest:produce 65536 thrpt 13.644 ops/us +MPSCBlockingConsumerQueueBenchmark.queueTest 1024 thrpt 237.384 ops/us +MPSCBlockingConsumerQueueBenchmark.queueTest:consume 1024 thrpt 225.826 ops/us +MPSCBlockingConsumerQueueBenchmark.queueTest:produce 1024 thrpt 11.558 ops/us +MPSCBlockingConsumerQueueBenchmark.queueTest 65536 thrpt 120.258 ops/us +MPSCBlockingConsumerQueueBenchmark.queueTest:consume 65536 thrpt 112.679 ops/us +MPSCBlockingConsumerQueueBenchmark.queueTest:produce 65536 thrpt 7.579 ops/us */ @BenchmarkMode(Mode.Throughput) @Warmup(iterations = 1, time = 30) diff --git a/utils/queue-utils/src/jmh/java/datadog/common/queue/MPSCQueueBenchmark.java b/utils/queue-utils/src/jmh/java/datadog/common/queue/MPSCQueueBenchmark.java index 44b3a58421b..cc6eead0110 100644 --- a/utils/queue-utils/src/jmh/java/datadog/common/queue/MPSCQueueBenchmark.java +++ b/utils/queue-utils/src/jmh/java/datadog/common/queue/MPSCQueueBenchmark.java @@ -19,13 +19,12 @@ /* Benchmark (capacity) Mode Cnt Score Error Units -MPSCQueueBenchmark.queueTest 1024 thrpt 272.751 ops/us -MPSCQueueBenchmark.queueTest:consume 1024 thrpt 258.737 ops/us -MPSCQueueBenchmark.queueTest:produce 1024 thrpt 14.013 ops/us -MPSCQueueBenchmark.queueTest:·async 1024 thrpt NaN --- -MPSCQueueBenchmark.queueTest 65536 thrpt 120.776 ops/us -MPSCQueueBenchmark.queueTest:consume 65536 thrpt 108.595 ops/us -MPSCQueueBenchmark.queueTest:produce 65536 thrpt 12.182 ops/us +MPSCQueueBenchmark.queueTest 1024 thrpt 1229.501 ops/us +MPSCQueueBenchmark.queueTest:consume 1024 thrpt 503.469 ops/us +MPSCQueueBenchmark.queueTest:produce 1024 thrpt 726.033 ops/us +MPSCQueueBenchmark.queueTest 65536 thrpt 136.218 ops/us +MPSCQueueBenchmark.queueTest:consume 65536 thrpt 
122.937 ops/us +MPSCQueueBenchmark.queueTest:produce 65536 thrpt 13.281 ops/us */ @BenchmarkMode(Mode.Throughput) @Warmup(iterations = 1, time = 30) diff --git a/utils/queue-utils/src/jmh/java/datadog/common/queue/SPSCQueueBenchmark.java b/utils/queue-utils/src/jmh/java/datadog/common/queue/SPSCQueueBenchmark.java index b0ab1b34998..6190d7d8a4e 100644 --- a/utils/queue-utils/src/jmh/java/datadog/common/queue/SPSCQueueBenchmark.java +++ b/utils/queue-utils/src/jmh/java/datadog/common/queue/SPSCQueueBenchmark.java @@ -19,15 +19,15 @@ /* Benchmark (capacity) Mode Cnt Score Error Units -SPSCQueueBenchmark.queueTest 1024 thrpt 91.112 ops/us -SPSCQueueBenchmark.queueTest:consume 1024 thrpt 52.640 ops/us -SPSCQueueBenchmark.queueTest:produce 1024 thrpt 38.472 ops/us -SPSCQueueBenchmark.queueTest 65536 thrpt 140.663 ops/us -SPSCQueueBenchmark.queueTest:consume 65536 thrpt 70.363 ops/us -SPSCQueueBenchmark.queueTest:produce 65536 thrpt 70.300 ops/us +SPSCQueueBenchmark.queueTest 1024 thrpt 115.861 ops/us +SPSCQueueBenchmark.queueTest:consume 1024 thrpt 83.922 ops/us +SPSCQueueBenchmark.queueTest:produce 1024 thrpt 31.939 ops/us +SPSCQueueBenchmark.queueTest 65536 thrpt 543,237 ops/us +SPSCQueueBenchmark.queueTest:consume 65536 thrpt 280,208 ops/us +SPSCQueueBenchmark.queueTest:produce 65536 thrpt 263,029 ops/us */ @BenchmarkMode(Mode.Throughput) -@Warmup(iterations = 1, time = 30) +@Warmup(iterations = 3, time = 10) @Measurement(iterations = 1, time = 30) @Fork(1) @OutputTimeUnit(TimeUnit.MICROSECONDS) @@ -49,13 +49,8 @@ public void setup() { @Benchmark @Group("queueTest") @GroupThreads(1) - public void produce(QueueState state) { - - // bounded attempt: try once, then yield if full - boolean offered = state.queue.offer(0); - if (!offered) { - Thread.yield(); - } + public void produce(QueueState state, Blackhole bh) { + bh.consume(state.queue.offer(0)); } @Benchmark diff --git a/utils/queue-utils/src/main/java/datadog/common/queue/BaseQueue.java 
b/utils/queue-utils/src/main/java/datadog/common/queue/BaseQueue.java index 2ee78253340..6ece9fd588b 100644 --- a/utils/queue-utils/src/main/java/datadog/common/queue/BaseQueue.java +++ b/utils/queue-utils/src/main/java/datadog/common/queue/BaseQueue.java @@ -2,6 +2,8 @@ import static datadog.trace.util.BitUtils.nextPowerOfTwo; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.VarHandle; import java.util.AbstractQueue; import java.util.Iterator; import java.util.function.Consumer; @@ -14,15 +16,52 @@ * @param the type of elements held by this queue */ abstract class BaseQueue extends AbstractQueue implements NonBlockingQueue { + protected static final VarHandle HEAD_HANDLE; + protected static final VarHandle TAIL_HANDLE; + protected static final VarHandle ARRAY_HANDLE; + + static { + try { + final MethodHandles.Lookup lookup = MethodHandles.lookup(); + HEAD_HANDLE = lookup.findVarHandle(BaseQueue.class, "head", long.class); + TAIL_HANDLE = lookup.findVarHandle(BaseQueue.class, "tail", long.class); + ARRAY_HANDLE = MethodHandles.arrayElementVarHandle(Object[].class); + } catch (ReflectiveOperationException e) { + throw new ExceptionInInitializerError(e); + } + } + /** The capacity of the queue (must be a power of two) */ protected final int capacity; /** Mask for fast modulo operation (index = pos & mask) */ protected final int mask; + /** The backing array (plain Java array for VarHandle access) */ + protected final Object[] buffer; + + // Padding to avoid false sharing + @SuppressWarnings("unused") + private long p0, p1, p2, p3, p4, p5, p6; + + /** Next free slot for producer (single-threaded) */ + protected volatile long tail = 0L; + + // Padding around tail + @SuppressWarnings("unused") + private long q0, q1, q2, q3, q4, q5, q6; + + /** Next slot to consume (multi-threaded) */ + protected volatile long head = 0L; + + // Padding around head + @SuppressWarnings("unused") + private long r0, r1, r2, r3, r4, r5, r6; + public BaseQueue(int capacity) { 
this.capacity = nextPowerOfTwo(capacity); this.mask = this.capacity - 1; + this.buffer = new Object[capacity]; } @Override @@ -69,17 +108,24 @@ public int fill(@Nonnull Supplier supplier, int limit) { * @throws UnsupportedOperationException always */ @Override - public Iterator iterator() { + public final Iterator iterator() { throw new UnsupportedOperationException(); } @Override - public int remainingCapacity() { + public final int remainingCapacity() { return capacity - size(); } @Override - public int capacity() { + public final int capacity() { return capacity; } + + @Override + public final int size() { + long currentTail = (long) TAIL_HANDLE.getVolatile(this); + long currentHead = (long) HEAD_HANDLE.getVolatile(this); + return (int) (currentTail - currentHead); + } } diff --git a/utils/queue-utils/src/main/java/datadog/common/queue/MpscArrayQueueVarHandle.java b/utils/queue-utils/src/main/java/datadog/common/queue/MpscArrayQueueVarHandle.java index 10fda10cb11..9e3e0fb65cf 100644 --- a/utils/queue-utils/src/main/java/datadog/common/queue/MpscArrayQueueVarHandle.java +++ b/utils/queue-utils/src/main/java/datadog/common/queue/MpscArrayQueueVarHandle.java @@ -1,8 +1,5 @@ package datadog.common.queue; -import java.lang.invoke.MethodHandles; -import java.lang.invoke.MethodHandles.Lookup; -import java.lang.invoke.VarHandle; import java.util.Objects; import java.util.concurrent.locks.LockSupport; @@ -15,51 +12,16 @@ * @param the type of elements stored */ class MpscArrayQueueVarHandle extends BaseQueue { - private static final VarHandle ARRAY_HANDLE; - private static final VarHandle HEAD_HANDLE; - private static final VarHandle TAIL_HANDLE; - private static final VarHandle PRODUCER_LIMIT_HANDLE; - - static { - try { - final Lookup lookup = MethodHandles.lookup(); - TAIL_HANDLE = lookup.findVarHandle(MpscArrayQueueVarHandle.class, "tail", long.class); - HEAD_HANDLE = lookup.findVarHandle(MpscArrayQueueVarHandle.class, "head", long.class); - ARRAY_HANDLE = 
MethodHandles.arrayElementVarHandle(Object[].class); - PRODUCER_LIMIT_HANDLE = - lookup.findVarHandle(MpscArrayQueueVarHandle.class, "producerLimit", long.class); - } catch (Throwable t) { - throw new IllegalStateException(t); - } - } - - /** The backing array (plain Java array for VarHandle access) */ - private final Object[] buffer; - // Padding to prevent false sharing @SuppressWarnings("unused") private long p0, p1, p2, p3, p4, p5, p6; - /** Next free slot for producers (multi-threaded) */ - private volatile long tail = 0L; - - // Padding around tail - @SuppressWarnings("unused") - private long q0, q1, q2, q3, q4, q5, q6; - /** Cached producer limit to reduce volatile head reads */ - private volatile long producerLimit = 0L; + protected volatile long producerLimit = 0L; // Padding around producerLimit @SuppressWarnings("unused") - private long r0, r1, r2, r3, r4, r5, r6; - - /** Next slot to consume (single-threaded) */ - private volatile long head = 0L; - - // Padding around head - @SuppressWarnings("unused") - private long s0, s1, s2, s3, s4, s5, s6; + private long q0, q1, q2, q3, q4, q5, q6; /** * Creates a new MPSC queue. 
@@ -68,7 +30,6 @@ class MpscArrayQueueVarHandle extends BaseQueue { */ public MpscArrayQueueVarHandle(int requestedCapacity) { super(requestedCapacity); - this.buffer = new Object[capacity]; this.producerLimit = capacity; } @@ -79,7 +40,7 @@ public boolean offer(E e) { // jctools does the same local copy to have the jitter optimise the accesses final Object[] localBuffer = this.buffer; - long localProducerLimit = (long) PRODUCER_LIMIT_HANDLE.getVolatile(this); + long localProducerLimit = producerLimit; long cachedHead = 0L; // Local cache of head to reduce volatile reads int spinCycles = 0; @@ -99,7 +60,7 @@ public boolean offer(E e) { } // Update producerLimit so other producers also benefit - PRODUCER_LIMIT_HANDLE.setVolatile(this, localProducerLimit); + producerLimit = localProducerLimit; } // Attempt to claim a slot @@ -127,7 +88,7 @@ public boolean offer(E e) { @Override @SuppressWarnings("unchecked") - public E poll() { + public final E poll() { final Object[] localBuffer = this.buffer; long currentHead = (long) HEAD_HANDLE.getOpaque(this); @@ -150,15 +111,8 @@ public E poll() { @Override @SuppressWarnings("unchecked") - public E peek() { + public final E peek() { final int index = (int) ((long) HEAD_HANDLE.getOpaque(this) & mask); return (E) ARRAY_HANDLE.getVolatile(buffer, index); } - - @Override - public int size() { - long currentHead = (long) HEAD_HANDLE.getVolatile(this); - long currentTail = (long) TAIL_HANDLE.getVolatile(this); - return (int) (currentTail - currentHead); - } } diff --git a/utils/queue-utils/src/main/java/datadog/common/queue/MpscBlockingConsumerArrayQueueVarHandle.java b/utils/queue-utils/src/main/java/datadog/common/queue/MpscBlockingConsumerArrayQueueVarHandle.java index 76fb95adfa7..26bec3d0968 100644 --- a/utils/queue-utils/src/main/java/datadog/common/queue/MpscBlockingConsumerArrayQueueVarHandle.java +++ b/utils/queue-utils/src/main/java/datadog/common/queue/MpscBlockingConsumerArrayQueueVarHandle.java @@ -16,64 +16,32 @@ * * 
@param the type of elements stored */ -class MpscBlockingConsumerArrayQueueVarHandle extends BaseQueue +class MpscBlockingConsumerArrayQueueVarHandle extends MpscArrayQueueVarHandle implements BlockingConsumerNonBlockingQueue { - private static final VarHandle ARRAY_HANDLE; - private static final VarHandle HEAD_HANDLE; - private static final VarHandle TAIL_HANDLE; - private static final VarHandle PRODUCER_LIMIT_HANDLE; private static final VarHandle CONSUMER_THREAD_HANDLE; static { try { final Lookup lookup = MethodHandles.lookup(); - TAIL_HANDLE = - lookup.findVarHandle(MpscBlockingConsumerArrayQueueVarHandle.class, "tail", long.class); - HEAD_HANDLE = - lookup.findVarHandle(MpscBlockingConsumerArrayQueueVarHandle.class, "head", long.class); - ARRAY_HANDLE = MethodHandles.arrayElementVarHandle(Object[].class); - PRODUCER_LIMIT_HANDLE = - lookup.findVarHandle( - MpscBlockingConsumerArrayQueueVarHandle.class, "producerLimit", long.class); CONSUMER_THREAD_HANDLE = lookup.findVarHandle( MpscBlockingConsumerArrayQueueVarHandle.class, "consumerThread", Thread.class); } catch (Throwable t) { - throw new IllegalStateException(t); + throw new ExceptionInInitializerError(t); } } - /** The backing array (plain Java array for VarHandle access) */ - private final Object[] buffer; - // Padding to prevent false sharing @SuppressWarnings("unused") private long p0, p1, p2, p3, p4, p5, p6; - /** Next free slot for producers (multi-threaded) */ - private volatile long tail = 0L; + /** Reference to the waiting consumer thread (set atomically). 
*/ + private volatile Thread consumerThread; - // Padding around tail + // Padding around consumerThread @SuppressWarnings("unused") private long q0, q1, q2, q3, q4, q5, q6; - /** Cached producer limit to reduce volatile head reads */ - private volatile long producerLimit = 0L; - - // Padding around producerLimit - @SuppressWarnings("unused") - private long r0, r1, r2, r3, r4, r5, r6; - - /** Next slot to consume (single-threaded) */ - private volatile long head = 0L; - - // Padding around head - @SuppressWarnings("unused") - private long s0, s1, s2, s3, s4, s5, s6; - - /** Reference to the waiting consumer thread (set atomically). */ - private volatile Thread consumerThread; - /** * Creates a new MPSC queue. * @@ -81,18 +49,16 @@ class MpscBlockingConsumerArrayQueueVarHandle extends BaseQueue */ public MpscBlockingConsumerArrayQueueVarHandle(int requestedCapacity) { super(requestedCapacity); - this.buffer = new Object[capacity]; - this.producerLimit = capacity; } @Override - public boolean offer(E e) { + public final boolean offer(E e) { Objects.requireNonNull(e); // jctools does the same local copy to have the jitter optimise the accesses final Object[] localBuffer = this.buffer; - long localProducerLimit = (long) PRODUCER_LIMIT_HANDLE.getVolatile(this); + long localProducerLimit = producerLimit; long cachedHead = 0L; // Local cache of head to reduce volatile reads int spinCycles = 0; @@ -112,7 +78,7 @@ public boolean offer(E e) { } // Update producerLimit so other producers also benefit - PRODUCER_LIMIT_HANDLE.setVolatile(this, localProducerLimit); + producerLimit = localProducerLimit; } // Attempt to claim a slot @@ -146,30 +112,7 @@ public boolean offer(E e) { } @Override - @SuppressWarnings("unchecked") - public E poll() { - final Object[] localBuffer = this.buffer; - - long currentHead = (long) HEAD_HANDLE.getOpaque(this); - final int index = (int) (currentHead & mask); - - // Acquire-load ensures visibility of producer write - Object value = 
ARRAY_HANDLE.getAcquire(localBuffer, index); - if (value == null) { - return null; - } - - // Clear the slot without additional fence - ARRAY_HANDLE.setOpaque(localBuffer, index, null); - - // Advance head using opaque write (consumer-only) - HEAD_HANDLE.setOpaque(this, currentHead + 1); - - return (E) value; - } - - @Override - public E poll(long timeout, @Nonnull TimeUnit unit) throws InterruptedException { + public final E poll(long timeout, @Nonnull TimeUnit unit) throws InterruptedException { E e = poll(); if (e != null) { return e; @@ -195,20 +138,6 @@ public E take() throws InterruptedException { return e; } - @Override - @SuppressWarnings("unchecked") - public E peek() { - final int index = (int) ((long) HEAD_HANDLE.getOpaque(this) & mask); - return (E) ARRAY_HANDLE.getVolatile(buffer, index); - } - - @Override - public int size() { - long currentHead = (long) HEAD_HANDLE.getVolatile(this); - long currentTail = (long) TAIL_HANDLE.getVolatile(this); - return (int) (currentTail - currentHead); - } - /** * Blocks (parks) until an element becomes available or until the specified timeout elapses. * diff --git a/utils/queue-utils/src/main/java/datadog/common/queue/SpmcArrayQueueVarHandle.java b/utils/queue-utils/src/main/java/datadog/common/queue/SpmcArrayQueueVarHandle.java index 6358cbe6f91..e33cbc2a35f 100644 --- a/utils/queue-utils/src/main/java/datadog/common/queue/SpmcArrayQueueVarHandle.java +++ b/utils/queue-utils/src/main/java/datadog/common/queue/SpmcArrayQueueVarHandle.java @@ -1,8 +1,7 @@ package datadog.common.queue; -import java.lang.invoke.MethodHandles; -import java.lang.invoke.VarHandle; import java.util.Objects; +import java.util.concurrent.locks.LockSupport; /** * A Single-Producer, Multiple-Consumer (SPMC) bounded, lock-free queue based on a circular array. 
@@ -11,46 +10,17 @@ * * @param the element type */ -class SpmcArrayQueueVarHandle extends BaseQueue { - - private static final VarHandle HEAD_HANDLE; - private static final VarHandle TAIL_HANDLE; - private static final VarHandle ARRAY_HANDLE; - - static { - try { - final MethodHandles.Lookup lookup = MethodHandles.lookup(); - HEAD_HANDLE = lookup.findVarHandle(SpmcArrayQueueVarHandle.class, "head", long.class); - TAIL_HANDLE = lookup.findVarHandle(SpmcArrayQueueVarHandle.class, "tail", long.class); - ARRAY_HANDLE = MethodHandles.arrayElementVarHandle(Object[].class); - } catch (ReflectiveOperationException e) { - throw new ExceptionInInitializerError(e); - } - } - - /** The backing array (plain Java array for VarHandle access) */ - private final Object[] buffer; - - // Padding to avoid false sharing +final class SpmcArrayQueueVarHandle extends BaseQueue { + // Padding around consumerLimit @SuppressWarnings("unused") private long p0, p1, p2, p3, p4, p5, p6; - /** Next free slot for producer (single-threaded) */ - private volatile long tail = 0L; - - // Padding around tail - @SuppressWarnings("unused") - private long q0, q1, q2, q3, q4, q5, q6; - - /** Next slot to consume (multi-threaded) */ - private volatile long head = 0L; - /** Cached consumer limit to avoid repeated volatile tail reads */ private volatile long consumerLimit = 0L; - // Padding around head + // Padding around consumerLimit @SuppressWarnings("unused") - private long r0, r1, r2, r3, r4, r5, r6; + private long q0, q1, q2, q3, q4, q5, q6; /** * Creates a new SPMC queue. 
@@ -59,7 +29,6 @@ class SpmcArrayQueueVarHandle extends BaseQueue { */ public SpmcArrayQueueVarHandle(int requestedCapacity) { super(requestedCapacity); - this.buffer = new Object[capacity]; } @Override @@ -89,53 +58,61 @@ public boolean offer(E e) { public E poll() { final Object[] localBuffer = this.buffer; + int spinCycles = 0; + boolean parkOnSpin = (Thread.currentThread().getId() & 1) == 0; + while (true) { long currentHead = (long) HEAD_HANDLE.getVolatile(this); - long limit = consumerLimit; // local cached tail + long limit = consumerLimit; // cached tail if (currentHead >= limit) { + // refresh limit once from tail volatile limit = (long) TAIL_HANDLE.getVolatile(this); if (currentHead >= limit) { - return null; // empty + return null; // queue empty } - consumerLimit = limit; // update local view + consumerLimit = limit; // update local cache } // Attempt to claim this slot - if (HEAD_HANDLE.compareAndSet(this, currentHead, currentHead + 1)) { - int index = (int) (currentHead & mask); - Object value; - - // Wait for the producer to publish the value - while ((value = ARRAY_HANDLE.getAcquire(localBuffer, index)) == null) { + if (!HEAD_HANDLE.compareAndSet(this, currentHead, currentHead + 1)) { + // CAS failed. 
Backoff to reduce contention + if ((spinCycles & 1) == 0) { Thread.onSpinWait(); + } else { + if (parkOnSpin) { + LockSupport.parkNanos(1); + } else { + Thread.yield(); + } } + spinCycles++; + continue; + } + + int index = (int) (currentHead & mask); + Object value; - // Clear slot - ARRAY_HANDLE.setOpaque(localBuffer, index, null); - return (E) value; + // Spin-wait until producer publishes + while ((value = ARRAY_HANDLE.getAcquire(localBuffer, index)) == null) { + Thread.onSpinWait(); } - // CAS failed, retry loop + + // Clear slot for GC + ARRAY_HANDLE.setOpaque(localBuffer, index, null); + return (E) value; } } @Override @SuppressWarnings("unchecked") public E peek() { - final Object[] localBuffer = this.buffer; long currentHead = (long) HEAD_HANDLE.getVolatile(this); long currentTail = (long) TAIL_HANDLE.getVolatile(this); if (currentHead >= currentTail) return null; int index = (int) (currentHead & mask); - return (E) ARRAY_HANDLE.getAcquire(localBuffer, index); // acquire-load ensures visibility - } - - @Override - public int size() { - long currentTail = (long) TAIL_HANDLE.getVolatile(this); - long currentHead = (long) HEAD_HANDLE.getVolatile(this); - return (int) (currentTail - currentHead); + return (E) ARRAY_HANDLE.getAcquire(buffer, index); // acquire-load ensures visibility } } diff --git a/utils/queue-utils/src/main/java/datadog/common/queue/SpscArrayQueueVarHandle.java b/utils/queue-utils/src/main/java/datadog/common/queue/SpscArrayQueueVarHandle.java index 9d8f6bf8fa7..77908cc692b 100644 --- a/utils/queue-utils/src/main/java/datadog/common/queue/SpscArrayQueueVarHandle.java +++ b/utils/queue-utils/src/main/java/datadog/common/queue/SpscArrayQueueVarHandle.java @@ -1,55 +1,17 @@ package datadog.common.queue; -import java.lang.invoke.MethodHandles; -import java.lang.invoke.VarHandle; import java.util.Objects; /** - * A high-performance Single-Producer, Single-Consumer (SPSC) bounded queue using a circular buffer - * and VarHandle-based release/acquire 
memory semantics. + * A high-performance Single-Producer, Single-Consumer (SPSC) bounded queue using a circular buffer. * * @param the type of elements held in this queue */ -class SpscArrayQueueVarHandle extends BaseQueue { - /** Backing array storing elements. */ - private final Object[] buffer; - - @SuppressWarnings("unused") - private long p0, p1, p2, p3, p4, p5, p6; - - /** Tail index (producer writes). */ - private volatile long tail = 0L; - - @SuppressWarnings("unused") - private long q0, q1, q2, q3, q4, q5, q6; - @SuppressWarnings("unused") - private long p10, p11, p12, p13, p14, p15, p16; - - /** Head index (consumer writes). */ - private volatile long head = 0L; - - @SuppressWarnings("unused") - private long q10, q11, q12, q13, q14, q15, q16; - +final class SpscArrayQueueVarHandle extends BaseQueue { // These caches eliminate redundant volatile reads private long cachedHead = 0L; // visible only to producer private long cachedTail = 0L; // visible only to consumer - private static final VarHandle HEAD_HANDLE; - private static final VarHandle TAIL_HANDLE; - private static final VarHandle ARRAY_HANDLE; - - static { - try { - final MethodHandles.Lookup lookup = MethodHandles.lookup(); - HEAD_HANDLE = lookup.findVarHandle(SpscArrayQueueVarHandle.class, "head", long.class); - TAIL_HANDLE = lookup.findVarHandle(SpscArrayQueueVarHandle.class, "tail", long.class); - ARRAY_HANDLE = MethodHandles.arrayElementVarHandle(Object[].class); - } catch (Throwable t) { - throw new IllegalStateException(t); - } - } - /** * Creates a new SPSC queue with the specified capacity. Capacity must be a power of two. 
* @@ -57,7 +19,6 @@ class SpscArrayQueueVarHandle extends BaseQueue { */ public SpscArrayQueueVarHandle(int requestedCapacity) { super(requestedCapacity); - this.buffer = new Object[capacity]; } @Override @@ -106,11 +67,4 @@ public E peek() { final int index = (int) ((long) HEAD_HANDLE.getOpaque(this) & mask); return (E) ARRAY_HANDLE.getVolatile(buffer, index); } - - @Override - public int size() { - long currentTail = (long) TAIL_HANDLE.getVolatile(this); - long currentHead = (long) HEAD_HANDLE.getVolatile(this); - return (int) (currentTail - currentHead); - } } From 259eeb565560e299acfc3b99a5ea132ad146e153 Mon Sep 17 00:00:00 2001 From: Andrea Marziali Date: Mon, 10 Nov 2025 15:02:46 +0100 Subject: [PATCH 17/18] oups --- .../src/main/java/datadog/common/queue/BaseQueue.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/queue-utils/src/main/java/datadog/common/queue/BaseQueue.java b/utils/queue-utils/src/main/java/datadog/common/queue/BaseQueue.java index 6ece9fd588b..0fecf772a94 100644 --- a/utils/queue-utils/src/main/java/datadog/common/queue/BaseQueue.java +++ b/utils/queue-utils/src/main/java/datadog/common/queue/BaseQueue.java @@ -58,8 +58,8 @@ abstract class BaseQueue extends AbstractQueue implements NonBlockingQueue @SuppressWarnings("unused") private long r0, r1, r2, r3, r4, r5, r6; - public BaseQueue(int capacity) { - this.capacity = nextPowerOfTwo(capacity); + public BaseQueue(int requestedCapacity) { + this.capacity = nextPowerOfTwo(requestedCapacity); this.mask = this.capacity - 1; this.buffer = new Object[capacity]; } From b2850b38480d41d7a893c4f1f4a31e375f9ff835 Mon Sep 17 00:00:00 2001 From: Andrea Marziali Date: Mon, 10 Nov 2025 17:19:49 +0100 Subject: [PATCH 18/18] Update bench figures --- ...ctoolsMPSCBlockingConsumerQueueBenchmark.java | 16 +++++++--------- .../common/queue/JctoolsMPSCQueueBenchmark.java | 14 +++++++------- .../common/queue/JctoolsSPSCQueueBenchmark.java | 14 +++++++------- 
.../MPSCBlockingConsumerQueueBenchmark.java | 14 +++++++------- .../datadog/common/queue/MPSCQueueBenchmark.java | 14 +++++++------- .../datadog/common/queue/SPSCQueueBenchmark.java | 14 +++++++------- 6 files changed, 42 insertions(+), 44 deletions(-) diff --git a/utils/queue-utils/src/jmh/java/datadog/common/queue/JctoolsMPSCBlockingConsumerQueueBenchmark.java b/utils/queue-utils/src/jmh/java/datadog/common/queue/JctoolsMPSCBlockingConsumerQueueBenchmark.java index 1007ef9b03a..ebc6347c131 100644 --- a/utils/queue-utils/src/jmh/java/datadog/common/queue/JctoolsMPSCBlockingConsumerQueueBenchmark.java +++ b/utils/queue-utils/src/jmh/java/datadog/common/queue/JctoolsMPSCBlockingConsumerQueueBenchmark.java @@ -20,15 +20,13 @@ import org.openjdk.jmh.infra.Blackhole; /* -Benchmark (capacity) Mode Cnt Score Error Units -MPSCBlockingConsumerQueueBenchmark.queueTest 1024 thrpt 121.534 ops/us -MPSCBlockingConsumerQueueBenchmark.queueTest:async 1024 thrpt NaN --- -MPSCBlockingConsumerQueueBenchmark.queueTest:consume 1024 thrpt 110.962 ops/us -MPSCBlockingConsumerQueueBenchmark.queueTest:produce 1024 thrpt 10.572 ops/us -MPSCBlockingConsumerQueueBenchmark.queueTest 65536 thrpt 126.856 ops/us -MPSCBlockingConsumerQueueBenchmark.queueTest:async 65536 thrpt NaN --- -MPSCBlockingConsumerQueueBenchmark.queueTest:consume 65536 thrpt 113.213 ops/us -MPSCBlockingConsumerQueueBenchmark.queueTest:produce 65536 thrpt 13.644 ops/us +Benchmark (capacity) Mode Cnt Score Error Units +JctoolsMPSCBlockingConsumerQueueBenchmark.queueTest 1024 thrpt 41,149 ops/us +JctoolsMPSCBlockingConsumerQueueBenchmark.queueTest:consume 1024 thrpt 30,661 ops/us +JctoolsMPSCBlockingConsumerQueueBenchmark.queueTest:produce 1024 thrpt 10,488 ops/us +JctoolsMPSCBlockingConsumerQueueBenchmark.queueTest 65536 thrpt 32,413 ops/us +JctoolsMPSCBlockingConsumerQueueBenchmark.queueTest:consume 65536 thrpt 24,680 ops/us +JctoolsMPSCBlockingConsumerQueueBenchmark.queueTest:produce 65536 thrpt 7,733 ops/us */ 
@BenchmarkMode(Mode.Throughput) @Warmup(iterations = 1, time = 30) diff --git a/utils/queue-utils/src/jmh/java/datadog/common/queue/JctoolsMPSCQueueBenchmark.java b/utils/queue-utils/src/jmh/java/datadog/common/queue/JctoolsMPSCQueueBenchmark.java index e1c1e1cfa0b..a7ba1fa433e 100644 --- a/utils/queue-utils/src/jmh/java/datadog/common/queue/JctoolsMPSCQueueBenchmark.java +++ b/utils/queue-utils/src/jmh/java/datadog/common/queue/JctoolsMPSCQueueBenchmark.java @@ -19,13 +19,13 @@ import org.openjdk.jmh.infra.Blackhole; /* -Benchmark (capacity) Mode Cnt Score Error Units -JctoolsMPSCQueueBenchmark.queueTest 1024 thrpt 29.444 ops/us -JctoolsMPSCQueueBenchmark.queueTest:consume 1024 thrpt 21.230 ops/us -JctoolsMPSCQueueBenchmark.queueTest:produce 1024 thrpt 8.214 ops/us -JctoolsMPSCQueueBenchmark.queueTest 65536 thrpt 30.218 ops/us -JctoolsMPSCQueueBenchmark.queueTest:consume 65536 thrpt 22.846 ops/us -JctoolsMPSCQueueBenchmark.queueTest:produce 65536 thrpt 7.372 ops/us +Benchmark (capacity) Mode Cnt Score Error Units +JctoolsMPSCQueueBenchmark.queueTest 1024 thrpt 41,784 ops/us +JctoolsMPSCQueueBenchmark.queueTest:consume 1024 thrpt 31,070 ops/us +JctoolsMPSCQueueBenchmark.queueTest:produce 1024 thrpt 10,715 ops/us +JctoolsMPSCQueueBenchmark.queueTest 65536 thrpt 39,589 ops/us +JctoolsMPSCQueueBenchmark.queueTest:consume 65536 thrpt 32,370 ops/us +JctoolsMPSCQueueBenchmark.queueTest:produce 65536 thrpt 7,219 ops/us */ @BenchmarkMode(Mode.Throughput) @Warmup(iterations = 1, time = 30) diff --git a/utils/queue-utils/src/jmh/java/datadog/common/queue/JctoolsSPSCQueueBenchmark.java b/utils/queue-utils/src/jmh/java/datadog/common/queue/JctoolsSPSCQueueBenchmark.java index 7219cb12f73..8ecb13329fd 100644 --- a/utils/queue-utils/src/jmh/java/datadog/common/queue/JctoolsSPSCQueueBenchmark.java +++ b/utils/queue-utils/src/jmh/java/datadog/common/queue/JctoolsSPSCQueueBenchmark.java @@ -19,13 +19,13 @@ import org.openjdk.jmh.infra.Blackhole; /* -Benchmark (capacity) Mode Cnt 
Score Error Units -JctoolsSPSCQueueBenchmark.queueTest 1024 thrpt 268.927 ops/us -JctoolsSPSCQueueBenchmark.queueTest:consume 1024 thrpt 135.287 ops/us -JctoolsSPSCQueueBenchmark.queueTest:produce 1024 thrpt 133.640 ops/us -JctoolsSPSCQueueBenchmark.queueTest 65536 thrpt 531.895 ops/us -JctoolsSPSCQueueBenchmark.queueTest:consume 65536 thrpt 266.084 ops/us -JctoolsSPSCQueueBenchmark.queueTest:produce 65536 thrpt 265.811 ops/us +Benchmark (capacity) Mode Cnt Score Error Units +JctoolsSPSCQueueBenchmark.queueTest 1024 thrpt 259,418 ops/us +JctoolsSPSCQueueBenchmark.queueTest:consume 1024 thrpt 129,694 ops/us +JctoolsSPSCQueueBenchmark.queueTest:produce 1024 thrpt 129,724 ops/us +JctoolsSPSCQueueBenchmark.queueTest 65536 thrpt 537,111 ops/us +JctoolsSPSCQueueBenchmark.queueTest:consume 65536 thrpt 268,577 ops/us +JctoolsSPSCQueueBenchmark.queueTest:produce 65536 thrpt 268,534 ops/us */ @BenchmarkMode(Mode.Throughput) @Warmup(iterations = 3, time = 10) diff --git a/utils/queue-utils/src/jmh/java/datadog/common/queue/MPSCBlockingConsumerQueueBenchmark.java b/utils/queue-utils/src/jmh/java/datadog/common/queue/MPSCBlockingConsumerQueueBenchmark.java index a3af314e1c7..4bb42274b00 100644 --- a/utils/queue-utils/src/jmh/java/datadog/common/queue/MPSCBlockingConsumerQueueBenchmark.java +++ b/utils/queue-utils/src/jmh/java/datadog/common/queue/MPSCBlockingConsumerQueueBenchmark.java @@ -19,13 +19,13 @@ import org.openjdk.jmh.infra.Blackhole; /* -Benchmark (capacity) Mode Cnt Score Error Units -MPSCBlockingConsumerQueueBenchmark.queueTest 1024 thrpt 237.384 ops/us -MPSCBlockingConsumerQueueBenchmark.queueTest:consume 1024 thrpt 225.826 ops/us -MPSCBlockingConsumerQueueBenchmark.queueTest:produce 1024 thrpt 11.558 ops/us -MPSCBlockingConsumerQueueBenchmark.queueTest 65536 thrpt 120.258 ops/us -MPSCBlockingConsumerQueueBenchmark.queueTest:consume 65536 thrpt 112.679 ops/us -MPSCBlockingConsumerQueueBenchmark.queueTest:produce 65536 thrpt 7.579 ops/us +Benchmark (capacity) Mode 
Cnt Score Error Units +MPSCBlockingConsumerQueueBenchmark.queueTest 1024 thrpt 258,074 ops/us +MPSCBlockingConsumerQueueBenchmark.queueTest:consume 1024 thrpt 246,683 ops/us +MPSCBlockingConsumerQueueBenchmark.queueTest:produce 1024 thrpt 11,391 ops/us +MPSCBlockingConsumerQueueBenchmark.queueTest 65536 thrpt 224,982 ops/us +MPSCBlockingConsumerQueueBenchmark.queueTest:consume 65536 thrpt 217,498 ops/us +MPSCBlockingConsumerQueueBenchmark.queueTest:produce 65536 thrpt 7,485 ops/us */ @BenchmarkMode(Mode.Throughput) @Warmup(iterations = 1, time = 30) diff --git a/utils/queue-utils/src/jmh/java/datadog/common/queue/MPSCQueueBenchmark.java b/utils/queue-utils/src/jmh/java/datadog/common/queue/MPSCQueueBenchmark.java index cc6eead0110..66b096e4ece 100644 --- a/utils/queue-utils/src/jmh/java/datadog/common/queue/MPSCQueueBenchmark.java +++ b/utils/queue-utils/src/jmh/java/datadog/common/queue/MPSCQueueBenchmark.java @@ -18,13 +18,13 @@ import org.openjdk.jmh.infra.Blackhole; /* -Benchmark (capacity) Mode Cnt Score Error Units -MPSCQueueBenchmark.queueTest 1024 thrpt 1229.501 ops/us -MPSCQueueBenchmark.queueTest:consume 1024 thrpt 503.469 ops/us -MPSCQueueBenchmark.queueTest:produce 1024 thrpt 726.033 ops/us -MPSCQueueBenchmark.queueTest 65536 thrpt 136.218 ops/us -MPSCQueueBenchmark.queueTest:consume 65536 thrpt 122.937 ops/us -MPSCQueueBenchmark.queueTest:produce 65536 thrpt 13.281 ops/us +Benchmark (capacity) Mode Cnt Score Error Units +MPSCQueueBenchmark.queueTest 1024 thrpt 238,609 ops/us +MPSCQueueBenchmark.queueTest:consume 1024 thrpt 222,383 ops/us +MPSCQueueBenchmark.queueTest:produce 1024 thrpt 16,226 ops/us +MPSCQueueBenchmark.queueTest 65536 thrpt 262,729 ops/us +MPSCQueueBenchmark.queueTest:consume 65536 thrpt 250,627 ops/us +MPSCQueueBenchmark.queueTest:produce 65536 thrpt 12,102 ops/us */ @BenchmarkMode(Mode.Throughput) @Warmup(iterations = 1, time = 30) diff --git a/utils/queue-utils/src/jmh/java/datadog/common/queue/SPSCQueueBenchmark.java 
b/utils/queue-utils/src/jmh/java/datadog/common/queue/SPSCQueueBenchmark.java index 6190d7d8a4e..784d78f77c1 100644 --- a/utils/queue-utils/src/jmh/java/datadog/common/queue/SPSCQueueBenchmark.java +++ b/utils/queue-utils/src/jmh/java/datadog/common/queue/SPSCQueueBenchmark.java @@ -18,13 +18,13 @@ import org.openjdk.jmh.infra.Blackhole; /* -Benchmark (capacity) Mode Cnt Score Error Units -SPSCQueueBenchmark.queueTest 1024 thrpt 115.861 ops/us -SPSCQueueBenchmark.queueTest:consume 1024 thrpt 83.922 ops/us -SPSCQueueBenchmark.queueTest:produce 1024 thrpt 31.939 ops/us -SPSCQueueBenchmark.queueTest 65536 thrpt 543,237 ops/us -SPSCQueueBenchmark.queueTest:consume 65536 thrpt 280,208 ops/us -SPSCQueueBenchmark.queueTest:produce 65536 thrpt 263,029 ops/us +Benchmark (capacity) Mode Cnt Score Error Units +SPSCQueueBenchmark.queueTest 1024 thrpt 101,007 ops/us +SPSCQueueBenchmark.queueTest:consume 1024 thrpt 72,542 ops/us +SPSCQueueBenchmark.queueTest:produce 1024 thrpt 28,465 ops/us +SPSCQueueBenchmark.queueTest 65536 thrpt 353,161 ops/us +SPSCQueueBenchmark.queueTest:consume 65536 thrpt 191,188 ops/us +SPSCQueueBenchmark.queueTest:produce 65536 thrpt 161,973 ops/us */ @BenchmarkMode(Mode.Throughput) @Warmup(iterations = 3, time = 10)