Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Capture span stack trace #21

Merged
merged 12 commits into from
Nov 14, 2023
7 changes: 7 additions & 0 deletions custom/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,11 @@ dependencies {
// needs to be added in order to allow access to AgentListener interface
// this is currently required because autoconfigure is currently not exposed to the extension API.
compileOnly("io.opentelemetry:opentelemetry-sdk-extension-autoconfigure")

// test dependencies
testImplementation("io.opentelemetry:opentelemetry-sdk")
testImplementation("io.opentelemetry.javaagent:opentelemetry-testing-common")
testImplementation("io.opentelemetry:opentelemetry-sdk-testing")
testImplementation("org.assertj:assertj-core:3.24.2") // TODO : remove version for assertj
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[for reviewer] this will likely be handled with #8


}
9 changes: 6 additions & 3 deletions custom/src/main/java/co/elastic/otel/ElasticAttributes.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,14 @@
import io.opentelemetry.api.common.AttributeKey;

public interface ElasticAttributes {
AttributeKey<Long> SELF_TIME_ATTRIBUTE = AttributeKey.longKey("elastic.span.self_time");
AttributeKey<Long> SELF_TIME = AttributeKey.longKey("elastic.span.self_time");
AttributeKey<String> LOCAL_ROOT_ID = AttributeKey.stringKey("elastic.span.local_root.id");
AttributeKey<String> LOCAL_ROOT_NAME = AttributeKey.stringKey("elastic.local_root.name");
AttributeKey<String> LOCAL_ROOT_TYPE = AttributeKey.stringKey("elastic.local_root.type");
AttributeKey<Boolean> IS_LOCAL_ROOT = AttributeKey.booleanKey("elastic.span.is_local_root");
AttributeKey<String> ELASTIC_SPAN_TYPE = AttributeKey.stringKey("elastic.span.type");
AttributeKey<String> ELASTIC_SPAN_SUBTYPE = AttributeKey.stringKey("elastic.span.subtype");
AttributeKey<String> SPAN_TYPE = AttributeKey.stringKey("elastic.span.type");
AttributeKey<String> SPAN_SUBTYPE = AttributeKey.stringKey("elastic.span.subtype");

// TODO : replace this with semantic conventions v1.24.0 equivalent once released
AttributeKey<String> SPAN_STACKTRACE = AttributeKey.stringKey("code.stacktrace");
}
29 changes: 15 additions & 14 deletions custom/src/main/java/co/elastic/otel/ElasticBreakdownMetrics.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,21 +37,21 @@

public class ElasticBreakdownMetrics {

private final ConcurrentHashMap<SpanContext, SpanContextData> elasticSpanData;
private final ConcurrentHashMap<SpanContext, BreakdownData> elasticSpanData;

private ElasticSpanExporter spanExporter;

private LongCounter breakDownCounter;

// sidecar object we store for every span
public static class SpanContextData {
private static class BreakdownData {

private ReadableSpan localRoot;

private final ChildDuration childDuration;
private long selfTime;

public SpanContextData(ReadableSpan localRoot, long start) {
public BreakdownData(ReadableSpan localRoot, long start) {
this.localRoot = localRoot;
this.childDuration = new ChildDuration(start);
this.selfTime = Long.MIN_VALUE;
Expand Down Expand Up @@ -108,7 +108,7 @@ public void onSpanStart(Context parentContext, ReadWriteSpan span) {
// the span is a local root span
localRootSpanContext = spanContext;

elasticSpanData.put(spanContext, new SpanContextData(span, spanStart));
elasticSpanData.put(spanContext, new BreakdownData(span, spanStart));

} else {
ReadableSpan parentSpan = getReadableSpanFromContext(parentContext);
Expand All @@ -118,7 +118,7 @@ public void onSpanStart(Context parentContext, ReadWriteSpan span) {
ReadableSpan localRoot = lookupLocalRootSpan(parentSpan);
localRootSpanContext = localRoot.getSpanContext();
if (localRootSpanContext.isValid()) {
elasticSpanData.put(spanContext, new SpanContextData(localRoot, spanStart));
elasticSpanData.put(spanContext, new BreakdownData(localRoot, spanStart));
}

// update direct parent span child durations for self-time
Expand Down Expand Up @@ -157,11 +157,11 @@ public void onSpanEnd(ReadableSpan span) {
SpanData spanData = span.toSpanData();

// children duration for current span
SpanContextData spanContextData = elasticSpanData.get(spanContext);
BreakdownData spanContextData = elasticSpanData.get(spanContext);
Objects.requireNonNull(spanContextData, "missing elastic span data");

// update children duration for direct parent
SpanContextData parentSpanContextData = elasticSpanData.get(span.getParentSpanContext());
BreakdownData parentSpanContextData = elasticSpanData.get(span.getParentSpanContext());

if (parentSpanContextData != null) { // parent might be already terminated
parentSpanContextData.childDuration.endChild(spanData.getEndEpochNanos());
Expand All @@ -176,14 +176,15 @@ public void onSpanEnd(ReadableSpan span) {
// put measured metric as span attribute to allow using an ingest pipeline to alter
// storage
// ingest pipelines do not have access to _source and thus can't read the metric as-is.
.put(ElasticAttributes.SELF_TIME_ATTRIBUTE, selfTime);
.put(ElasticAttributes.SELF_TIME, selfTime);

// unfortunately here we get a read-only span that has already been ended, thus even a cast to
// ReadWriteSpan
// does not allow us to add extra span attributes
if (spanExporter != null) {
spanContextData.setSelfTime(selfTime);
spanExporter.report(spanContext, spanContextData);
spanExporter.addAttribute(
spanContext, ElasticAttributes.SELF_TIME, spanContextData.getSelfTime());
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[for reviewer] this is the only change in this file, the rest is mostly renaming things.

}

breakDownCounter.add(selfTime, metricAttributes.build());
Expand All @@ -194,25 +195,25 @@ private static AttributesBuilder buildCounterAttributes(Attributes spanAttribute
AttributesBuilder builder =
Attributes.builder()
// default to app/internal unless other span attributes
.put(ElasticAttributes.ELASTIC_SPAN_TYPE, "app")
.put(ElasticAttributes.ELASTIC_SPAN_SUBTYPE, "internal");
.put(ElasticAttributes.SPAN_TYPE, "app")
.put(ElasticAttributes.SPAN_SUBTYPE, "internal");

spanAttributes.forEach(
(k, v) -> {
String key = k.getKey();
if (AttributeType.STRING.equals(k.getType())) {
int index = key.indexOf(".system");
if (index > 0) {
builder.put(ElasticAttributes.ELASTIC_SPAN_TYPE, key.substring(0, index));
builder.put(ElasticAttributes.ELASTIC_SPAN_SUBTYPE, v.toString());
builder.put(ElasticAttributes.SPAN_TYPE, key.substring(0, index));
builder.put(ElasticAttributes.SPAN_SUBTYPE, v.toString());
}
}
});
return builder;
}

private ReadableSpan lookupLocalRootSpan(ReadableSpan span) {
SpanContextData spanContextData = elasticSpanData.get(span.getSpanContext());
BreakdownData spanContextData = elasticSpanData.get(span.getSpanContext());
return spanContextData != null ? spanContextData.localRoot : (ReadableSpan) Span.getInvalid();
}

Expand Down
1 change: 1 addition & 0 deletions custom/src/main/java/co/elastic/otel/ElasticExtension.java
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ public SpanExporter wrapSpanExporter(SpanExporter toWrap) {
profiler.registerExporter(toWrap);
spanExporter = new ElasticSpanExporter(toWrap);
breakdownMetrics.registerSpanExporter(spanExporter);
spanProcessor.registerSpanExporter(spanExporter);
return spanExporter;
}

Expand Down
32 changes: 21 additions & 11 deletions custom/src/main/java/co/elastic/otel/ElasticSpanExporter.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
*/
package co.elastic.otel;

import io.opentelemetry.api.common.AttributeKey;
import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.api.common.AttributesBuilder;
import io.opentelemetry.api.trace.SpanContext;
import io.opentelemetry.sdk.common.CompletableResultCode;
import io.opentelemetry.sdk.resources.Resource;
Expand All @@ -34,34 +36,34 @@ public class ElasticSpanExporter implements SpanExporter {

private final SpanExporter delegate;

private ConcurrentHashMap<SpanContext, ElasticBreakdownMetrics.SpanContextData> storage;
private final ConcurrentHashMap<SpanContext, AttributesBuilder> attributes;

public ElasticSpanExporter(SpanExporter delegate) {
this.delegate = delegate;
this.storage = new ConcurrentHashMap<>();
this.attributes = new ConcurrentHashMap<>();
}

@Override
public CompletableResultCode export(Collection<SpanData> spans) {
// shortcut in the rare case where no filtering is required
if (storage.isEmpty()) {
if (attributes.isEmpty()) {
return delegate.export(spans);
}

List<SpanData> toSend = new ArrayList<>(spans.size());
for (SpanData span : spans) {
SpanContext spanContext = span.getSpanContext();
ElasticBreakdownMetrics.SpanContextData data = storage.remove(spanContext);
if (data == null) {
AttributesBuilder extraAttributes = attributes.remove(spanContext);
if (extraAttributes == null) {
toSend.add(span);
} else {
toSend.add(
new DelegatingSpanData(span) {
@Override
public Attributes getAttributes() {
return span.getAttributes().toBuilder()
.put(ElasticAttributes.SELF_TIME_ATTRIBUTE, data.getSelfTime())
.build();
AttributesBuilder builder = span.getAttributes().toBuilder();
SylvainJuge marked this conversation as resolved.
Show resolved Hide resolved
builder.putAll(extraAttributes.build());
return builder.build();
}

@Override
Expand All @@ -75,13 +77,21 @@ public Resource getResource() {
return delegate.export(toSend);
}

public void report(SpanContext spanContext, ElasticBreakdownMetrics.SpanContextData data) {
this.storage.put(spanContext, data);
/**
 * Records an extra attribute for the span identified by {@code spanContext}.
 *
 * <p>Extra attributes are accumulated in one {@link AttributesBuilder} per span context, stored
 * in the {@code attributes} map. The builder is created the first time an attribute is added for
 * a given span context and reused for later additions.
 *
 * @param spanContext context of the span the attribute belongs to
 * @param key attribute key
 * @param value attribute value
 * @param <T> attribute value type
 */
public <T> void addAttribute(SpanContext spanContext, AttributeKey<T> key, T value) {
  attributes.compute(
      spanContext,
      (ignoredContext, existing) -> {
        // the builder is updated inside compute() so creation and update happen as one
        // atomic step for this map entry
        AttributesBuilder target = existing != null ? existing : Attributes.builder();
        target.put(key, value);
        return target;
      });
}

@Override
public CompletableResultCode flush() {
storage.clear();
attributes.clear();
return delegate.flush();
}

Expand Down
28 changes: 28 additions & 0 deletions custom/src/main/java/co/elastic/otel/ElasticSpanProcessor.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,14 @@
import io.opentelemetry.sdk.trace.ReadWriteSpan;
import io.opentelemetry.sdk.trace.ReadableSpan;
import io.opentelemetry.sdk.trace.SpanProcessor;
import java.io.PrintWriter;
import java.io.StringWriter;

public class ElasticSpanProcessor implements SpanProcessor {

private final ElasticProfiler profiler;
private final ElasticBreakdownMetrics breakdownMetrics;
private ElasticSpanExporter spanExporter;

public ElasticSpanProcessor(ElasticProfiler profiler, ElasticBreakdownMetrics breakdownMetrics) {
this.profiler = profiler;
Expand All @@ -49,6 +52,8 @@ public boolean isStartRequired() {
public void onEnd(ReadableSpan span) {
profiler.onSpanEnd(span);
breakdownMetrics.onSpanEnd(span);

captureStackTrace(span);
}

@Override
Expand All @@ -61,4 +66,27 @@ public CompletableResultCode shutdown() {
profiler.shutdown();
return CompletableResultCode.ofSuccess();
}

/**
 * Registers the exporter that {@code captureStackTrace} uses to attach the captured stack trace
 * to ended spans. While no exporter is registered, {@code captureStackTrace} returns without
 * capturing anything.
 *
 * @param spanExporter exporter that receives the extra span attributes
 */
public void registerSpanExporter(ElasticSpanExporter spanExporter) {
this.spanExporter = spanExporter;
}

private void captureStackTrace(ReadableSpan span) {
if (spanExporter == null) {
return;
}
Throwable exception = new Throwable();
SylvainJuge marked this conversation as resolved.
Show resolved Hide resolved
StringWriter stringWriter = new StringWriter();
try (PrintWriter printWriter = new PrintWriter(stringWriter)) {
exception.printStackTrace(printWriter);
}

// do not overwrite stacktrace if present
if (span.getAttribute(ElasticAttributes.SPAN_STACKTRACE) == null) {
// TODO should we filter-out the calling code that is within the agent: at least onEnd +
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[for reviewer] I don't know if any filtering on the stack trace is applied here, thus I'll leave it as-is for now and wait for the Kibana implementation to see whether any filtering is applied on the UI side.

// captureStackTrace will be included here
spanExporter.addAttribute(
span.getSpanContext(), ElasticAttributes.SPAN_STACKTRACE, stringWriter.toString());
}
}
}
91 changes: 91 additions & 0 deletions custom/src/test/java/co/elastic/otel/ElasticSpanProcessorTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* Licensed to Elasticsearch B.V. under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch B.V. licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package co.elastic.otel;

import static io.opentelemetry.sdk.testing.assertj.OpenTelemetryAssertions.assertThat;
import static io.opentelemetry.sdk.testing.assertj.OpenTelemetryAssertions.satisfies;
import static org.mockito.Mockito.mock;

import io.opentelemetry.api.trace.Tracer;
import io.opentelemetry.sdk.testing.exporter.InMemorySpanExporter;
import io.opentelemetry.sdk.trace.SdkTracerProvider;
import io.opentelemetry.sdk.trace.export.SimpleSpanProcessor;
import org.assertj.core.api.AbstractCharSequenceAssert;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

public class ElasticSpanProcessorTest {

private static final Tracer tracer;
private static final InMemorySpanExporter testExporter;

static {
ElasticSpanProcessor elasticSpanProcessor =
new ElasticSpanProcessor(mock(ElasticProfiler.class), mock(ElasticBreakdownMetrics.class));

testExporter = InMemorySpanExporter.create();
ElasticSpanExporter elasticSpanExporter = new ElasticSpanExporter(testExporter);
elasticSpanProcessor.registerSpanExporter(elasticSpanExporter);

tracer =
SdkTracerProvider.builder()
.addSpanProcessor(elasticSpanProcessor)
.addSpanProcessor(SimpleSpanProcessor.create(elasticSpanExporter))
Comment on lines +48 to +49
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[for reviewer] this emulates how our exporter is usually set up in the agent: first our exporter that modifies spans, then another that exports it. Here we are using the simple exporter that does not use batching and is synchronous.

.build()
.get("for-testing");
}

// Resets the shared in-memory exporter before each test; the tests in this class assert that
// exactly one finished span is present, so they rely on starting from a reset exporter.
@BeforeEach
public void before() {
testExporter.reset();
}

@Test
void spanStackTraceCapture() {
tracer.spanBuilder("span").startSpan().end();

assertThat(testExporter.getFinishedSpanItems())
.hasSize(1)
.first()
.satisfies(
spanData ->
assertThat(spanData)
.hasAttributesSatisfying(
satisfies(
ElasticAttributes.SPAN_STACKTRACE,
AbstractCharSequenceAssert::isNotEmpty)));
Comment on lines +63 to +72
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[for reviewer] otel test utilities provide the assertj extension for easily testing for custom attributes.

}

// A stack trace attribute that is already set on the span must be exported unchanged.
@Test
void spanStackTraceCaptureDoesNotOverwrite() {
  String presetStackTrace = "dummy";

  // start and immediately end a span that already carries the stack trace attribute
  tracer
      .spanBuilder("span")
      .setAttribute(ElasticAttributes.SPAN_STACKTRACE, presetStackTrace)
      .startSpan()
      .end();

  // exactly one span is exported and it still holds the preset value
  assertThat(testExporter.getFinishedSpanItems())
      .hasSize(1)
      .first()
      .satisfies(
          exported ->
              assertThat(exported)
                  .hasAttribute(ElasticAttributes.SPAN_STACKTRACE, presetStackTrace));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ public void healthcheck() throws InterruptedException {
.containsKeys(
"elastic.span.is_local_root",
"elastic.span.local_root.id",
"elastic.span.self_time");
"elastic.span.self_time",
"code.stacktrace");
});
}

Expand Down