Skip to content

Commit c877b2e

Browse files
authored
fix: cure span logs by cutting them by half every time we make a pass. (#87)
* fix: cure span logs by cutting them by half every time we make a pass. * compute minSpanLogsArrSize constant
1 parent 8fcea70 commit c877b2e

File tree

2 files changed

+204
-4
lines changed

2 files changed

+204
-4
lines changed

exporter/kafkaexporter/jaeger_marshaler_curer.go

Lines changed: 70 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"encoding/hex"
88
"fmt"
99
"log"
10+
"math"
1011
"strconv"
1112
"strings"
1213

@@ -25,9 +26,13 @@ const (
2526
maxTruncationTries = 5 // maximum number of times to attempt to truncate tag values.
2627
// suffix used for new attributes created for those whose values have been truncated
2728
// while curing the spans
28-
truncationTagSuffix = ".htcollector.truncated"
29+
truncationTagSuffix = ".htcollector.truncated"
30+
spanLogsTruncationTagName = "htcollector.spanlogstruncated"
2931
)
3032

33+
// We attempt to truncate span logs only if there are at least 2^maxTruncationTries of them
34+
var minSpanLogsArrSize = int(math.Pow(float64(2), float64(maxTruncationTries)))
35+
3136
type jaegerMarshalerCurer struct {
3237
marshaler jaegerSpanMarshaler
3338
version sarama.KafkaVersion
@@ -127,7 +132,9 @@ func (j jaegerMarshalerCurer) spanAsString(span *jaegerproto.Span) string {
127132
}
128133
sb.WriteString("},")
129134
}
130-
sb.WriteString("]")
135+
sb.WriteString("],")
136+
sb.WriteString(fmt.Sprintf("logs count: %d", len(span.Logs)))
137+
sb.WriteString("}")
131138

132139
return sb.String()
133140
}
@@ -199,9 +206,56 @@ func (j jaegerMarshalerCurer) cureSpan(span *jaegerproto.Span, topic string) (*s
199206
attributeValueSize = attributeValueSize / 2
200207
}
201208

209+
// truncating span attributes did not work. try truncating span logs if they are available.
210+
// attempt to truncate only if the number of span logs is at least 2 ^ maxTruncationTries
211+
if len(span.Logs) >= minSpanLogsArrSize {
212+
return j.cureSpanLogs(span, topic)
213+
}
214+
202215
return nil, fmt.Errorf("unable to cure span in %d truncation tries", maxTruncationTries)
203216
}
204217

218+
// if log events are causing the span to be over 1MiB, then this is because there's a
219+
// whole bunch of them and most probably they are repeated. I don't think logs going over 1MiB
220+
// could be caused by the size of the log messages themselves.
221+
// we cut the log events by half every time we make a pass.
222+
func (j jaegerMarshalerCurer) cureSpanLogs(span *jaegerproto.Span, topic string) (*sarama.ProducerMessage, error) {
223+
var appendedTruncationTag bool
224+
for truncationTry := 0; truncationTry < maxTruncationTries; truncationTry++ {
225+
span.Logs = cutSpanLogsByHalf(span.Logs)
226+
227+
// append the "htcollector.spanlogstruncated" attribute to the span tags if it's not been added.
228+
if !appendedTruncationTag {
229+
span.Tags = append(span.Tags, jaegerproto.KeyValue{
230+
Key: spanLogsTruncationTagName,
231+
VType: jaegerproto.ValueType_BOOL,
232+
VBool: true,
233+
})
234+
appendedTruncationTag = true
235+
}
236+
237+
bts, err := j.marshaler.marshal(span)
238+
// return err if there is a problem marshaling
239+
if err != nil {
240+
return nil, err
241+
}
242+
key := []byte(span.TraceID.String())
243+
msg := &sarama.ProducerMessage{
244+
Topic: topic,
245+
Value: sarama.ByteEncoder(bts),
246+
Key: sarama.ByteEncoder(key),
247+
}
248+
249+
// Check if the size is less than the max and if it is return. Otherwise do another pass and cut the logs by half
250+
messageSize := byteSize(msg, j.version)
251+
if messageSize <= j.maxMessageBytes {
252+
return msg, nil
253+
}
254+
}
255+
256+
return nil, fmt.Errorf("unable to cure span logs in %d truncation tries", maxTruncationTries)
257+
}
258+
205259
func valueToString(kv jaegerproto.KeyValue) string {
206260
if kv.VType == jaegerproto.ValueType_STRING {
207261
return kv.GetVStr()
@@ -255,3 +309,17 @@ func byteSize(m *sarama.ProducerMessage, v sarama.KafkaVersion) int {
255309
}
256310
return size
257311
}
312+
313+
// cutSpanLogsByHalf returns the spanLogs with an even-numbered index in the span
314+
// logs array effectively returning half of the span logs in the array.
315+
func cutSpanLogsByHalf(origArr []jaegerproto.Log) []jaegerproto.Log {
316+
newSize := len(origArr) / 2
317+
if len(origArr)%2 != 0 {
318+
newSize = newSize + 1
319+
}
320+
truncatedSpanLogs := make([]jaegerproto.Log, newSize)
321+
for i := 0; i < newSize; i++ {
322+
truncatedSpanLogs[i] = origArr[i*2]
323+
}
324+
return truncatedSpanLogs
325+
}

exporter/kafkaexporter/jaeger_marshaler_curer_test.go

Lines changed: 134 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"fmt"
66
"strings"
77
"testing"
8+
"time"
89

910
"github.com/Shopify/sarama"
1011
"github.com/gogo/protobuf/jsonpb"
@@ -21,6 +22,7 @@ func TestJaegerMarshalerCurer(t *testing.T) {
2122
maxMessageBytes := 1024
2223
maxAttributeValueSize := 256
2324
jsonMarshaler := &jsonpb.Marshaler{}
25+
ts := pcommon.NewTimestampFromTime(time.Now())
2426

2527
td := ptrace.NewTraces()
2628
rs := td.ResourceSpans().AppendEmpty()
@@ -59,6 +61,38 @@ func TestJaegerMarshalerCurer(t *testing.T) {
5961
}
6062
span.Attributes().Insert("big-tag", pcommon.NewValueString(createLongString(maxMessageBytes, "a")))
6163

64+
// Will cure this span by curing the span logs
65+
span = ils.Spans().AppendEmpty()
66+
span.SetName("bar")
67+
span.SetStartTimestamp(pcommon.Timestamp(101))
68+
span.SetEndTimestamp(pcommon.Timestamp(226))
69+
span.SetTraceID(pcommon.NewTraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}))
70+
span.SetSpanID(pcommon.NewSpanID([8]byte{1, 2, 3, 4, 5, 6, 7, 8}))
71+
span.Attributes().Insert("tag10", pcommon.NewValueString("tag10-val"))
72+
span.Attributes().Insert("tag11", pcommon.NewValueString("tag11-val"))
73+
// Add events to span
74+
for i := 0; i < 128; i++ {
75+
se := span.Events().AppendEmpty()
76+
se.SetName(createLongString(1, "a"))
77+
se.SetTimestamp(ts)
78+
}
79+
80+
// Will be unable to cure this span
81+
span = ils.Spans().AppendEmpty()
82+
span.SetName("bar")
83+
span.SetStartTimestamp(pcommon.Timestamp(102))
84+
span.SetEndTimestamp(pcommon.Timestamp(227))
85+
span.SetTraceID(pcommon.NewTraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}))
86+
span.SetSpanID(pcommon.NewSpanID([8]byte{1, 2, 3, 4, 5, 6, 7, 8}))
87+
span.Attributes().Insert("tag10", pcommon.NewValueString("tag10-val"))
88+
span.Attributes().Insert("tag11", pcommon.NewValueString("tag11-val"))
89+
// Add events to span
90+
for i := 0; i < 1024; i++ {
91+
se := span.Events().AppendEmpty()
92+
se.SetName(createLongString(1, "a"))
93+
se.SetTimestamp(ts)
94+
}
95+
6296
batches, err := jaeger.ProtoFromTraces(td)
6397
require.NoError(t, err)
6498

@@ -71,7 +105,7 @@ func TestJaegerMarshalerCurer(t *testing.T) {
71105
jsonByteBuffer0 := new(bytes.Buffer)
72106
require.NoError(t, jsonMarshaler.Marshal(jsonByteBuffer0, batches[0].Spans[0]))
73107

74-
// Get the marshalled bytes of the 2nd span that cannot be cured. Will be the needed for expected value of the tests
108+
// Get the marshalled bytes of the 3rd span that cannot be cured. They are needed for the expected values of the tests
75109
// depending on whether dropSpans is turned on or not.
76110
batches[0].Spans[2].Process = batches[0].Process
77111
jaegerProtoBytes2, err := batches[0].Spans[2].Marshal()
@@ -81,8 +115,19 @@ func TestJaegerMarshalerCurer(t *testing.T) {
81115
jsonByteBuffer2 := new(bytes.Buffer)
82116
require.NoError(t, jsonMarshaler.Marshal(jsonByteBuffer2, batches[0].Spans[2]))
83117

118+
// Get the marshalled bytes of the 5th span that cannot be cured. They are needed for the expected values of the tests
119+
// depending on whether dropSpans is turned on or not.
120+
batches[0].Spans[4].Process = batches[0].Process
121+
jaegerProtoBytes4, err := batches[0].Spans[4].Marshal()
122+
require.NoError(t, err)
123+
require.NotNil(t, jaegerProtoBytes4)
124+
125+
jsonByteBuffer4 := new(bytes.Buffer)
126+
require.NoError(t, jsonMarshaler.Marshal(jsonByteBuffer4, batches[0].Spans[4]))
127+
84128
// expected cured spans should be similar to spans that came in as if they were already cured.
85-
// batches[0].Spans[1] when cured will be the same as curedBatches[0].Spans[0]
129+
// batches[0].Spans[1] when cured will be the same as curedBatches[0].Spans[0] except for the
130+
// cured attribute.
86131
curedTd := ptrace.NewTraces()
87132
curedRs := curedTd.ResourceSpans().AppendEmpty()
88133
curedRs.Resource().Attributes().Insert("test-key", pcommon.NewValueString("test-val"))
@@ -98,6 +143,42 @@ func TestJaegerMarshalerCurer(t *testing.T) {
98143
curedSpan.Attributes().Insert("big-tag", pcommon.NewValueString(createLongString(maxAttributeValueSize, "a")))
99144
curedSpan.Attributes().Insert("big-tag"+truncationTagSuffix, pcommon.NewValueBool(true))
100145

146+
// batches[0].Spans[3] when cured will be the same as curedBatches[0].Spans[1] except for the truncated log events.
147+
curedSpan = curedIls.Spans().AppendEmpty()
148+
curedSpan.SetName("bar")
149+
curedSpan.SetStartTimestamp(pcommon.Timestamp(101))
150+
curedSpan.SetEndTimestamp(pcommon.Timestamp(226))
151+
curedSpan.SetTraceID(pcommon.NewTraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}))
152+
curedSpan.SetSpanID(pcommon.NewSpanID([8]byte{1, 2, 3, 4, 5, 6, 7, 8}))
153+
curedSpan.Attributes().Insert("tag10", pcommon.NewValueString("tag10-val"))
154+
curedSpan.Attributes().Insert("tag11", pcommon.NewValueString("tag11-val"))
155+
curedSpan.Attributes().Insert(spanLogsTruncationTagName, pcommon.NewValueBool(true))
156+
// Add events to span
157+
for i := 0; i < 16; i++ {
158+
se := curedSpan.Events().AppendEmpty()
159+
se.SetName(createLongString(1, "a"))
160+
se.SetTimestamp(ts)
161+
}
162+
163+
// For the jaegerJSONSpanMarshaler{ pbMarshaler: &jsonpb.Marshaler{}}, the marshaled log events
164+
// are large even for small log messages. So still the same as batches[0].Spans[3] except for the
165+
// truncated log events.
166+
curedSpan = curedIls.Spans().AppendEmpty()
167+
curedSpan.SetName("bar")
168+
curedSpan.SetStartTimestamp(pcommon.Timestamp(101))
169+
curedSpan.SetEndTimestamp(pcommon.Timestamp(226))
170+
curedSpan.SetTraceID(pcommon.NewTraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}))
171+
curedSpan.SetSpanID(pcommon.NewSpanID([8]byte{1, 2, 3, 4, 5, 6, 7, 8}))
172+
curedSpan.Attributes().Insert("tag10", pcommon.NewValueString("tag10-val"))
173+
curedSpan.Attributes().Insert("tag11", pcommon.NewValueString("tag11-val"))
174+
curedSpan.Attributes().Insert(spanLogsTruncationTagName, pcommon.NewValueBool(true))
175+
// Add events to span
176+
for i := 0; i < 4; i++ {
177+
se := curedSpan.Events().AppendEmpty()
178+
se.SetName(createLongString(1, "a"))
179+
se.SetTimestamp(ts)
180+
}
181+
101182
curedBatches, err := jaeger.ProtoFromTraces(curedTd)
102183
require.NoError(t, err)
103184

@@ -109,6 +190,22 @@ func TestJaegerMarshalerCurer(t *testing.T) {
109190
curedJsonByteBuffer1 := new(bytes.Buffer)
110191
require.NoError(t, jsonMarshaler.Marshal(curedJsonByteBuffer1, curedBatches[0].Spans[0]))
111192

193+
curedBatches[0].Spans[1].Process = curedBatches[0].Process
194+
curedJaegerProtoBytes2, err := curedBatches[0].Spans[1].Marshal()
195+
require.NoError(t, err)
196+
require.NotNil(t, curedJaegerProtoBytes2)
197+
198+
curedJsonByteBuffer2 := new(bytes.Buffer)
199+
require.NoError(t, jsonMarshaler.Marshal(curedJsonByteBuffer2, curedBatches[0].Spans[1]))
200+
201+
curedBatches[0].Spans[2].Process = curedBatches[0].Process
202+
curedJaegerProtoBytes3, err := curedBatches[0].Spans[2].Marshal()
203+
require.NoError(t, err)
204+
require.NotNil(t, curedJaegerProtoBytes3)
205+
206+
curedJsonByteBuffer3 := new(bytes.Buffer)
207+
require.NoError(t, jsonMarshaler.Marshal(curedJsonByteBuffer3, curedBatches[0].Spans[2]))
208+
112209
tests := []struct {
113210
unmarshaler TracesMarshaler
114211
encoding string
@@ -126,6 +223,8 @@ func TestJaegerMarshalerCurer(t *testing.T) {
126223
{Topic: "topic", Value: sarama.ByteEncoder(jaegerProtoBytes0), Key: sarama.ByteEncoder(messageKey)},
127224
{Topic: "topic", Value: sarama.ByteEncoder(curedJaegerProtoBytes1), Key: sarama.ByteEncoder(messageKey)},
128225
{Topic: "topic", Value: sarama.ByteEncoder(jaegerProtoBytes2), Key: sarama.ByteEncoder(messageKey)},
226+
{Topic: "topic", Value: sarama.ByteEncoder(curedJaegerProtoBytes2), Key: sarama.ByteEncoder(messageKey)},
227+
{Topic: "topic", Value: sarama.ByteEncoder(jaegerProtoBytes4), Key: sarama.ByteEncoder(messageKey)},
129228
},
130229
},
131230
{
@@ -142,6 +241,8 @@ func TestJaegerMarshalerCurer(t *testing.T) {
142241
{Topic: "topic", Value: sarama.ByteEncoder(jsonByteBuffer0.Bytes()), Key: sarama.ByteEncoder(messageKey)},
143242
{Topic: "topic", Value: sarama.ByteEncoder(curedJsonByteBuffer1.Bytes()), Key: sarama.ByteEncoder(messageKey)},
144243
{Topic: "topic", Value: sarama.ByteEncoder(jsonByteBuffer2.Bytes()), Key: sarama.ByteEncoder(messageKey)},
244+
{Topic: "topic", Value: sarama.ByteEncoder(curedJsonByteBuffer3.Bytes()), Key: sarama.ByteEncoder(messageKey)},
245+
{Topic: "topic", Value: sarama.ByteEncoder(jsonByteBuffer4.Bytes()), Key: sarama.ByteEncoder(messageKey)},
145246
},
146247
},
147248
{
@@ -157,6 +258,8 @@ func TestJaegerMarshalerCurer(t *testing.T) {
157258
{Topic: "topic", Value: sarama.ByteEncoder(jaegerProtoBytes0), Key: sarama.ByteEncoder(messageKey)},
158259
{Topic: "topic", Value: sarama.ByteEncoder(curedJaegerProtoBytes1), Key: sarama.ByteEncoder(messageKey)},
159260
{Topic: "topic", Value: sarama.ByteEncoder(jaegerProtoBytes2), Key: sarama.ByteEncoder(messageKey)},
261+
{Topic: "topic", Value: sarama.ByteEncoder(curedJaegerProtoBytes2), Key: sarama.ByteEncoder(messageKey)},
262+
{Topic: "topic", Value: sarama.ByteEncoder(jaegerProtoBytes4), Key: sarama.ByteEncoder(messageKey)},
160263
},
161264
},
162265
{
@@ -171,6 +274,7 @@ func TestJaegerMarshalerCurer(t *testing.T) {
171274
messages: []*sarama.ProducerMessage{
172275
{Topic: "topic", Value: sarama.ByteEncoder(jaegerProtoBytes0), Key: sarama.ByteEncoder(messageKey)},
173276
{Topic: "topic", Value: sarama.ByteEncoder(curedJaegerProtoBytes1), Key: sarama.ByteEncoder(messageKey)},
277+
{Topic: "topic", Value: sarama.ByteEncoder(curedJaegerProtoBytes2), Key: sarama.ByteEncoder(messageKey)},
174278
},
175279
},
176280
}
@@ -351,6 +455,34 @@ func TestJaegerMarshalerCurerCureSpansFail(t *testing.T) {
351455
assert.Nil(t, msg)
352456
}
353457

458+
func TestCutSpanLogsByHalf(t *testing.T) {
459+
now := time.Now()
460+
jpl1 := jaegerproto.Log{Timestamp: now}
461+
jpl2 := jaegerproto.Log{Timestamp: now.Add(time.Minute * 2)}
462+
jpl3 := jaegerproto.Log{Timestamp: now.Add(time.Minute * 4)}
463+
jpl4 := jaegerproto.Log{Timestamp: now.Add(time.Minute * 6)}
464+
jpl5 := jaegerproto.Log{Timestamp: now.Add(time.Minute * 8)}
465+
jpl6 := jaegerproto.Log{Timestamp: now.Add(time.Minute * 10)}
466+
assert.Equal(t,
467+
[]jaegerproto.Log{jpl1},
468+
cutSpanLogsByHalf([]jaegerproto.Log{jpl1}))
469+
assert.Equal(t,
470+
[]jaegerproto.Log{jpl1},
471+
cutSpanLogsByHalf([]jaegerproto.Log{jpl1, jpl2}))
472+
assert.Equal(t,
473+
[]jaegerproto.Log{jpl1, jpl3},
474+
cutSpanLogsByHalf([]jaegerproto.Log{jpl1, jpl2, jpl3}))
475+
assert.Equal(t,
476+
[]jaegerproto.Log{jpl1, jpl3},
477+
cutSpanLogsByHalf([]jaegerproto.Log{jpl1, jpl2, jpl3, jpl4}))
478+
assert.Equal(t,
479+
[]jaegerproto.Log{jpl1, jpl3, jpl5},
480+
cutSpanLogsByHalf([]jaegerproto.Log{jpl1, jpl2, jpl3, jpl4, jpl5}))
481+
assert.Equal(t,
482+
[]jaegerproto.Log{jpl1, jpl3, jpl5},
483+
cutSpanLogsByHalf([]jaegerproto.Log{jpl1, jpl2, jpl3, jpl4, jpl5, jpl6}))
484+
}
485+
354486
func createLongString(n int, s string) string {
355487
var b strings.Builder
356488
b.Grow(n * len(s))

0 commit comments

Comments
 (0)