Skip to content

[APPSEC-57239] Extract derivative schema compression into a module #4573

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Apr 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions .github/forced-tests-list.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
{

}
"DEFAULT": [
"tests/appsec/test_fingerprinting.py::Test_Fingerprinting_Endpoint",
"tests/appsec/test_fingerprinting.py::Test_Fingerprinting_Header_And_Network"
]
}
40 changes: 40 additions & 0 deletions lib/datadog/appsec/compressed_json.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# frozen_string_literal: true

require 'json'
require 'zlib'
require 'stringio'

require_relative '../core/utils/base64'

module Datadog
module AppSec
# Converts derivative schema payloads into JSON and compresses them into a
# base64 encoded string if the payload is worth compressing.
#
# See: https://github.com/DataDog/dd-trace-rb/pull/3177#issuecomment-1747221082
module CompressedJson
MIN_SIZE_FOR_COMPRESSION = 260

def self.dump(payload)
value = JSON.dump(payload)
return value if value.bytesize < MIN_SIZE_FOR_COMPRESSION

compress_and_encode(value)
rescue ArgumentError, Encoding::UndefinedConversionError, JSON::JSONError => e
AppSec.telemetry.report(e, description: 'AppSec: Failed to convert value into JSON')

nil
end

private_class_method def self.compress_and_encode(payload)
Core::Utils::Base64.strict_encode64(
Zlib.gzip(payload, level: Zlib::BEST_SPEED, strategy: Zlib::DEFAULT_STRATEGY)
)
rescue Zlib::Error, TypeError => e
AppSec.telemetry.report(e, description: 'AppSec: Failed to compress and encode value')

nil
end
end
end
end
66 changes: 17 additions & 49 deletions lib/datadog/appsec/event.rb
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
# frozen_string_literal: true

require 'json'
require 'zlib'

require_relative 'rate_limiter'
require_relative '../core/utils/base64'
require_relative 'compressed_json'

module Datadog
module AppSec
# AppSec event
module Event
DERIVATIVE_SCHEMA_KEY_PREFIX = '_dd.appsec.s.'
DERIVATIVE_SCHEMA_MAX_COMPRESSED_SIZE = 25000
ALLOWED_REQUEST_HEADERS = %w[
X-Forwarded-For
X-Client-IP
Expand Down Expand Up @@ -38,11 +38,6 @@ module Event
Content-Language
].map!(&:downcase).freeze

MAX_ENCODED_SCHEMA_SIZE = 25000
# For more information about this number
# please check https://github.com/DataDog/dd-trace-rb/pull/3177#issuecomment-1747221082
MIN_SCHEMA_SIZE_FOR_COMPRESSION = 260

# Record events for a trace
#
# This is expected to be called only once per trace for the rate limiter
Expand Down Expand Up @@ -80,7 +75,6 @@ def record_via_span(span, *events)
end
end

# rubocop:disable Metrics/MethodLength
def build_service_entry_tags(event_group)
waf_events = []
entry_tags = event_group.each_with_object({ '_dd.origin' => 'appsec' }) do |event, tags|
Expand All @@ -106,26 +100,17 @@ def build_service_entry_tags(event_group)
waf_events += waf_result.events

waf_result.derivatives.each do |key, value|
parsed_value = json_parse(value)
next unless parsed_value

parsed_value_size = parsed_value.size

schema_value = if parsed_value_size >= MIN_SCHEMA_SIZE_FOR_COMPRESSION
compressed_and_base64_encoded(parsed_value)
else
parsed_value
end
next unless schema_value

if schema_value.size >= MAX_ENCODED_SCHEMA_SIZE
Datadog.logger.debug do
"Schema key: #{key} exceeds the max size value. It will not be included as part of the span tags"
end
next tags[key] = value unless key.start_with?(DERIVATIVE_SCHEMA_KEY_PREFIX)

value = CompressedJson.dump(value)
next if value.nil?

if value.size >= DERIVATIVE_SCHEMA_MAX_COMPRESSED_SIZE
Datadog.logger.debug { "AppSec: Schema key '#{key}' will not be included into span tags due to it's size" }
next
end

tags[key] = schema_value
tags[key] = value
end

tags
Expand All @@ -135,7 +120,6 @@ def build_service_entry_tags(event_group)
entry_tags['_dd.appsec.json'] = appsec_events if appsec_events
entry_tags
end
# rubocop:enable Metrics/MethodLength

def tag_and_keep!(context, waf_result)
# We want to keep the trace in case of security event
Expand All @@ -154,31 +138,15 @@ def tag_and_keep!(context, waf_result)

private

def compressed_and_base64_encoded(value)
Datadog::Core::Utils::Base64.strict_encode64(gzip(value))
rescue TypeError => e
Datadog.logger.debug do
"Failed to compress and encode value when populating AppSec::Event. Error: #{e.message}"
end
nil
end

# NOTE: Handling of Encoding::UndefinedConversionError is added as a quick fix to
# the issue between Ruby encoded strings and libddwaf produced events and now
# is under investigation.
def json_parse(value)
JSON.dump(value)
rescue ArgumentError => e
Datadog.logger.debug do
"Failed to parse value to JSON when populating AppSec::Event. Error: #{e.message}"
end
nil
end
rescue ArgumentError, Encoding::UndefinedConversionError, JSON::JSONError => e
AppSec.telemetry.report(e, description: 'AppSec: Failed to convert value into JSON')

def gzip(value)
sio = StringIO.new
# For an in depth comparison of Zlib options check https://github.com/DataDog/dd-trace-rb/pull/3177#issuecomment-1747215473
gz = Zlib::GzipWriter.new(sio, Zlib::BEST_SPEED, Zlib::DEFAULT_STRATEGY)
gz.write(value)
gz.close
sio.string
nil
end

# Propagate to downstream services the information that the current distributed trace is
Expand Down
11 changes: 11 additions & 0 deletions sig/datadog/appsec/compressed_json.rbs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
module Datadog
module AppSec
module CompressedJson
MIN_SIZE_FOR_COMPRESSION: ::Integer

def self.dump: (untyped payload) -> ::String?

def self.compress_and_encode: (::String payload) -> ::String?
end
end
end
25 changes: 11 additions & 14 deletions sig/datadog/appsec/event.rbs
Original file line number Diff line number Diff line change
@@ -1,30 +1,27 @@
module Datadog
module AppSec
module Event
ALLOWED_REQUEST_HEADERS: untyped
DERIVATIVE_SCHEMA_KEY_PREFIX: ::String

ALLOWED_RESPONSE_HEADERS: untyped
DERIVATIVE_SCHEMA_MAX_COMPRESSED_SIZE: ::Integer

MAX_ENCODED_SCHEMA_SIZE: Numeric
MIN_SCHEMA_SIZE_FOR_COMPRESSION: Numeric
ALLOWED_REQUEST_HEADERS: ::Array[::String]

def self.record: (Tracing::SpanOperation, *untyped events) -> (nil | untyped)
ALLOWED_RESPONSE_HEADERS: ::Array[::String]

def self.record_via_span: (Tracing::SpanOperation, *untyped events) -> untyped
def self.record: (Tracing::SpanOperation span, *untyped events) -> void

def self.build_service_entry_tags: (Array[Hash[::Symbol, untyped]] event_group) -> Hash[::String, untyped]
def self.record_via_span: (Tracing::SpanOperation span, *untyped events) -> void

def self.tag_and_keep!: (Context scope, WAF::Result waf_result) -> void
def self.build_service_entry_tags: (untyped event_group) -> ::Hash[::String, untyped]

private

def self.compressed_and_base64_encoded: (untyped value) -> untyped
def self.tag_and_keep!: (untyped context, untyped waf_result) -> void

def self.json_parse: (untyped value) -> untyped
private

def self.gzip: (untyped value) -> untyped
def self.json_parse: (untyped value) -> ::String?

def self.add_distributed_tags: (Tracing::TraceOperation trace) -> void
def self.add_distributed_tags: (untyped trace) -> void
end
end
end
61 changes: 61 additions & 0 deletions spec/datadog/appsec/compressed_json_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# frozen_string_literal: true

require 'datadog/appsec/spec_helper'
require 'datadog/appsec/compressed_json'

RSpec.describe Datadog::AppSec::CompressedJson do
before { allow(Datadog::AppSec).to receive(:telemetry).and_return(telemetry) }

let(:telemetry) { spy(Datadog::Core::Telemetry::Component) }

describe '.dump' do
context 'when payload is nil' do
it { expect(described_class.dump(nil)).to eq('null') }
end

context 'when payload is small' do
before { stub_const('Datadog::AppSec::CompressedJson::MIN_SIZE_FOR_COMPRESSION', 1000) }

it { expect(described_class.dump({ foo: 'bar' })).to eq('{"foo":"bar"}') }
end

context 'when payload is large' do
before { stub_const('Datadog::AppSec::CompressedJson::MIN_SIZE_FOR_COMPRESSION', 10) }

it 'returns compressed data' do
compressed = described_class.dump({ foo: 'block_request_and_redirect_to_login_page' })

expect(compressed).to_not eq('{"foo":"block_request_and_redirect_to_login_page"}')
expect(compressed).to match(%r{^[-A-Za-z0-9+/]*={0,3}$})
end
end

context 'when JSON conversion fails' do
before { allow(JSON).to receive(:dump).and_raise(ArgumentError) }

it 'reports the error and returns nil' do
expect(described_class.dump({ foo: 'something bad' })).to be_nil
expect(Datadog::AppSec.telemetry).to have_received(:report)
end
end

context 'when JSON dump fails' do
it 'reports the error and returns nil' do
expect(described_class.dump({ foo: "\xC2" })).to be_nil
expect(Datadog::AppSec.telemetry).to have_received(:report)
end
end

context 'when compression fails' do
before do
stub_const('Datadog::AppSec::CompressedJson::MIN_SIZE_FOR_COMPRESSION', 1)
allow(Zlib).to receive(:gzip).and_raise(Zlib::DataError)
end

it 'reports the error and returns nil' do
expect(described_class.dump({ foo: 'bar' })).to be_nil
expect(Datadog::AppSec.telemetry).to have_received(:report)
end
end
end
end
21 changes: 9 additions & 12 deletions spec/datadog/appsec/event_spec.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# frozen_string_literal: true

require 'datadog/appsec/spec_helper'
require 'datadog/appsec/event'

Expand Down Expand Up @@ -209,7 +211,7 @@

context 'JSON payload' do
it 'uses JSON string when do not exceeds MIN_SCHEMA_SIZE_FOR_COMPRESSION' do
stub_const('Datadog::AppSec::Event::MIN_SCHEMA_SIZE_FOR_COMPRESSION', 3000)
stub_const('Datadog::AppSec::CompressedJson::MIN_SIZE_FOR_COMPRESSION', 3000)
meta = top_level_span.meta

expect(meta['_dd.appsec.s.req.headers']).to eq('[{"host":[8],"version":[8]}]')
Expand All @@ -218,13 +220,10 @@

context 'Compressed payload' do
it 'uses compressed value when JSON string is bigger than MIN_SCHEMA_SIZE_FOR_COMPRESSION' do
result = 'H4sIAOYoHGUAA4aphwAAAA='
stub_const('Datadog::AppSec::Event::MIN_SCHEMA_SIZE_FOR_COMPRESSION', 1)
expect(described_class).to receive(:compressed_and_base64_encoded).and_return(result)

meta = top_level_span.meta
stub_const('Datadog::AppSec::CompressedJson::MIN_SIZE_FOR_COMPRESSION', 1)
allow(Datadog::AppSec::CompressedJson).to receive(:dump).and_return('H4sIAOYoHGUAA4aphwAAAA=')

expect(meta['_dd.appsec.s.req.headers']).to eq(result)
expect(top_level_span.meta['_dd.appsec.s.req.headers']).to eq('H4sIAOYoHGUAA4aphwAAAA=')
end

context 'with big derivatives' do
Expand Down Expand Up @@ -267,16 +266,14 @@
end

it 'has no newlines when encoded' do
meta = top_level_span.meta

expect(meta['_dd.appsec.s.req.headers']).to_not match(/\n/)
expect(top_level_span.meta['_dd.appsec.s.req.headers']).to_not match(/\n/)
end
end
end

context 'derivative values exceed Event::MAX_ENCODED_SCHEMA_SIZE value' do
context 'derivative values exceed Event::DERIVATIVE_SCHEMA_MAX_COMPRESSED_SIZE value' do
it 'do not add derivative key to meta' do
stub_const('Datadog::AppSec::Event::MAX_ENCODED_SCHEMA_SIZE', 1)
stub_const('Datadog::AppSec::Event::DERIVATIVE_SCHEMA_MAX_COMPRESSED_SIZE', 1)
meta = top_level_span.meta

expect(meta['_dd.appsec.s.req.headers']).to be_nil
Expand Down
Loading