diff --git a/.github/forced-tests-list.json b/.github/forced-tests-list.json index 077404aaa41..07ca8b3e2a6 100644 --- a/.github/forced-tests-list.json +++ b/.github/forced-tests-list.json @@ -1,3 +1,6 @@ { - -} \ No newline at end of file + "DEFAULT": [ + "tests/appsec/test_fingerprinting.py::Test_Fingerprinting_Endpoint", + "tests/appsec/test_fingerprinting.py::Test_Fingerprinting_Header_And_Network" + ] +} diff --git a/lib/datadog/appsec/compressed_json.rb b/lib/datadog/appsec/compressed_json.rb new file mode 100644 index 00000000000..14c64bbb641 --- /dev/null +++ b/lib/datadog/appsec/compressed_json.rb @@ -0,0 +1,40 @@ +# frozen_string_literal: true + +require 'json' +require 'zlib' +require 'stringio' + +require_relative '../core/utils/base64' + +module Datadog + module AppSec + # Converts derivative schema payloads into JSON and compresses them into a + # base64 encoded string if the payload is worth compressing. + # + # See: https://github.com/DataDog/dd-trace-rb/pull/3177#issuecomment-1747221082 + module CompressedJson + MIN_SIZE_FOR_COMPRESSION = 260 + + def self.dump(payload) + value = JSON.dump(payload) + return value if value.bytesize < MIN_SIZE_FOR_COMPRESSION + + compress_and_encode(value) + rescue ArgumentError, Encoding::UndefinedConversionError, JSON::JSONError => e + AppSec.telemetry.report(e, description: 'AppSec: Failed to convert value into JSON') + + nil + end + + private_class_method def self.compress_and_encode(payload) + Core::Utils::Base64.strict_encode64( + Zlib.gzip(payload, level: Zlib::BEST_SPEED, strategy: Zlib::DEFAULT_STRATEGY) + ) + rescue Zlib::Error, TypeError => e + AppSec.telemetry.report(e, description: 'AppSec: Failed to compress and encode value') + + nil + end + end + end +end diff --git a/lib/datadog/appsec/event.rb b/lib/datadog/appsec/event.rb index cc334eccf67..5c6089d590c 100644 --- a/lib/datadog/appsec/event.rb +++ b/lib/datadog/appsec/event.rb @@ -1,15 +1,15 @@ # frozen_string_literal: true require 'json' -require 'zlib' - require_relative 'rate_limiter' -require_relative '../core/utils/base64' +require_relative 'compressed_json' module Datadog module AppSec # AppSec event module Event + DERIVATIVE_SCHEMA_KEY_PREFIX = '_dd.appsec.s.' + DERIVATIVE_SCHEMA_MAX_COMPRESSED_SIZE = 25000 ALLOWED_REQUEST_HEADERS = %w[ X-Forwarded-For X-Client-IP @@ -38,11 +38,6 @@ module Event Content-Language ].map!(&:downcase).freeze - MAX_ENCODED_SCHEMA_SIZE = 25000 - # For more information about this number - # please check https://github.com/DataDog/dd-trace-rb/pull/3177#issuecomment-1747221082 - MIN_SCHEMA_SIZE_FOR_COMPRESSION = 260 - # Record events for a trace # # This is expected to be called only once per trace for the rate limiter @@ -80,7 +75,6 @@ def record_via_span(span, *events) end end - # rubocop:disable Metrics/MethodLength def build_service_entry_tags(event_group) waf_events = [] entry_tags = event_group.each_with_object({ '_dd.origin' => 'appsec' }) do |event, tags| @@ -106,26 +100,17 @@ def build_service_entry_tags(event_group) waf_events += waf_result.events waf_result.derivatives.each do |key, value| - parsed_value = json_parse(value) - next unless parsed_value - - parsed_value_size = parsed_value.size - - schema_value = if parsed_value_size >= MIN_SCHEMA_SIZE_FOR_COMPRESSION - compressed_and_base64_encoded(parsed_value) - else - parsed_value - end - next unless schema_value - - if schema_value.size >= MAX_ENCODED_SCHEMA_SIZE - Datadog.logger.debug do - "Schema key: #{key} exceeds the max size value. It will not be included as part of the span tags" - end + next tags[key] = value unless key.start_with?(DERIVATIVE_SCHEMA_KEY_PREFIX) + + value = CompressedJson.dump(value) + next if value.nil? + + if value.size >= DERIVATIVE_SCHEMA_MAX_COMPRESSED_SIZE + Datadog.logger.debug { "AppSec: Schema key '#{key}' will not be included into span tags due to it's size" } next end - tags[key] = schema_value + tags[key] = value end tags @@ -135,7 +120,6 @@ def build_service_entry_tags(event_group) entry_tags['_dd.appsec.json'] = appsec_events if appsec_events entry_tags end - # rubocop:enable Metrics/MethodLength def tag_and_keep!(context, waf_result) # We want to keep the trace in case of security event @@ -154,31 +138,15 @@ def tag_and_keep!(context, waf_result) private - def compressed_and_base64_encoded(value) - Datadog::Core::Utils::Base64.strict_encode64(gzip(value)) - rescue TypeError => e - Datadog.logger.debug do - "Failed to compress and encode value when populating AppSec::Event. Error: #{e.message}" - end - nil - end - + # NOTE: Handling of Encoding::UndefinedConversionError is added as a quick fix to + # the issue between Ruby encoded strings and libddwaf produced events and now + # is under investigation. def json_parse(value) JSON.dump(value) - rescue ArgumentError => e - Datadog.logger.debug do - "Failed to parse value to JSON when populating AppSec::Event. Error: #{e.message}" - end - nil - end + rescue ArgumentError, Encoding::UndefinedConversionError, JSON::JSONError => e + AppSec.telemetry.report(e, description: 'AppSec: Failed to convert value into JSON') - def gzip(value) - sio = StringIO.new - # For an in depth comparison of Zlib options check https://github.com/DataDog/dd-trace-rb/pull/3177#issuecomment-1747215473 - gz = Zlib::GzipWriter.new(sio, Zlib::BEST_SPEED, Zlib::DEFAULT_STRATEGY) - gz.write(value) - gz.close - sio.string + nil end # Propagate to downstream services the information that the current distributed trace is diff --git a/sig/datadog/appsec/compressed_json.rbs b/sig/datadog/appsec/compressed_json.rbs new file mode 100644 index 00000000000..ff1f5c5ac21 --- /dev/null +++ b/sig/datadog/appsec/compressed_json.rbs @@ -0,0 +1,11 @@ +module Datadog + module AppSec + module CompressedJson + MIN_SIZE_FOR_COMPRESSION: ::Integer + + def self.dump: (untyped payload) -> ::String? + + def self.compress_and_encode: (::String payload) -> ::String? + end + end +end diff --git a/sig/datadog/appsec/event.rbs b/sig/datadog/appsec/event.rbs index 713f73a2129..3f7c8518429 100644 --- a/sig/datadog/appsec/event.rbs +++ b/sig/datadog/appsec/event.rbs @@ -1,30 +1,27 @@ module Datadog module AppSec module Event - ALLOWED_REQUEST_HEADERS: untyped + DERIVATIVE_SCHEMA_KEY_PREFIX: ::String - ALLOWED_RESPONSE_HEADERS: untyped + DERIVATIVE_SCHEMA_MAX_COMPRESSED_SIZE: ::Integer - MAX_ENCODED_SCHEMA_SIZE: Numeric - MIN_SCHEMA_SIZE_FOR_COMPRESSION: Numeric + ALLOWED_REQUEST_HEADERS: ::Array[::String] - def self.record: (Tracing::SpanOperation, *untyped events) -> (nil | untyped) + ALLOWED_RESPONSE_HEADERS: ::Array[::String] - def self.record_via_span: (Tracing::SpanOperation, *untyped events) -> untyped + def self.record: (Tracing::SpanOperation span, *untyped events) -> void - def self.build_service_entry_tags: (Array[Hash[::Symbol, untyped]] event_group) -> Hash[::String, untyped] + def self.record_via_span: (Tracing::SpanOperation span, *untyped events) -> void - def self.tag_and_keep!: (Context scope, WAF::Result waf_result) -> void + def self.build_service_entry_tags: (untyped event_group) -> ::Hash[::String, untyped] - private - - def self.compressed_and_base64_encoded: (untyped value) -> untyped + def self.tag_and_keep!: (untyped context, untyped waf_result) -> void - def self.json_parse: (untyped value) -> untyped + private - def self.gzip: (untyped value) -> untyped + def self.json_parse: (untyped value) -> ::String? - def self.add_distributed_tags: (Tracing::TraceOperation trace) -> void + def self.add_distributed_tags: (untyped trace) -> void end end end diff --git a/spec/datadog/appsec/compressed_json_spec.rb b/spec/datadog/appsec/compressed_json_spec.rb new file mode 100644 index 00000000000..a2491ee1ad7 --- /dev/null +++ b/spec/datadog/appsec/compressed_json_spec.rb @@ -0,0 +1,61 @@ +# frozen_string_literal: true + +require 'datadog/appsec/spec_helper' +require 'datadog/appsec/compressed_json' + +RSpec.describe Datadog::AppSec::CompressedJson do + before { allow(Datadog::AppSec).to receive(:telemetry).and_return(telemetry) } + + let(:telemetry) { spy(Datadog::Core::Telemetry::Component) } + + describe '.dump' do + context 'when payload is nil' do + it { expect(described_class.dump(nil)).to eq('null') } + end + + context 'when payload is small' do + before { stub_const('Datadog::AppSec::CompressedJson::MIN_SIZE_FOR_COMPRESSION', 1000) } + + it { expect(described_class.dump({ foo: 'bar' })).to eq('{"foo":"bar"}') } + end + + context 'when payload is large' do + before { stub_const('Datadog::AppSec::CompressedJson::MIN_SIZE_FOR_COMPRESSION', 10) } + + it 'returns compressed data' do + compressed = described_class.dump({ foo: 'block_request_and_redirect_to_login_page' }) + + expect(compressed).to_not eq('{"foo":"block_request_and_redirect_to_login_page"}') + expect(compressed).to match(%r{^[-A-Za-z0-9+/]*={0,3}$}) + end + end + + context 'when JSON conversion fails' do + before { allow(JSON).to receive(:dump).and_raise(ArgumentError) } + + it 'reports the error and returns nil' do + expect(described_class.dump({ foo: 'something bad' })).to be_nil + expect(Datadog::AppSec.telemetry).to have_received(:report) + end + end + + context 'when JSON dump fails' do + it 'reports the error and returns nil' do + expect(described_class.dump({ foo: "\xC2" })).to be_nil + expect(Datadog::AppSec.telemetry).to have_received(:report) + end + end + + context 'when compression fails' do + before do + stub_const('Datadog::AppSec::CompressedJson::MIN_SIZE_FOR_COMPRESSION', 1) + allow(Zlib).to receive(:gzip).and_raise(Zlib::DataError) + end + + it 'reports the error and returns nil' do + expect(described_class.dump({ foo: 'bar' })).to be_nil + expect(Datadog::AppSec.telemetry).to have_received(:report) + end + end + end +end diff --git a/spec/datadog/appsec/event_spec.rb b/spec/datadog/appsec/event_spec.rb index ae46736e0a3..b231bc799cf 100644 --- a/spec/datadog/appsec/event_spec.rb +++ b/spec/datadog/appsec/event_spec.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require 'datadog/appsec/spec_helper' require 'datadog/appsec/event' @@ -209,7 +211,7 @@ context 'JSON payload' do it 'uses JSON string when do not exceeds MIN_SCHEMA_SIZE_FOR_COMPRESSION' do - stub_const('Datadog::AppSec::Event::MIN_SCHEMA_SIZE_FOR_COMPRESSION', 3000) + stub_const('Datadog::AppSec::CompressedJson::MIN_SIZE_FOR_COMPRESSION', 3000) meta = top_level_span.meta expect(meta['_dd.appsec.s.req.headers']).to eq('[{"host":[8],"version":[8]}]') @@ -218,13 +220,10 @@ context 'Compressed payload' do it 'uses compressed value when JSON string is bigger than MIN_SCHEMA_SIZE_FOR_COMPRESSION' do - result = 'H4sIAOYoHGUAA4aphwAAAA=' - stub_const('Datadog::AppSec::Event::MIN_SCHEMA_SIZE_FOR_COMPRESSION', 1) - expect(described_class).to receive(:compressed_and_base64_encoded).and_return(result) - - meta = top_level_span.meta + stub_const('Datadog::AppSec::CompressedJson::MIN_SIZE_FOR_COMPRESSION', 1) + allow(Datadog::AppSec::CompressedJson).to receive(:dump).and_return('H4sIAOYoHGUAA4aphwAAAA=') - expect(meta['_dd.appsec.s.req.headers']).to eq(result) + expect(top_level_span.meta['_dd.appsec.s.req.headers']).to eq('H4sIAOYoHGUAA4aphwAAAA=') end context 'with big derivatives' do @@ -267,16 +266,14 @@ end it 'has no newlines when encoded' do - meta = top_level_span.meta - - expect(meta['_dd.appsec.s.req.headers']).to_not match(/\n/) + expect(top_level_span.meta['_dd.appsec.s.req.headers']).to_not match(/\n/) end end end - context 'derivative values exceed Event::MAX_ENCODED_SCHEMA_SIZE value' do + context 'derivative values exceed Event::DERIVATIVE_SCHEMA_MAX_COMPRESSED_SIZE value' do it 'do not add derivative key to meta' do - stub_const('Datadog::AppSec::Event::MAX_ENCODED_SCHEMA_SIZE', 1) + stub_const('Datadog::AppSec::Event::DERIVATIVE_SCHEMA_MAX_COMPRESSED_SIZE', 1) meta = top_level_span.meta expect(meta['_dd.appsec.s.req.headers']).to be_nil