-
Notifications
You must be signed in to change notification settings - Fork 373
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
DEBUG-2334 Dynamic Instrumentation Redactor component (#3901)
* Dynamic Instrumentation Redactor component This component determines whether a variable, attribute or hash element should be redacted based on the identifier name/key name and the type of the value. Unit tests are included. The Redactor component technically depends on DI settings but they are mocked out in the unit tests. * add non-ascii test cases * style --------- Co-authored-by: Oleg Pudeyev <[email protected]>
- Loading branch information
Showing
4 changed files
with
372 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,185 @@ | ||
# frozen_string_literal: true | ||
|
||
module Datadog | ||
module DI | ||
# Provides logic to identify sensitive information in snapshots captured | ||
# by dynamic instrumentation. | ||
# | ||
# Redaction can be performed based on identifier or attribute name, | ||
# or class name of said identifier or attribute. Redaction does not take | ||
# into account variable values. | ||
# | ||
# There is a built-in list of identifier names which will be subject to | ||
# redaction. Additional names can be provided by the user via the | ||
# settings.dynamic_instrumentation.redacted_identifiers setting or | ||
# the DD_DYNAMIC_INSTRUMENTATION_REDACTED_IDENTIFIERS environment | ||
# variable. Currently no class names are subject to redaction by default; | ||
# class names can be provided via the | ||
# settings.dynamic_instrumentation.redacted_type_names setting or | ||
# DD_DYNAMIC_INSTRUMENTATION_REDACTED_TYPES environment variable. | ||
# | ||
# Redacted identifiers must match exactly to an attribute name, a key | ||
# in a hash or a variable name. Redacted types can either be matched | ||
# exactly or, if the name is suffixed with an asterisk (*), any class | ||
# whose name starts with the specified prefix will be subject to redaction. | ||
# | ||
# When specifying class (type) names to be redacted, user must specify | ||
# fully-qualified names. For example, if `Token` or `Token*` are | ||
# specified to be redacted, instances of ::Token will be redacted | ||
# but instances of ::Foo::Token will not be. To redact the latter, | ||
# specify `Foo::Token` or `::Foo::Token` as redacted types. | ||
# | ||
# This class does not perform redaction itself (i.e., value replacement | ||
# with a placeholder). This replacement is performed by Serializer. | ||
# | ||
# @api private | ||
class Redactor
  # Identifier names that are redacted by default, stored in normalized
  # form (see #normalize). Copied from dd-trace-py.
  #
  # Frozen so that the shared default list cannot be mutated at runtime;
  # #redacted_identifiers builds a new array from it before normalizing.
  DEFAULT_REDACTED_IDENTIFIERS = [
    "2fa",
    "accesstoken",
    "aiohttpsession",
    "apikey",
    "apisecret",
    "apisignature",
    "appkey",
    "applicationkey",
    "auth",
    "authorization",
    "authtoken",
    "ccnumber",
    "certificatepin",
    "cipher",
    "clientid",
    "clientsecret",
    "connectionstring",
    "connectsid",
    "cookie",
    "credentials",
    "creditcard",
    "csrf",
    "csrftoken",
    "cvv",
    "databaseurl",
    "dburl",
    "encryptionkey",
    "encryptionkeyid",
    "env",
    "geolocation",
    "gpgkey",
    "ipaddress",
    "jti",
    "jwt",
    "licensekey",
    "masterkey",
    "mysqlpwd",
    "nonce",
    "oauth",
    "oauthtoken",
    "otp",
    "passhash",
    "passwd",
    "password",
    "passwordb",
    "pemfile",
    "pgpkey",
    "phpsessid",
    "pin",
    "pincode",
    "pkcs8",
    "privatekey",
    "publickey",
    "pwd",
    "recaptchakey",
    "refreshtoken",
    "routingnumber",
    "salt",
    "secret",
    "secretkey",
    "secrettoken",
    "securityanswer",
    "securitycode",
    "securityquestion",
    "serviceaccountcredentials",
    "session",
    "sessionid",
    "sessionkey",
    "setcookie",
    "signature",
    "signaturekey",
    "sshkey",
    "ssn",
    "symfony",
    "token",
    "transactionid",
    "twiliotoken",
    "usersession",
    "voterid",
    "xapikey",
    "xauthtoken",
    "xcsrftoken",
    "xforwardedfor",
    "xrealip",
    "xsrf",
    "xsrftoken",
  ].freeze

  # @param settings [Object] object responding to
  #   +dynamic_instrumentation+, which in turn responds to
  #   +redacted_identifiers+ and +redacted_type_names+ (both enumerables
  #   of strings).
  def initialize(settings)
    @settings = settings
  end

  attr_reader :settings

  # Reports whether a variable, attribute or hash key with the given name
  # should be redacted.
  #
  # The name is normalized (see #normalize) and must then match a built-in
  # or user-configured identifier exactly.
  #
  # @param name [String, Symbol] identifier to check
  # @return [Boolean]
  def redact_identifier?(name)
    redacted_identifiers.include?(normalize(name))
  end

  # Reports whether the value should be redacted because of its class.
  #
  # @param value [Object] value whose class name is checked
  # @return [Boolean]
  def redact_type?(value)
    cls_name = value.class.name
    # Classes can be nameless, do not attempt to redact in that case.
    # The configured type names are not consulted at all for such values.
    return false unless cls_name

    redacted_type_names_regexp.match?(cls_name)
  end

  private

  # Memoized set of normalized identifier names: the built-in defaults
  # plus the user-configured ones.
  def redacted_identifiers
    @redacted_identifiers ||= begin
      configured = settings.dynamic_instrumentation.redacted_identifiers
      Set.new((DEFAULT_REDACTED_IDENTIFIERS + configured).map { |name| normalize(name) })
    end
  end

  # Memoized regular expression matching the fully-qualified names of all
  # classes configured for redaction. The expression is anchored at both
  # ends, so a name only matches in its entirety.
  def redacted_type_names_regexp
    @redacted_type_names_regexp ||= begin
      patterns = settings.dynamic_instrumentation.redacted_type_names.map do |type_name|
        # :: prefix is redundant, all names are expected to be
        # fully-qualified.
        type_name = type_name[2...type_name.length] if type_name.start_with?("::")

        if type_name.end_with?("*")
          # Trailing asterisk: the part before it is a literal prefix,
          # anything may follow.
          Regexp.escape(type_name[0..-2]) + ".*"
        else
          Regexp.escape(type_name)
        end
      end
      alternation = patterns.join("|")
      Regexp.new("\\A(?:#{alternation})\\z")
    end
  end

  # Canonicalizes an identifier for comparison: surrounding whitespace is
  # stripped, the name is downcased, and the characters "-", "_", "$" and
  # "@" are removed.
  #
  # Input can be a string or a symbol.
  def normalize(str)
    # The hyphen is listed last so that String#delete treats it as a
    # literal character rather than a range operator.
    str.to_s.strip.downcase.delete("_$@-")
  end
end
end | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
module Datadog
  module DI
    # Type signatures for the dynamic instrumentation Redactor.
    class Redactor
      @settings: untyped

      @redacted_identifiers: untyped

      @redacted_type_names_regexp: Regexp

      def initialize: (untyped settings) -> void

      attr_reader settings: untyped

      # The implementation normalizes its input with #to_s, so both
      # strings and symbols are accepted.
      def redact_identifier?: (String | Symbol name) -> bool

      def redact_type?: (untyped value) -> bool

      private

      def redacted_identifiers: () -> untyped

      # Built with Regexp.new in the implementation.
      def redacted_type_names_regexp: () -> Regexp

      # Plain list of identifier names; the individual literals are not
      # part of the type.
      DEFAULT_REDACTED_IDENTIFIERS: ::Array[String]

      # Input can be a string or a symbol; the result is always a String.
      def normalize: (String | Symbol str) -> String
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
require "datadog/di/redactor" | ||
|
||
# Empty fixture classes for the #redact_type? examples. Only their
# fully-qualified names matter.

# Listed verbatim in the redacted type names.
class DIRedactorSpecSensitiveType
end

# Name equals the prefix of the "DIRedactorSpecWildCard*" entry.
class DIRedactorSpecWildCard
end

# Name starts with the "DIRedactorSpecWildCard" prefix.
class DIRedactorSpecWildCardClass
end

# Name is a truncation of the wildcard prefix.
class DIRedactorSpecWildCa
end

# Name contains "WildCard" but does not start with the wildcard prefix.
class DIRedactorSpecPrefixWildCard
end

# Listed with a leading "::" in the redacted type names.
class DIRedactorSpecDoubleColon
end

# Namespaced fixtures.
module DIRedactorSpec
  class SensitiveType
  end

  class NotSensitiveType
  end

  class WildCardSensitiveType
  end

  class ExactMatch
  end

  class DoubleColonNested
  end

  class DoubleColonWildCardType
  end
end
|
||
RSpec.describe Datadog::DI::Redactor do
  # Defines one example per row of +cases+.
  #
  # Each row is +[label, input, expected]+: +label+ names the context,
  # +input+ is passed to the redactor method named by +predicate+, and
  # +expected+ is the exact boolean the call must return.
  #
  # Nested example groups inherit this helper, so it is shared by the
  # #redact_identifier? and #redact_type? groups.
  def self.define_cases(predicate, cases)
    cases.each do |(label, input, expected)|
      context label do
        it "returns #{expected}" do
          expect(redactor.public_send(predicate, input)).to be expected
        end
      end
    end
  end

  let(:settings) do
    double("settings").tap do |settings|
      allow(settings).to receive(:dynamic_instrumentation).and_return(di_settings)
    end
  end

  # By default no user-defined identifiers are configured; individual
  # contexts override this with message expectations.
  let(:di_settings) do
    double("di settings").tap do |settings|
      allow(settings).to receive(:enabled).and_return(true)
      allow(settings).to receive(:propagate_all_exceptions).and_return(false)
      allow(settings).to receive(:redacted_identifiers).and_return([])
    end
  end

  let(:redactor) do
    Datadog::DI::Redactor.new(settings)
  end

  describe "#redact_identifier?" do
    cases = [
      ["lowercase", "password", true],
      ["uppercase", "PASSWORD", true],
      ["with removed punctuation", "pass_word", true],
      ["with non-removed punctuation", "pass/word", false],
      # Normalization accepts symbols, strips surrounding whitespace and
      # drops the "$" and "@" sigils.
      ["symbol", :password, true],
      ["with instance variable sigil", "@password", true],
      ["with global variable sigil", "$password", true],
      ["with surrounding whitespace", " password ", true],
    ]

    define_cases(:redact_identifier?, cases)

    context "when user-defined redacted identifiers exist" do
      before do
        expect(di_settings).to receive(:redacted_identifiers).and_return(%w[foo пароль Ключ])
      end

      cases = [
        ["exact user-defined identifier", "foo", true],
        ["prefix of user-defined identifier", "f", false],
        ["suffix of user-defined identifier", "oo", false],
        ["user-defined identifier with extra punctuation", "f-o-o", true],
        ["user-defined identifier is not ascii", "ПАРОЛь", true],
        ["user-defined identifier is not ascii and uses mixed case in definition", "ключ", true],
        ["user-defined identifier is not ascii and uses mixed case in definition and is not exact match", "ключ1", false],
      ]

      define_cases(:redact_identifier?, cases)
    end
  end

  describe "#redact_type?" do
    let(:redacted_type_names) {
      %w[
        DIRedactorSpecSensitiveType
        DIRedactorSpecWildCard*
        DIRedactorSpec::ExactMatch
        DIRedactorSpec::WildCard*
        SensitiveType
        SensitiveType*
        ::DIRedactorSpecDoubleColon
        ::DIRedactorSpec::DoubleColonNested
        ::DIRedactorSpec::DoubleColonWildCard*
      ]
    }

    context "redacted type list is checked" do
      before do
        expect(di_settings).to receive(:redacted_type_names).and_return(redacted_type_names)
      end

      cases = [
        ["redacted", DIRedactorSpecSensitiveType.new, true],
        ["not redacted", /123/, false],
        ["primitive type", nil, false],
        ["wild card type whose name is the same as prefix", DIRedactorSpecWildCard.new, true],
        ["wild card type", DIRedactorSpecWildCardClass.new, true],
        ["wild card does not match from beginning", DIRedactorSpecPrefixWildCard.new, false],
        ["partial wild card prefix match", DIRedactorSpecWildCa.new, false],
        ["class object", String, false],
        ["anonymous class object", Class.new, false],
        ["namespaced class - exact match", DIRedactorSpec::ExactMatch.new, true],
        ["namespaced class - wildcard - matched", DIRedactorSpec::WildCardSensitiveType.new, true],
        ["namespaced class - tail component match only", DIRedactorSpec::SensitiveType.new, false],
        ["double-colon top-level specification", DIRedactorSpecDoubleColon.new, true],
        ["double-colon nested specification", DIRedactorSpec::DoubleColonNested.new, true],
        ["double-colon nested wildcard", DIRedactorSpec::DoubleColonWildCardType.new, true],
      ]

      define_cases(:redact_type?, cases)
    end

    # An instance of an anonymous class has no class name, so the
    # redactor must answer without consulting the configured type names.
    context "redacted type list is not checked" do
      before do
        expect(di_settings).not_to receive(:redacted_type_names)
      end

      cases = [
        ["instance of anonymous class", Class.new.new, false],
      ]

      define_cases(:redact_type?, cases)
    end
  end
end