forked from envoyproxy/envoy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwell_known_names.cc
167 lines (127 loc) · 7.01 KB
/
well_known_names.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#include "source/common/config/well_known_names.h"
#include "absl/strings/str_replace.h"
namespace Envoy {
namespace Config {
namespace {
// Pattern for a single tag value: one or more characters, none of which is a dot. It is
// substituted for the <TAG_VALUE> placeholder by expandRegex() and, wrapped in ^...$ anchors, is
// the pattern compiled by validTagValueRegex().
const absl::string_view TAG_VALUE_REGEX = R"([^\.]+)";
// To allow for more readable regular expressions to be declared below, and to
// reduce duplication, define a few common pattern substitutions for regex
// segments.
std::string expandRegex(const std::string& regex) {
return absl::StrReplaceAll(
regex, {// Regex to look for either IPv4 or IPv6 addresses plus port number after underscore.
{"<ADDRESS>", R"((?:(?:\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|\[[a-fA-F_\d]+\])_\d+))"},
// Cipher names can contain alphanumerics with dashes and
// underscores.
{"<CIPHER>", R"([\w-]+)"},
// A generic name can contain any character except dots.
{"<TAG_VALUE>", TAG_VALUE_REGEX},
// Route names may contain dots in addition to alphanumerics and
// dashes with underscores.
{"<ROUTE_CONFIG_NAME>", R"([\w-\.]+)"}});
}
// Returns a reference to a matcher for TAG_VALUE_REGEX anchored at both ends ("^" ... "$"), i.e.
// a pattern that only accepts strings made up entirely of one or more non-dot characters.
// The matcher is created through CONSTRUCT_ON_FIRST_USE; presumably that builds it once and hands
// back the same object on later calls — confirm against the macro definition.
// NOTE(review): the meaning of the trailing `false` constructor argument is not visible from
// this file; check the CompiledGoogleReMatcher constructor.
const Regex::CompiledGoogleReMatcher& validTagValueRegex() {
CONSTRUCT_ON_FIRST_USE(Regex::CompiledGoogleReMatcher, absl::StrCat("^", TAG_VALUE_REGEX, "$"),
false);
}
} // namespace
// Reports whether `name` matches the anchored tag value pattern, i.e. whether it consists of one
// or more characters none of which is a dot.
// Note: despite what the function name suggests, a `true` result means the value matched the
// *valid* tag value pattern.
bool doesTagNameValueMatchInvalidCharRegex(absl::string_view name) {
  const Regex::CompiledGoogleReMatcher& tag_value_matcher = validTagValueRegex();
  return tag_value_matcher.match(name);
}
// Registers the default tag extraction descriptors: regex-based ones through addRe2() (whose
// patterns may use the placeholders expanded by expandRegex()) and token-pattern ones through
// addTokenized().
TagNameValues::TagNameValues() {
  // Note: the default regexes are defined below in the order that they will typically be matched
  // (see the TagExtractor class definition for an explanation of the iterative matching process).
  // This ordering is roughly from most specific to least specific. Despite the fact that these
  // regexes are defined with a particular ordering in mind, users can customize the ordering of the
  // processing of the default tag extraction regexes and include custom tags with regexes via the
  // bootstrap configuration. Because of this flexibility, these regexes are designed to not
  // interfere with one another no matter the ordering. They are tested in forward and reverse
  // ordering to ensure they will be safe in most ordering configurations.

  // To give a more user-friendly explanation of the intended behavior of each regex, each is
  // preceded by a comment with a simplified notation to explain what the regex is designed to
  // match:
  // - The text that the regex is intended to capture will be enclosed in ().
  // - Other default tags that are expected to exist in the name (and may or may not have been
  //   removed before this regex has been applied) are enclosed in [].
  // - Stand-ins for a variable segment of the name (including inside capture groups) will be
  //   enclosed in <>.
  // - Typical * notation will be used to denote an arbitrary set of characters.

  // *_rq(_<response_code>)
  addRe2(RESPONSE_CODE, R"(_rq(_(\d{3}))$)", "_rq_");

  // *_rq_(<response_code_class>)xx
  addRe2(RESPONSE_CODE_CLASS, R"(_rq_((\d))xx$)", "_rq_");

  // http.[<stat_prefix>.]dynamodb.table.[<table_name>.]capacity.[<operation_name>.](__partition_id=<last_seven_characters_from_partition_id>)
  addRe2(
      DYNAMO_PARTITION_ID,
      R"(^http\.<TAG_VALUE>\.dynamodb\.table\.<TAG_VALUE>\.capacity\.<TAG_VALUE>(\.__partition_id=(\w{7}))$)",
      ".dynamodb.table.");

  // http.[<stat_prefix>.]dynamodb.operation.(<operation_name>.)* or
  // http.[<stat_prefix>.]dynamodb.table.[<table_name>.]capacity.(<operation_name>.)[<partition_id>]
  // The separator after "dynamodb" is escaped so that only a literal dot matches there, like
  // every other separator in this pattern.
  addRe2(
      DYNAMO_OPERATION,
      R"(^http\.<TAG_VALUE>\.dynamodb\.(?:operation|table\.<TAG_VALUE>\.capacity)(\.(<TAG_VALUE>))(?:\.|$))",
      ".dynamodb.");

  // mongo.[<stat_prefix>.]collection.[<collection>.]callsite.(<callsite>.)query.*
  addTokenized(MONGO_CALLSITE, "mongo.*.collection.*.callsite.$.query.**");

  // http.[<stat_prefix>.]dynamodb.table.(<table_name>.)* or
  // http.[<stat_prefix>.]dynamodb.error.(<table_name>.)*
  // The separator after "dynamodb" is escaped so that only a literal dot matches there.
  addRe2(DYNAMO_TABLE, R"(^http\.<TAG_VALUE>\.dynamodb\.(?:table|error)\.((<TAG_VALUE>)\.))",
         ".dynamodb.");

  // mongo.[<stat_prefix>.]collection.(<collection>.)query.*
  addTokenized(MONGO_COLLECTION, "mongo.*.collection.$.**.query.*");

  // mongo.[<stat_prefix>.]cmd.(<cmd>.)*
  addTokenized(MONGO_CMD, "mongo.*.cmd.$.**");

  // cluster.[<route_target_cluster>.]grpc.[<grpc_service>.](<grpc_method>.)*
  addTokenized(GRPC_BRIDGE_METHOD, "cluster.*.grpc.*.$.**");

  // http.[<stat_prefix>.]user_agent.(<user_agent>.)*
  addTokenized(HTTP_USER_AGENT, "http.*.user_agent.$.**");

  // vhost.[<virtual host name>.]vcluster.(<virtual_cluster_name>.)*
  addTokenized(VIRTUAL_CLUSTER, "vhost.*.vcluster.$.**");

  // http.[<stat_prefix>.]fault.(<downstream_cluster>.)*
  addTokenized(FAULT_DOWNSTREAM_CLUSTER, "http.*.fault.$.**");

  // listener.[<address>.]ssl.cipher.(<cipher>)
  addRe2(SSL_CIPHER, R"(^listener\..*?\.ssl\.cipher(\.(<CIPHER>))$)");

  // cluster.[<cluster_name>.]ssl.ciphers.(<cipher>)
  addRe2(SSL_CIPHER_SUITE, R"(^cluster\.<TAG_VALUE>\.ssl\.ciphers(\.(<CIPHER>))$)",
         ".ssl.ciphers.");

  // cluster.[<route_target_cluster>.]grpc.(<grpc_service>.)*
  addTokenized(GRPC_BRIDGE_SERVICE, "cluster.*.grpc.$.**");

  // tcp.(<stat_prefix>.)*
  addTokenized(TCP_PREFIX, "tcp.$.**");

  // udp.(<stat_prefix>.)*
  addTokenized(UDP_PREFIX, "udp.$.**");

  // auth.clientssl.(<stat_prefix>.)*
  addTokenized(CLIENTSSL_PREFIX, "auth.clientssl.$.**");

  // ratelimit.(<stat_prefix>.)*
  addTokenized(RATELIMIT_PREFIX, "ratelimit.$.**");

  // cluster.(<cluster_name>.)*
  addTokenized(CLUSTER_NAME, "cluster.$.**");

  // listener.[<address>.]http.(<stat_prefix>.)*
  // The <address> part can be anything here (.*?) for the sake of a simpler
  // internal state of the regex which performs better.
  addRe2(HTTP_CONN_MANAGER_PREFIX, R"(^listener\..*?\.http\.((<TAG_VALUE>)\.))", ".http.");

  // http.(<stat_prefix>.)*
  addTokenized(HTTP_CONN_MANAGER_PREFIX, "http.$.**");

  // listener.(<address|stat_prefix>.)*, but specifically excluding "admin"
  addRe2(LISTENER_ADDRESS, R"(^listener\.((<ADDRESS>|<TAG_VALUE>)\.))", "", "admin");

  // vhost.(<virtual host name>.)*
  addTokenized(VIRTUAL_HOST, "vhost.$.**");

  // mongo.(<stat_prefix>.)*
  addTokenized(MONGO_PREFIX, "mongo.$.**");

  // http.[<stat_prefix>.]rds.(<route_config_name>.)<base_stat>
  // Note: <route_config_name> can contain dots thus we have to maintain full
  // match.
  addRe2(RDS_ROUTE_CONFIG, R"(^http\.<TAG_VALUE>\.rds\.((<ROUTE_CONFIG_NAME>)\.)\w+?$)", ".rds.");

  // listener_manager.(worker_<id>.)*
  addRe2(WORKER_ID, R"(^listener_manager\.((worker_\d+)\.))", "listener_manager.worker_");
}
// Appends a regex-based tag descriptor to descriptor_vec_. Placeholder tokens in `regex` are
// expanded with expandRegex() before the descriptor is stored; `name`, `substr` and
// `negative_matching_value` are stored as given, and the regex type is always Re2.
void TagNameValues::addRe2(const std::string& name, const std::string& regex,
                           const std::string& substr, const std::string& negative_matching_value) {
  descriptor_vec_.push_back(
      Descriptor{name, expandRegex(regex), substr, negative_matching_value, Regex::Type::Re2});
}
// Appends a token-pattern tag descriptor to tokenized_descriptor_vec_, pairing `name` with the
// pattern `tokens`; both are stored as given.
void TagNameValues::addTokenized(const std::string& name, const std::string& tokens) {
  tokenized_descriptor_vec_.push_back(TokenizedDescriptor{name, tokens});
}
} // namespace Config
} // namespace Envoy