Skip to content

Commit 7fcadfc

Browse files
authored
stats: optional per-endpoint metrics (#29709)
Signed-off-by: Greg Greenway <[email protected]>
1 parent 1203508 commit 7fcadfc

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

61 files changed

+1558
-260
lines changed

Diff for: api/envoy/config/cluster/v3/cluster.proto

+15
Original file line numberDiff line numberDiff line change
@@ -1257,4 +1257,19 @@ message TrackClusterStats {
12571257
// <config_cluster_manager_cluster_stats_request_response_sizes>` tracking header and body sizes
12581258
// of requests and responses will be published.
12591259
bool request_response_sizes = 2;
1260+
1261+
// If true, some stats will be emitted per-endpoint, similar to the stats in admin ``/clusters``
1262+
// output.
1263+
//
1264+
// This does not currently output correct stats during a hot-restart.
1265+
//
1266+
// This is not currently implemented by all stat sinks.
1267+
//
1268+
// These stats do not honor filtering or tag extraction rules in :ref:`StatsConfig
1269+
// <envoy_v3_api_msg_config.metrics.v3.StatsConfig>` (but fixed-value tags are supported). Admin
1270+
// endpoint filtering is supported.
1271+
//
1272+
// This may not be used at the same time as
1273+
// :ref:`load_stats_config <envoy_v3_api_field_config.bootstrap.v3.ClusterManager.load_stats_config>`.
1274+
bool per_endpoint_stats = 3;
12601275
}

Diff for: changelogs/current.yaml

+4
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,10 @@ removed_config_or_runtime:
5656
runtime flag and legacy code path.
5757
5858
new_features:
59+
- area: stats
60+
change: |
61+
added :ref:`per_endpoint_stats <envoy_v3_api_field_config.cluster.v3.TrackClusterStats.per_endpoint_stats>` to get some metrics
62+
for each endpoint in a cluster.
5963
- area: jwt
6064
change: |
6165
The jwt filter can now serialize non-primitive custom claims when mapping claims to headers.

Diff for: envoy/server/BUILD

+1
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ envoy_cc_library(
4343
external_deps = ["abseil_optional"],
4444
deps = [
4545
"//envoy/http:context_interface",
46+
"//envoy/stats:sink_interface",
4647
"//envoy/upstream:cluster_manager_interface",
4748
"@envoy_api//envoy/config/bootstrap/v3:pkg_cc_proto",
4849
],

Diff for: envoy/stats/BUILD

+16-2
Original file line numberDiff line numberDiff line change
@@ -14,36 +14,50 @@ envoy_cc_library(
1414
deps = ["//source/common/common:assert_lib"],
1515
)
1616

17+
envoy_cc_library(
18+
name = "tag_interface",
19+
hdrs = ["tag.h"],
20+
)
21+
1722
# TODO(jmarantz): atomize the build rules to match the include files.
1823
envoy_cc_library(
1924
name = "stats_interface",
2025
hdrs = [
2126
"allocator.h",
2227
"histogram.h",
2328
"scope.h",
24-
"sink.h",
2529
"stats.h",
2630
"stats_matcher.h",
2731
"store.h",
28-
"tag.h",
2932
"tag_extractor.h",
3033
"tag_producer.h",
3134
],
3235
external_deps = ["abseil_inlined_vector"],
3336
deps = [
3437
":refcount_ptr_interface",
38+
":tag_interface",
3539
"//envoy/common:interval_set_interface",
3640
"//envoy/common:optref_lib",
3741
"//envoy/common:time_interface",
3842
],
3943
)
4044

45+
envoy_cc_library(
46+
name = "sink_interface",
47+
hdrs = ["sink.h"],
48+
deps = [
49+
":primitive_stats_interface",
50+
":stats_interface",
51+
],
52+
)
53+
4154
envoy_cc_library(
4255
name = "primitive_stats_interface",
4356
hdrs = [
4457
"primitive_stats.h",
4558
],
4659
deps = [
60+
":tag_interface",
4761
"//source/common/common:assert_lib",
4862
"//source/common/common:non_copyable",
4963
],

Diff for: envoy/stats/primitive_stats.h

+49-2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
#include <string>
44

5+
#include "envoy/stats/tag.h"
6+
57
#include "source/common/common/assert.h"
68
#include "source/common/common/non_copyable.h"
79

@@ -34,7 +36,7 @@ class PrimitiveCounter : NonCopyable {
3436
std::atomic<uint64_t> pending_increment_{0};
3537
};
3638

37-
using PrimitiveCounterReference = std::reference_wrapper<const PrimitiveCounter>;
39+
using PrimitiveCounterReference = std::reference_wrapper<PrimitiveCounter>;
3840

3941
/**
4042
* Primitive, low-memory-overhead gauge with increment and decrement capabilities.
@@ -58,7 +60,52 @@ class PrimitiveGauge : NonCopyable {
5860
std::atomic<uint64_t> value_{0};
5961
};
6062

61-
using PrimitiveGaugeReference = std::reference_wrapper<const PrimitiveGauge>;
63+
using PrimitiveGaugeReference = std::reference_wrapper<PrimitiveGauge>;
64+
65+
class PrimitiveMetricMetadata {
66+
public:
67+
// Mirror some of the API for Stats::Metric for use in templates that
68+
// accept either Counter/Gauge or PrimitiveCounterSnapshot/PrimitiveGaugeSnapshot.
69+
const std::string& tagExtractedName() const { return tag_extracted_name_; }
70+
const std::string& name() const { return name_; }
71+
const Stats::TagVector& tags() const { return tags_; }
72+
bool used() const { return true; }
73+
bool hidden() const { return false; }
74+
75+
void setName(std::string&& name) { name_ = std::move(name); }
76+
void setTagExtractedName(std::string&& tag_extracted_name) {
77+
tag_extracted_name_ = std::move(tag_extracted_name);
78+
}
79+
void setTags(const Stats::TagVector& tags) { tags_ = tags; }
80+
81+
private:
82+
std::string name_;
83+
std::string tag_extracted_name_;
84+
Stats::TagVector tags_;
85+
};
86+
87+
class PrimitiveCounterSnapshot : public PrimitiveMetricMetadata {
88+
public:
89+
PrimitiveCounterSnapshot(PrimitiveCounter& counter)
90+
: value_(counter.value()), delta_(counter.latch()) {}
91+
92+
uint64_t value() const { return value_; }
93+
uint64_t delta() const { return delta_; }
94+
95+
private:
96+
const uint64_t value_;
97+
const uint64_t delta_;
98+
};
99+
100+
class PrimitiveGaugeSnapshot : public PrimitiveMetricMetadata {
101+
public:
102+
PrimitiveGaugeSnapshot(PrimitiveGauge& gauge) : value_(gauge.value()) {}
103+
104+
uint64_t value() const { return value_; }
105+
106+
private:
107+
const uint64_t value_;
108+
};
62109

63110
} // namespace Stats
64111
} // namespace Envoy

Diff for: envoy/stats/sink.h

+11
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include "envoy/common/pure.h"
77
#include "envoy/common/time.h"
88
#include "envoy/stats/histogram.h"
9+
#include "envoy/stats/primitive_stats.h"
910
#include "envoy/stats/stats.h"
1011

1112
namespace Envoy {
@@ -42,6 +43,16 @@ class MetricSnapshot {
4243
*/
4344
virtual const std::vector<std::reference_wrapper<const TextReadout>>& textReadouts() PURE;
4445

46+
/**
47+
* @return a snapshot of all host/endpoint-specific primitive counters.
48+
*/
49+
virtual const std::vector<Stats::PrimitiveCounterSnapshot>& hostCounters() PURE;
50+
51+
/**
52+
* @return a snapshot of all host/endpoint-specific primitive gauges.
53+
*/
54+
virtual const std::vector<Stats::PrimitiveGaugeSnapshot>& hostGauges() PURE;
55+
4556
/**
4657
* @return the time in UTC since epoch when the snapshot was created.
4758
*/

Diff for: envoy/stats/stats.h

+4
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@ class SymbolTable;
2626

2727
/**
2828
* General interface for all stats objects.
29+
*
30+
* Note: some methods must match those in `PrimitiveMetricMetadata` because stats sinks
31+
* use templates to handle either type. The interface is not used for size/performance
32+
* reasons.
2933
*/
3034
class Metric : public RefcountInterface {
3135
public:

Diff for: envoy/stats/store.h

+5
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,11 @@ class Store {
193193
*/
194194
virtual void extractAndAppendTags(absl::string_view name, StatNamePool& pool,
195195
StatNameTagVector& stat_tags) PURE;
196+
197+
/**
198+
* Returns the configured fixed tags (which don't depend on the name of the stat).
199+
*/
200+
virtual const TagVector& fixedTags() PURE;
196201
};
197202

198203
using StorePtr = std::unique_ptr<Store>;

Diff for: envoy/stats/tag_producer.h

+2
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ class TagProducer {
3030
* @param tags TagVector a set of Stats::Tag.
3131
*/
3232
virtual std::string produceTags(absl::string_view metric_name, TagVector& tags) const PURE;
33+
34+
virtual const TagVector& fixedTags() const PURE;
3335
};
3436

3537
using TagProducerPtr = std::unique_ptr<const TagProducer>;

Diff for: envoy/upstream/host_description.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,12 @@ struct HostStats {
4646
ALL_HOST_STATS(GENERATE_PRIMITIVE_COUNTER_STRUCT, GENERATE_PRIMITIVE_GAUGE_STRUCT);
4747

4848
// Provide access to name,counter pairs.
49-
std::vector<std::pair<absl::string_view, Stats::PrimitiveCounterReference>> counters() const {
49+
std::vector<std::pair<absl::string_view, Stats::PrimitiveCounterReference>> counters() {
5050
return {ALL_HOST_STATS(PRIMITIVE_COUNTER_NAME_AND_REFERENCE, IGNORE_PRIMITIVE_GAUGE)};
5151
}
5252

5353
// Provide access to name,gauge pairs.
54-
std::vector<std::pair<absl::string_view, Stats::PrimitiveGaugeReference>> gauges() const {
54+
std::vector<std::pair<absl::string_view, Stats::PrimitiveGaugeReference>> gauges() {
5555
return {ALL_HOST_STATS(IGNORE_PRIMITIVE_COUNTER, PRIMITIVE_GAUGE_NAME_AND_REFERENCE)};
5656
}
5757
};

Diff for: envoy/upstream/upstream.h

+5
Original file line numberDiff line numberDiff line change
@@ -1172,6 +1172,11 @@ class ClusterInfo : public Http::FilterChainFactory {
11721172
*/
11731173
virtual ClusterTimeoutBudgetStatsOptRef timeoutBudgetStats() const PURE;
11741174

1175+
/**
1176+
* @return true if this cluster should produce per-endpoint stats.
1177+
*/
1178+
virtual bool perEndpointStatsEnabled() const PURE;
1179+
11751180
/**
11761181
* @return std::shared_ptr<UpstreamLocalAddressSelector> as upstream local address selector.
11771182
*/

Diff for: source/common/stats/BUILD

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ envoy_cc_library(
1515
deps = [
1616
":metric_impl_lib",
1717
":stat_merger_lib",
18+
"//envoy/stats:sink_interface",
1819
"//source/common/common:assert_lib",
1920
"//source/common/common:hash_lib",
2021
"//source/common/common:thread_annotations",

Diff for: source/common/stats/isolated_store_impl.h

+1
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,7 @@ class IsolatedStoreImpl : public Store {
236236

237237
void extractAndAppendTags(StatName, StatNamePool&, StatNameTagVector&) override {}
238238
void extractAndAppendTags(absl::string_view, StatNamePool&, StatNameTagVector&) override {}
239+
const TagVector& fixedTags() override { CONSTRUCT_ON_FIRST_USE(TagVector); }
239240

240241
protected:
241242
/**

Diff for: source/common/stats/tag_producer_impl.cc

+2
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ TagProducerImpl::TagProducerImpl(const envoy::config::metrics::v3::StatsConfig&
2121

2222
for (const auto& cli_tag : cli_tags) {
2323
addExtractor(std::make_unique<TagExtractorFixedImpl>(cli_tag.name_, cli_tag.value_));
24+
fixed_tags_.push_back(cli_tag);
2425
}
2526

2627
for (const auto& tag_specifier : config.stats_tags()) {
@@ -43,6 +44,7 @@ TagProducerImpl::TagProducerImpl(const envoy::config::metrics::v3::StatsConfig&
4344
} else if (tag_specifier.tag_value_case() ==
4445
envoy::config::metrics::v3::TagSpecifier::TagValueCase::kFixedValue) {
4546
addExtractor(std::make_unique<TagExtractorFixedImpl>(name, tag_specifier.fixed_value()));
47+
fixed_tags_.push_back(Tag{name, tag_specifier.fixed_value()});
4648
}
4749
}
4850
}

Diff for: source/common/stats/tag_producer_impl.h

+4
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ class TagProducerImpl : public TagProducer {
4343
*/
4444
std::string produceTags(absl::string_view metric_name, TagVector& tags) const override;
4545

46+
const TagVector& fixedTags() const override { return fixed_tags_; }
47+
4648
private:
4749
friend class DefaultTagRegexTester;
4850

@@ -104,6 +106,8 @@ class TagProducerImpl : public TagProducer {
104106
// we need to elide duplicate extractors during extraction. It is not valid to
105107
// send duplicate tag names to Prometheus so this needs to be filtered out.
106108
absl::flat_hash_map<absl::string_view, std::reference_wrapper<TagExtractor>> extractor_map_;
109+
110+
TagVector fixed_tags_;
107111
};
108112

109113
} // namespace Stats

Diff for: source/common/stats/thread_local_store.h

+1
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,7 @@ class ThreadLocalStoreImpl : Logger::Loggable<Logger::Id::stats>, public StoreRo
224224
void extractAndAppendTags(StatName name, StatNamePool& pool, StatNameTagVector& tags) override;
225225
void extractAndAppendTags(absl::string_view name, StatNamePool& pool,
226226
StatNameTagVector& tags) override;
227+
const TagVector& fixedTags() override { return tag_producer_->fixedTags(); };
227228

228229
private:
229230
friend class ThreadLocalStoreTestingPeer;

Diff for: source/common/upstream/BUILD

+2
Original file line numberDiff line numberDiff line change
@@ -250,8 +250,10 @@ envoy_cc_library(
250250
srcs = ["host_utility.cc"],
251251
hdrs = ["host_utility.h"],
252252
deps = [
253+
"//envoy/stats:primitive_stats_interface",
253254
"//envoy/upstream:load_balancer_interface",
254255
"//envoy/upstream:upstream_interface",
256+
"//source/common/config:well_known_names",
255257
"//source/common/runtime:runtime_lib",
256258
],
257259
)

Diff for: source/common/upstream/host_utility.cc

+67
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include <string>
44

5+
#include "source/common/config/well_known_names.h"
56
#include "source/common/runtime/runtime_features.h"
67

78
namespace Envoy {
@@ -153,5 +154,71 @@ HostConstSharedPtr HostUtility::selectOverrideHost(const HostMap* host_map, Host
153154
return nullptr;
154155
}
155156

157+
void HostUtility::forEachHostMetric(
158+
const ClusterManager& cluster_manager,
159+
const std::function<void(Stats::PrimitiveCounterSnapshot&& metric)>& counter_cb,
160+
const std::function<void(Stats::PrimitiveGaugeSnapshot&& metric)>& gauge_cb) {
161+
for (const auto& [unused_name, cluster_ref] : cluster_manager.clusters().active_clusters_) {
162+
Upstream::ClusterInfoConstSharedPtr cluster_info = cluster_ref.get().info();
163+
if (cluster_info->perEndpointStatsEnabled()) {
164+
const std::string cluster_name =
165+
Stats::Utility::sanitizeStatsName(cluster_info->observabilityName());
166+
167+
const Stats::TagVector& fixed_tags = cluster_info->statsScope().store().fixedTags();
168+
169+
for (auto& host_set : cluster_ref.get().prioritySet().hostSetsPerPriority()) {
170+
for (auto& host : host_set->hosts()) {
171+
172+
Stats::TagVector tags;
173+
tags.reserve(fixed_tags.size() + 3);
174+
tags.insert(tags.end(), fixed_tags.begin(), fixed_tags.end());
175+
tags.emplace_back(Stats::Tag{Envoy::Config::TagNames::get().CLUSTER_NAME, cluster_name});
176+
tags.emplace_back(Stats::Tag{"envoy.endpoint_address", host->address()->asString()});
177+
178+
const auto& hostname = host->hostname();
179+
if (!hostname.empty()) {
180+
tags.push_back({"envoy.endpoint_hostname", hostname});
181+
}
182+
183+
auto set_metric_metadata = [&](absl::string_view metric_name,
184+
Stats::PrimitiveMetricMetadata& metric) {
185+
metric.setName(
186+
absl::StrCat("cluster.", cluster_name, ".endpoint.",
187+
Stats::Utility::sanitizeStatsName(host->address()->asStringView()),
188+
".", metric_name));
189+
metric.setTagExtractedName(absl::StrCat("cluster.endpoint.", metric_name));
190+
metric.setTags(tags);
191+
192+
// Validate that all components were sanitized.
193+
ASSERT(metric.name() == Stats::Utility::sanitizeStatsName(metric.name()));
194+
ASSERT(metric.tagExtractedName() ==
195+
Stats::Utility::sanitizeStatsName(metric.tagExtractedName()));
196+
};
197+
198+
for (auto& [metric_name, primitive] : host->counters()) {
199+
Stats::PrimitiveCounterSnapshot metric(primitive.get());
200+
set_metric_metadata(metric_name, metric);
201+
202+
counter_cb(std::move(metric));
203+
}
204+
205+
auto gauges = host->gauges();
206+
207+
// Add synthetic "healthy" gauge.
208+
Stats::PrimitiveGauge healthy_gauge;
209+
healthy_gauge.set((host->coarseHealth() == Host::Health::Healthy) ? 1 : 0);
210+
gauges.emplace_back(absl::string_view("healthy"), healthy_gauge);
211+
212+
for (auto& [metric_name, primitive] : gauges) {
213+
Stats::PrimitiveGaugeSnapshot metric(primitive.get());
214+
set_metric_metadata(metric_name, metric);
215+
gauge_cb(std::move(metric));
216+
}
217+
}
218+
}
219+
}
220+
}
221+
}
222+
156223
} // namespace Upstream
157224
} // namespace Envoy

0 commit comments

Comments
 (0)