Skip to content

Commit 27586aa

Browse files
authored
Try out offficial prometheus_client for metrics (#72)
* Try out official `prometheus_client` crate Signed-off-by: clux <[email protected]> * fix metric setting Signed-off-by: clux <[email protected]> * make metrics work with registry Signed-off-by: clux <[email protected]> * fix prefix Signed-off-by: clux <[email protected]> * make exemplars work Signed-off-by: clux <[email protected]> * openmetrics response header Signed-off-by: clux <[email protected]> * simplify Signed-off-by: clux <[email protected]> --------- Signed-off-by: clux <[email protected]>
1 parent 6635a3a commit 27586aa

File tree

7 files changed

+138
-90
lines changed

7 files changed

+138
-90
lines changed

Cargo.lock

Lines changed: 24 additions & 16 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ schemars = { version = "0.8.12", features = ["chrono"] }
3434
serde = { version = "1.0.185", features = ["derive"] }
3535
serde_json = "1.0.105"
3636
serde_yaml = "0.9.25"
37-
prometheus = "0.13.3"
3837
chrono = { version = "0.4.26", features = ["serde"] }
3938
tracing = "0.1.37"
4039
tracing-subscriber = { version = "0.3.17", features = ["json", "env-filter"] }
@@ -44,6 +43,7 @@ opentelemetry-otlp = { version = "0.13.0", features = ["tokio"], optional = true
4443
tonic = { version = "0.9", optional = true }
4544
thiserror = "1.0.47"
4645
anyhow = "1.0.75"
46+
prometheus-client = "0.22.2"
4747

4848
[dev-dependencies]
4949
assert-json-diff = "2.0.2"

justfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ generate:
1212

1313
# run with opentelemetry
1414
run-telemetry:
15-
OPENTELEMETRY_ENDPOINT_URL=http://127.0.0.1:55680 RUST_LOG=info,kube=trace,controller=debug cargo run --features=telemetry
15+
OPENTELEMETRY_ENDPOINT_URL=http://127.0.0.1:55680 RUST_LOG=info,kube=debug,controller=debug cargo run --features=telemetry
1616

1717
# run without opentelemetry
1818
run:

src/controller.rs

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -53,14 +53,14 @@ pub struct Context {
5353
/// Diagnostics read by the web server
5454
pub diagnostics: Arc<RwLock<Diagnostics>>,
5555
/// Prometheus metrics
56-
pub metrics: Metrics,
56+
pub metrics: Arc<Metrics>,
5757
}
5858

5959
#[instrument(skip(ctx, doc), fields(trace_id))]
6060
async fn reconcile(doc: Arc<Document>, ctx: Arc<Context>) -> Result<Action> {
6161
let trace_id = telemetry::get_trace_id();
6262
Span::current().record("trace_id", &field::display(&trace_id));
63-
let _timer = ctx.metrics.count_and_measure();
63+
let _timer = ctx.metrics.reconcile.count_and_measure(&trace_id);
6464
ctx.diagnostics.write().await.last_event = Utc::now();
6565
let ns = doc.namespace().unwrap(); // doc is namespace scoped
6666
let docs: Api<Document> = Api::namespaced(ctx.client.clone(), &ns);
@@ -78,7 +78,7 @@ async fn reconcile(doc: Arc<Document>, ctx: Arc<Context>) -> Result<Action> {
7878

7979
fn error_policy(doc: Arc<Document>, error: &Error, ctx: Arc<Context>) -> Action {
8080
warn!("reconcile failed: {:?}", error);
81-
ctx.metrics.reconcile_failure(&doc, error);
81+
ctx.metrics.reconcile.set_failure(&doc, error);
8282
Action::requeue(Duration::from_secs(5 * 60))
8383
}
8484

@@ -171,15 +171,18 @@ impl Diagnostics {
171171
pub struct State {
172172
/// Diagnostics populated by the reconciler
173173
diagnostics: Arc<RwLock<Diagnostics>>,
174-
/// Metrics registry
175-
registry: prometheus::Registry,
174+
/// Metrics
175+
metrics: Arc<Metrics>,
176176
}
177177

178178
/// State wrapper around the controller outputs for the web server
179179
impl State {
180180
/// Metrics getter
181-
pub fn metrics(&self) -> Vec<prometheus::proto::MetricFamily> {
182-
self.registry.gather()
181+
pub fn metrics(&self) -> String {
182+
let mut buffer = String::new();
183+
let registry = &*self.metrics.registry;
184+
prometheus_client::encoding::text::encode(&mut buffer, &registry).unwrap();
185+
buffer
183186
}
184187

185188
/// State getter
@@ -191,7 +194,7 @@ impl State {
191194
pub fn to_context(&self, client: Client) -> Arc<Context> {
192195
Arc::new(Context {
193196
client,
194-
metrics: Metrics::default().register(&self.registry).unwrap(),
197+
metrics: self.metrics.clone(),
195198
diagnostics: self.diagnostics.clone(),
196199
})
197200
}
@@ -218,12 +221,15 @@ pub async fn run(state: State) {
218221
#[cfg(test)]
219222
mod test {
220223
use super::{error_policy, reconcile, Context, Document};
221-
use crate::fixtures::{timeout_after_1s, Scenario};
224+
use crate::{
225+
fixtures::{timeout_after_1s, Scenario},
226+
metrics::ErrorLabels,
227+
};
222228
use std::sync::Arc;
223229

224230
#[tokio::test]
225231
async fn documents_without_finalizer_gets_a_finalizer() {
226-
let (testctx, fakeserver, _) = Context::test();
232+
let (testctx, fakeserver) = Context::test();
227233
let doc = Document::test();
228234
let mocksrv = fakeserver.run(Scenario::FinalizerCreation(doc.clone()));
229235
reconcile(Arc::new(doc), testctx).await.expect("reconciler");
@@ -232,7 +238,7 @@ mod test {
232238

233239
#[tokio::test]
234240
async fn finalized_doc_causes_status_patch() {
235-
let (testctx, fakeserver, _) = Context::test();
241+
let (testctx, fakeserver) = Context::test();
236242
let doc = Document::test().finalized();
237243
let mocksrv = fakeserver.run(Scenario::StatusPatch(doc.clone()));
238244
reconcile(Arc::new(doc), testctx).await.expect("reconciler");
@@ -241,7 +247,7 @@ mod test {
241247

242248
#[tokio::test]
243249
async fn finalized_doc_with_hide_causes_event_and_hide_patch() {
244-
let (testctx, fakeserver, _) = Context::test();
250+
let (testctx, fakeserver) = Context::test();
245251
let doc = Document::test().finalized().needs_hide();
246252
let scenario = Scenario::EventPublishThenStatusPatch("HideRequested".into(), doc.clone());
247253
let mocksrv = fakeserver.run(scenario);
@@ -251,7 +257,7 @@ mod test {
251257

252258
#[tokio::test]
253259
async fn finalized_doc_with_delete_timestamp_causes_delete() {
254-
let (testctx, fakeserver, _) = Context::test();
260+
let (testctx, fakeserver) = Context::test();
255261
let doc = Document::test().finalized().needs_delete();
256262
let mocksrv = fakeserver.run(Scenario::Cleanup("DeleteRequested".into(), doc.clone()));
257263
reconcile(Arc::new(doc), testctx).await.expect("reconciler");
@@ -260,7 +266,7 @@ mod test {
260266

261267
#[tokio::test]
262268
async fn illegal_doc_reconcile_errors_which_bumps_failure_metric() {
263-
let (testctx, fakeserver, _registry) = Context::test();
269+
let (testctx, fakeserver) = Context::test();
264270
let doc = Arc::new(Document::illegal().finalized());
265271
let mocksrv = fakeserver.run(Scenario::RadioSilence);
266272
let res = reconcile(doc.clone(), testctx.clone()).await;
@@ -270,12 +276,12 @@ mod test {
270276
assert!(err.to_string().contains("IllegalDocument"));
271277
// calling error policy with the reconciler error should cause the correct metric to be set
272278
error_policy(doc.clone(), &err, testctx.clone());
273-
//dbg!("actual metrics: {}", registry.gather());
274-
let failures = testctx
275-
.metrics
276-
.failures
277-
.with_label_values(&["illegal", "finalizererror(applyfailed(illegaldocument))"])
278-
.get();
279+
let err_labels = ErrorLabels {
280+
instance: "illegal".into(),
281+
error: "finalizererror(applyfailed(illegaldocument))".into(),
282+
};
283+
let metrics = &testctx.metrics.reconcile;
284+
let failures = metrics.failures.get_or_create(&err_labels).get();
279285
assert_eq!(failures, 1);
280286
}
281287

src/fixtures.rs

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
//! Helper methods only available for tests
2-
use crate::{Context, Document, DocumentSpec, DocumentStatus, Metrics, Result, DOCUMENT_FINALIZER};
2+
use crate::{Context, Document, DocumentSpec, DocumentStatus, Result, DOCUMENT_FINALIZER};
33
use assert_json_diff::assert_json_include;
44
use http::{Request, Response};
55
use kube::{client::Body, Client, Resource, ResourceExt};
6-
use prometheus::Registry;
76
use std::sync::Arc;
87

98
impl Document {
@@ -208,15 +207,14 @@ impl ApiServerVerifier {
208207

209208
impl Context {
210209
// Create a test context with a mocked kube client, locally registered metrics and default diagnostics
211-
pub fn test() -> (Arc<Self>, ApiServerVerifier, Registry) {
210+
pub fn test() -> (Arc<Self>, ApiServerVerifier) {
212211
let (mock_service, handle) = tower_test::mock::pair::<Request<Body>, Response<Body>>();
213212
let mock_client = Client::new(mock_service, "default");
214-
let registry = Registry::default();
215213
let ctx = Self {
216214
client: mock_client,
217-
metrics: Metrics::default().register(&registry).unwrap(),
215+
metrics: Arc::default(),
218216
diagnostics: Arc::default(),
219217
};
220-
(Arc::new(ctx), ApiServerVerifier(handle), registry)
218+
(Arc::new(ctx), ApiServerVerifier(handle))
221219
}
222220
}

src/main.rs

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,13 @@
11
#![allow(unused_imports, unused_variables)]
22
use actix_web::{get, middleware, web::Data, App, HttpRequest, HttpResponse, HttpServer, Responder};
33
pub use controller::{self, telemetry, State};
4-
use prometheus::{Encoder, TextEncoder};
54

65
#[get("/metrics")]
76
async fn metrics(c: Data<State>, _req: HttpRequest) -> impl Responder {
87
let metrics = c.metrics();
9-
let encoder = TextEncoder::new();
10-
let mut buffer = vec![];
11-
encoder.encode(&metrics, &mut buffer).unwrap();
12-
HttpResponse::Ok().body(buffer)
8+
HttpResponse::Ok()
9+
.content_type("application/openmetrics-text; version=1.0.0; charset=utf-8")
10+
.body(metrics)
1311
}
1412

1513
#[get("/health")]

0 commit comments

Comments
 (0)