@@ -53,14 +53,14 @@ pub struct Context {
53
53
/// Diagnostics read by the web server
54
54
pub diagnostics : Arc < RwLock < Diagnostics > > ,
55
55
/// Prometheus metrics
56
- pub metrics : Metrics ,
56
+ pub metrics : Arc < Metrics > ,
57
57
}
58
58
59
59
#[ instrument( skip( ctx, doc) , fields( trace_id) ) ]
60
60
async fn reconcile ( doc : Arc < Document > , ctx : Arc < Context > ) -> Result < Action > {
61
61
let trace_id = telemetry:: get_trace_id ( ) ;
62
62
Span :: current ( ) . record ( "trace_id" , & field:: display ( & trace_id) ) ;
63
- let _timer = ctx. metrics . count_and_measure ( ) ;
63
+ let _timer = ctx. metrics . reconcile . count_and_measure ( & trace_id ) ;
64
64
ctx. diagnostics . write ( ) . await . last_event = Utc :: now ( ) ;
65
65
let ns = doc. namespace ( ) . unwrap ( ) ; // doc is namespace scoped
66
66
let docs: Api < Document > = Api :: namespaced ( ctx. client . clone ( ) , & ns) ;
@@ -78,7 +78,7 @@ async fn reconcile(doc: Arc<Document>, ctx: Arc<Context>) -> Result<Action> {
78
78
79
79
/// Handles an error produced by a reconcile attempt.
///
/// Logs the error at warn level, records the failure on the metrics held by `ctx`
/// (labelled from `doc` and `error`), and returns an `Action` that requeues after
/// `5 * 60` seconds.
fn error_policy ( doc : Arc < Document > , error : & Error , ctx : Arc < Context > ) -> Action {
80
80
warn ! ( "reconcile failed: {:?}" , error) ;
// Diff: failure recording moves from `metrics.reconcile_failure` to the nested
// `metrics.reconcile.set_failure` (the `-` line is the old call, the `+` line the new one).
81
- ctx. metrics . reconcile_failure ( & doc, error) ;
81
+ ctx. metrics . reconcile . set_failure ( & doc, error) ;
82
82
// Requeue delay: 5 minutes.
Action :: requeue ( Duration :: from_secs ( 5 * 60 ) )
83
83
}
84
84
@@ -171,15 +171,18 @@ impl Diagnostics {
171
171
// Diff hunk: the `registry : prometheus::Registry` field is replaced by a shared
// `metrics : Arc<Metrics>` field (`-` lines are the old definition, `+` lines the new one).
pub struct State {
172
172
/// Diagnostics populated by the reconciler
173
173
diagnostics : Arc < RwLock < Diagnostics > > ,
174
- /// Metrics registry
175
- registry : prometheus :: Registry ,
174
+ /// Shared metrics handle; its `registry` is encoded by `State::metrics` and the
+ /// `Arc` is cloned into each `Context` by `State::to_context`
175
+ metrics : Arc < Metrics > ,
176
176
}
177
177
178
178
/// State wrapper around the controller outputs for the web server
179
179
impl State {
180
180
/// Metrics getter
///
/// New version (`+` lines): encodes `self.metrics.registry` with
/// `prometheus_client::encoding::text::encode` into a fresh `String` and returns it.
/// Old version (`-` lines): returned the result of `self.registry.gather()`.
///
/// # Panics
///
/// The new version unwraps the result of `encode`, so it panics if encoding fails.
181
- pub fn metrics ( & self ) -> Vec < prometheus:: proto:: MetricFamily > {
182
- self . registry . gather ( )
181
+ pub fn metrics ( & self ) -> String {
182
+ let mut buffer = String :: new ( ) ;
183
+ let registry = & * self . metrics . registry ;
// NOTE(review): `registry` is already a reference, so `& registry` below is a
// reference-to-reference that presumably relies on deref coercion; passing
// `registry` directly looks equivalent — confirm against the `encode` signature.
184
+ prometheus_client:: encoding:: text:: encode ( & mut buffer, & registry) . unwrap ( ) ;
185
+ buffer
183
186
}
184
187
185
188
/// State getter
@@ -191,7 +194,7 @@ impl State {
191
194
// Builds a reconciler `Context` wrapped in an `Arc`, taking ownership of `client`
// and sharing this state's `diagnostics` handle via `clone()`.
pub fn to_context ( & self , client : Client ) -> Arc < Context > {
192
195
Arc :: new ( Context {
193
196
client,
// Diff: the old code built a fresh `Metrics::default()` and registered it on
// `self.registry` (unwrapping the result) on every call; the new code clones the
// `self.metrics` handle already stored on the state instead.
194
- metrics : Metrics :: default ( ) . register ( & self . registry ) . unwrap ( ) ,
197
+ metrics : self . metrics . clone ( ) ,
195
198
diagnostics : self . diagnostics . clone ( ) ,
196
199
} )
197
200
}
@@ -218,12 +221,15 @@ pub async fn run(state: State) {
218
221
#[ cfg( test) ]
219
222
mod test {
220
223
use super :: { error_policy, reconcile, Context , Document } ;
221
- use crate :: fixtures:: { timeout_after_1s, Scenario } ;
224
+ use crate :: {
225
+ fixtures:: { timeout_after_1s, Scenario } ,
226
+ metrics:: ErrorLabels ,
227
+ } ;
222
228
use std:: sync:: Arc ;
223
229
224
230
#[ tokio:: test]
225
231
async fn documents_without_finalizer_gets_a_finalizer ( ) {
226
- let ( testctx, fakeserver, _ ) = Context :: test ( ) ;
232
+ let ( testctx, fakeserver) = Context :: test ( ) ;
227
233
let doc = Document :: test ( ) ;
228
234
let mocksrv = fakeserver. run ( Scenario :: FinalizerCreation ( doc. clone ( ) ) ) ;
229
235
reconcile ( Arc :: new ( doc) , testctx) . await . expect ( "reconciler" ) ;
@@ -232,7 +238,7 @@ mod test {
232
238
233
239
#[ tokio:: test]
234
240
async fn finalized_doc_causes_status_patch ( ) {
235
- let ( testctx, fakeserver, _ ) = Context :: test ( ) ;
241
+ let ( testctx, fakeserver) = Context :: test ( ) ;
236
242
let doc = Document :: test ( ) . finalized ( ) ;
237
243
let mocksrv = fakeserver. run ( Scenario :: StatusPatch ( doc. clone ( ) ) ) ;
238
244
reconcile ( Arc :: new ( doc) , testctx) . await . expect ( "reconciler" ) ;
@@ -241,7 +247,7 @@ mod test {
241
247
242
248
#[ tokio:: test]
243
249
async fn finalized_doc_with_hide_causes_event_and_hide_patch ( ) {
244
- let ( testctx, fakeserver, _ ) = Context :: test ( ) ;
250
+ let ( testctx, fakeserver) = Context :: test ( ) ;
245
251
let doc = Document :: test ( ) . finalized ( ) . needs_hide ( ) ;
246
252
let scenario = Scenario :: EventPublishThenStatusPatch ( "HideRequested" . into ( ) , doc. clone ( ) ) ;
247
253
let mocksrv = fakeserver. run ( scenario) ;
@@ -251,7 +257,7 @@ mod test {
251
257
252
258
#[ tokio:: test]
253
259
async fn finalized_doc_with_delete_timestamp_causes_delete ( ) {
254
- let ( testctx, fakeserver, _ ) = Context :: test ( ) ;
260
+ let ( testctx, fakeserver) = Context :: test ( ) ;
255
261
let doc = Document :: test ( ) . finalized ( ) . needs_delete ( ) ;
256
262
let mocksrv = fakeserver. run ( Scenario :: Cleanup ( "DeleteRequested" . into ( ) , doc. clone ( ) ) ) ;
257
263
reconcile ( Arc :: new ( doc) , testctx) . await . expect ( "reconciler" ) ;
@@ -260,7 +266,7 @@ mod test {
260
266
261
267
#[ tokio:: test]
262
268
async fn illegal_doc_reconcile_errors_which_bumps_failure_metric ( ) {
263
- let ( testctx, fakeserver, _registry ) = Context :: test ( ) ;
269
+ let ( testctx, fakeserver) = Context :: test ( ) ;
264
270
let doc = Arc :: new ( Document :: illegal ( ) . finalized ( ) ) ;
265
271
let mocksrv = fakeserver. run ( Scenario :: RadioSilence ) ;
266
272
let res = reconcile ( doc. clone ( ) , testctx. clone ( ) ) . await ;
@@ -270,12 +276,12 @@ mod test {
270
276
assert ! ( err. to_string( ) . contains( "IllegalDocument" ) ) ;
271
277
// calling error policy with the reconciler error should cause the correct metric to be set
272
278
error_policy ( doc. clone ( ) , & err, testctx. clone ( ) ) ;
273
- //dbg!("actual metrics: {}", registry.gather());
274
- let failures = testctx
275
- . metrics
276
- . failures
277
- . with_label_values ( & [ "illegal" , "finalizererror(applyfailed(illegaldocument))" ] )
278
- . get ( ) ;
279
+ let err_labels = ErrorLabels {
280
+ instance : "illegal" . into ( ) ,
281
+ error : "finalizererror(applyfailed(illegaldocument))" . into ( ) ,
282
+ } ;
283
+ let metrics = & testctx . metrics . reconcile ;
284
+ let failures = metrics . failures . get_or_create ( & err_labels ) . get ( ) ;
279
285
assert_eq ! ( failures, 1 ) ;
280
286
}
281
287
0 commit comments