Skip to content

Commit 0a4a5f9

Browse files
committed
Revamp storage metrics
1 parent 4dc3f71 commit 0a4a5f9

File tree

7 files changed: 583 additions and 150 deletions.

quickwit/quickwit-storage/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@ mod cache;
2929
mod debouncer;
3030
mod file_descriptor_cache;
3131
mod metrics;
32+
mod metrics_wrappers;
3233
mod storage;
3334
mod timeout_and_retry_storage;
3435
pub use debouncer::AsyncDebouncer;

quickwit/quickwit-storage/src/metrics.rs

Lines changed: 37 additions & 67 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616

1717
use once_cell::sync::Lazy;
1818
use quickwit_common::metrics::{
19-
GaugeGuard, Histogram, IntCounter, IntCounterVec, IntGauge, new_counter, new_counter_vec,
19+
GaugeGuard, HistogramVec, IntCounter, IntCounterVec, IntGauge, new_counter, new_counter_vec,
2020
new_gauge, new_histogram_vec,
2121
};
2222

@@ -30,19 +30,13 @@ pub struct StorageMetrics {
3030
pub searcher_split_cache: CacheMetrics,
3131
pub get_slice_timeout_successes: [IntCounter; 3],
3232
pub get_slice_timeout_all_timeouts: IntCounter,
33-
pub object_storage_get_total: IntCounter,
34-
pub object_storage_get_errors_total: IntCounterVec<1>,
33+
pub object_storage_requests_total: IntCounterVec<2>,
34+
pub object_storage_request_duration: HistogramVec<2>,
3535
pub object_storage_get_slice_in_flight_count: IntGauge,
3636
pub object_storage_get_slice_in_flight_num_bytes: IntGauge,
37-
pub object_storage_put_total: IntCounter,
38-
pub object_storage_put_parts: IntCounter,
39-
pub object_storage_download_num_bytes: IntCounter,
40-
pub object_storage_upload_num_bytes: IntCounter,
41-
42-
pub object_storage_delete_requests_total: IntCounter,
43-
pub object_storage_bulk_delete_requests_total: IntCounter,
44-
pub object_storage_delete_request_duration: Histogram,
45-
pub object_storage_bulk_delete_request_duration: Histogram,
37+
pub object_storage_download_num_bytes: IntCounterVec<1>,
38+
pub object_storage_download_errors: IntCounterVec<1>,
39+
pub object_storage_upload_num_bytes: IntCounterVec<1>,
4640
}
4741

4842
impl Default for StorageMetrics {
@@ -63,31 +57,6 @@ impl Default for StorageMetrics {
6357
let get_slice_timeout_all_timeouts =
6458
get_slice_timeout_outcome_total_vec.with_label_values(["all_timeouts"]);
6559

66-
let object_storage_requests_total = new_counter_vec(
67-
"object_storage_requests_total",
68-
"Total number of object storage requests performed.",
69-
"storage",
70-
&[],
71-
["action"],
72-
);
73-
let object_storage_delete_requests_total =
74-
object_storage_requests_total.with_label_values(["delete_object"]);
75-
let object_storage_bulk_delete_requests_total =
76-
object_storage_requests_total.with_label_values(["delete_objects"]);
77-
78-
let object_storage_request_duration = new_histogram_vec(
79-
"object_storage_request_duration_seconds",
80-
"Duration of object storage requests in seconds.",
81-
"storage",
82-
&[],
83-
["action"],
84-
vec![0.1, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0],
85-
);
86-
let object_storage_delete_request_duration =
87-
object_storage_request_duration.with_label_values(["delete_object"]);
88-
let object_storage_bulk_delete_request_duration =
89-
object_storage_request_duration.with_label_values(["delete_objects"]);
90-
9160
StorageMetrics {
9261
fast_field_cache: CacheMetrics::for_component("fastfields"),
9362
fd_cache_metrics: CacheMetrics::for_component("fd"),
@@ -97,62 +66,63 @@ impl Default for StorageMetrics {
9766
split_footer_cache: CacheMetrics::for_component("splitfooter"),
9867
get_slice_timeout_successes,
9968
get_slice_timeout_all_timeouts,
100-
object_storage_get_total: new_counter(
101-
"object_storage_gets_total",
102-
"Number of objects fetched. Might be lower than get_slice_timeout_outcome if \
103-
queries are debounced.",
69+
object_storage_requests_total: new_counter_vec(
70+
"object_storage_requests_total",
71+
"Number of requests to the object store, by action and status. Requests are \
72+
recorded when the response headers are returned, download failures will not \
73+
appear as errors.",
10474
"storage",
10575
&[],
76+
["action", "status"],
10677
),
107-
object_storage_get_errors_total: new_counter_vec::<1>(
108-
"object_storage_get_errors_total",
109-
"Number of GetObject errors.",
78+
object_storage_request_duration: new_histogram_vec(
79+
"object_storage_request_duration",
80+
"Durations until the response headers are returned from the object store, by \
81+
action and status. This does not measure the download time for the body content.",
11082
"storage",
11183
&[],
112-
["code"],
84+
["action", "status"],
85+
vec![0.1, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0],
11386
),
11487
object_storage_get_slice_in_flight_count: new_gauge(
11588
"object_storage_get_slice_in_flight_count",
116-
"Number of GetObject for which the memory was allocated but the download is still \
117-
in progress.",
89+
"Number of get_object for which the memory was allocated but the download is \
90+
still in progress.",
11891
"storage",
11992
&[],
12093
),
12194
object_storage_get_slice_in_flight_num_bytes: new_gauge(
12295
"object_storage_get_slice_in_flight_num_bytes",
123-
"Memory allocated for GetObject requests that are still in progress.",
96+
"Memory allocated for get_object requests that are still in progress.",
12497
"storage",
12598
&[],
12699
),
127-
object_storage_put_total: new_counter(
128-
"object_storage_puts_total",
129-
"Number of objects uploaded. May differ from object_storage_requests_parts due to \
130-
multipart upload.",
100+
object_storage_download_num_bytes: new_counter_vec(
101+
"object_storage_download_num_bytes",
102+
"Amount of data downloaded from object storage.",
131103
"storage",
132104
&[],
105+
["status"],
133106
),
134-
object_storage_put_parts: new_counter(
135-
"object_storage_puts_parts",
136-
"Number of object parts uploaded.",
137-
"",
138-
&[],
139-
),
140-
object_storage_download_num_bytes: new_counter(
141-
"object_storage_download_num_bytes",
142-
"Amount of data downloaded from an object storage.",
107+
object_storage_download_errors: new_counter_vec(
108+
"object_storage_download_errors",
109+
// Download errors are recorded separately because the associated
110+
// get_object requests were already recorded as successful in
111+
// object_storage_requests_total
112+
"Number of download requests that received successful response headers but failed \
113+
during download.",
143114
"storage",
144115
&[],
116+
["status"],
145117
),
146-
object_storage_upload_num_bytes: new_counter(
118+
object_storage_upload_num_bytes: new_counter_vec(
147119
"object_storage_upload_num_bytes",
148-
"Amount of data uploaded to an object storage.",
120+
"Amount of data uploaded to object storage. The value recorded for failed and \
121+
aborted uploads is the full payload size.",
149122
"storage",
150123
&[],
124+
["status"],
151125
),
152-
object_storage_delete_requests_total,
153-
object_storage_bulk_delete_requests_total,
154-
object_storage_delete_request_duration,
155-
object_storage_bulk_delete_request_duration,
156126
}
157127
}
158128
}

0 commit comments

Comments
 (0)