11local grafana = import 'grafonnet/grafana.libsonnet' ;
22
33local common = import 'dashboard/panels/common.libsonnet' ;
4+ local common_utils = import 'dashboard/panels/common.libsonnet' ;
45local variable = import 'dashboard/variable.libsonnet' ;
56
67local influxdb = grafana.influxdb;
@@ -21,14 +22,14 @@ local prometheus = grafana.prometheus;
2122 format='percentunit' ,
2223 decimalsY1=0 ,
2324 min=0 ,
24- panel_width=12 ,
25+ panel_width=8 ,
2526 ).addTarget(
2627 common.target(cfg, metric_name, rate=true )
2728 ),
2829
29- getrusage_cpu_user_time (
30+ getrusage_cpu_instance_user_time (
3031 cfg,
31- title='CPU user time' ,
32+ title='CPU user time per instance ' ,
3233 description=|||
3334 This is the average share of time
3435 spent by instance process executing in user mode.
@@ -43,9 +44,9 @@ local prometheus = grafana.prometheus;
4344 metric_name='tnt_cpu_user_time' ,
4445 ),
4546
46- getrusage_cpu_system_time (
47+ getrusage_cpu_instance_system_time (
4748 cfg,
48- title='CPU system time' ,
49+ title='CPU system time per instance ' ,
4950 description=|||
5051 This is the average share of time
5152 spent by instance process executing in kernel mode.
@@ -60,6 +61,225 @@ local prometheus = grafana.prometheus;
6061 metric_name='tnt_cpu_system_time' ,
6162 ),
6263
64+ // --------------------------------------------------------------------------
// Percent-unit graph of the combined user-mode and kernel-mode CPU time rate,
// with series labelled/grouped by instance alias.
// The single target is a Prometheus or an InfluxDB query depending on
// cfg.type; the conditional has no fallback branch, so any other cfg.type
// hands null to addTarget.
local getrusage_cpu_total_percentage_graph(
  cfg, title, description,
) =
  local is_prometheus = cfg.type == variable.datasource_type.prometheus;
  local is_influxdb = cfg.type == variable.datasource_type.influxdb;

  // PromQL: rate of the user-time counter plus rate of the system-time counter.
  local prometheus_expr = |||
    rate(%(metrics_prefix)stnt_cpu_user_time{%(filters)s}[$__rate_interval]) +
    rate(%(metrics_prefix)stnt_cpu_system_time{%(filters)s}[$__rate_interval])
  ||| % {
    metrics_prefix: cfg.metrics_prefix,
    filters: common.prometheus_query_filters(cfg.filters),
  };

  // The 'default' policy adds no prefix; any other policy name is quoted and
  // prepended to the quoted measurement name.
  local policy_prefix = if cfg.policy == 'default' then '' else '"%(policy)s".' % cfg.policy;
  local measurement_with_policy = '%(policy_prefix)s"%(measurement)s"' % {
    policy_prefix: policy_prefix,
    measurement: cfg.measurement,
  };

  // InfluxQL: both metrics are selected at once and summed before the
  // derivative is taken, grouped by time bucket and alias tag.
  local influx_query = |||
    SELECT non_negative_derivative(SUM("value"), 1s)
    FROM %(measurement_with_policy)s
    WHERE (("metric_name" = '%(metric_user_time)s' OR "metric_name" = '%(metric_system_time)s') AND %(filters)s)
    AND $timeFilter
    GROUP BY time($__interval), "label_pairs_alias" fill(none)
  ||| % {
    measurement_with_policy: measurement_with_policy,
    metric_user_time: cfg.metrics_prefix + 'tnt_cpu_user_time',
    metric_system_time: cfg.metrics_prefix + 'tnt_cpu_system_time',
    filters: common.influxdb_query_filters(cfg.filters),
  };

  local target =
    if is_prometheus then
      prometheus.target(
        expr=prometheus_expr,
        legendFormat='{{alias}}'
      )
    else if is_influxdb then
      influxdb.target(
        rawQuery=true,
        query=influx_query,
        alias='$tag_label_pairs_alias',
      );

  common.default_graph(
    cfg,
    title=title,
    description=description,
    format='percentunit',
    decimalsY1=0,
    min=0,
    panel_width=8,
  ).addTarget(target),
111+
// Panel: combined (user + system) CPU time share for each instance.
// Only supplies default title/description texts and delegates the panel
// construction to getrusage_cpu_total_percentage_graph.
getrusage_cpu_instance_total_time(
  cfg,
  title='CPU total time per instance',
  description=|||
    This is the average share of time spent
    by instance process executing.

    Panel minimal requirements: metrics 0.8.0.
  |||,
):: getrusage_cpu_total_percentage_graph(cfg, title, description),
126+
127+ // --------------------------------------------------------------------------
// Generic percent-unit CPU graph with one caller-supplied target.
//   prometheus_expr / prometheus_legend - used when cfg.type is Prometheus
//   influx_query / influx_alias         - used when cfg.type is InfluxDB
// The conditional has no fallback branch, so for any other cfg.type the
// value passed to addTarget is null.
local getrusage_cpu_common_percentage_graph(
  cfg,
  title,
  description,
  prometheus_expr,
  prometheus_legend,
  influx_query,
  influx_alias,
) =
  local datasource = variable.datasource_type;

  // Pick the datasource-specific target first, then attach it to the panel.
  local target =
    if cfg.type == datasource.prometheus then
      prometheus.target(
        expr=prometheus_expr,
        legendFormat=prometheus_legend,
      )
    else if cfg.type == datasource.influxdb then
      influxdb.target(
        rawQuery=true,
        query=influx_query,
        alias=influx_alias,
      );

  local panel = common.default_graph(
    cfg,
    title=title,
    description=description,
    format='percentunit',
    decimalsY1=0,
    min=0,
    panel_width=8,
  );

  panel.addTarget(target),
157+
// Panel: combined (user + system) CPU time summed into a single series.
// The alias filter is removed for both datasources ('alias' for Prometheus,
// 'label_pairs_alias' for InfluxDB) before the queries are built, and the
// panel title doubles as the series legend/alias.
getrusage_cpu_total_time(
  cfg,
  title='CPU total time per cluster',
  description=|||
    This is the total share of time spent
    by each cluster process executing.

    Panel minimal requirements: metrics 0.8.0.
  |||,
)::
  local prometheus_filters = common.prometheus_query_filters(common.remove_field(cfg.filters, 'alias'));

  // With the alias filter removed the InfluxDB filter string may be empty;
  // 'true' is substituted so the "AND %(filters)s" slot is never blank.
  local raw_influx_filters = common.influxdb_query_filters(common.remove_field(cfg.filters, 'label_pairs_alias'));
  local influx_filters = if raw_influx_filters != '' then raw_influx_filters else 'true';

  // The 'default' policy adds no prefix; any other policy name is quoted and
  // prepended to the quoted measurement name.
  local policy_prefix = if cfg.policy == 'default' then '' else '"%(policy)s".' % cfg.policy;
  local measurement_with_policy = '%(policy_prefix)s"%(measurement)s"' % {
    policy_prefix: policy_prefix,
    measurement: cfg.measurement,
  };

  local prometheus_query = |||
    sum(rate(%(metrics_prefix)stnt_cpu_user_time{%(filters)s}[$__rate_interval])) +
    sum(rate(%(metrics_prefix)stnt_cpu_system_time{%(filters)s}[$__rate_interval]))
  ||| % {
    metrics_prefix: cfg.metrics_prefix,
    filters: prometheus_filters,
  };

  local influx_statement = |||
    SELECT non_negative_derivative(SUM("value"), 1s)
    FROM %(measurement_with_policy)s
    WHERE (("metric_name" = '%(metric_user_time)s' OR "metric_name" = '%(metric_system_time)s') AND %(filters)s)
    AND $timeFilter
    GROUP BY time($__interval)
  ||| % {
    measurement_with_policy: measurement_with_policy,
    metric_user_time: cfg.metrics_prefix + 'tnt_cpu_user_time',
    metric_system_time: cfg.metrics_prefix + 'tnt_cpu_system_time',
    filters: influx_filters,
  };

  getrusage_cpu_common_percentage_graph(
    cfg=cfg,
    title=title,
    description=description,
    prometheus_expr=prometheus_query,
    prometheus_legend=title,
    influx_query=influx_statement,
    influx_alias=title
  ),
201+
// Panel: user-mode CPU time summed into a single series.
//
// Parameters:
//   cfg         - dashboard config; reads type, filters, metrics_prefix,
//                 policy and measurement.
//   title       - panel title, also used as the series legend/alias.
//   description - panel description text.
//
// The alias filter is removed for BOTH datasources so that the sum covers
// every instance matched by the remaining filters: 'alias' for Prometheus
// and 'label_pairs_alias' for InfluxDB. Previously only the Prometheus query
// dropped it, so the InfluxDB panel was still narrowed by the alias filter.
getrusage_cpu_total_user_time(
  cfg,
  title='CPU total user time per cluster',
  description=|||
    This is the total share of time
    spent in user mode per cluster.

    Panel minimal requirements: metrics 0.8.0.
  |||,
)::
  // Once the alias filter is removed the filter string can be empty, which
  // would leave a dangling "AND" in the WHERE clause; fall back to 'true',
  // the same substitution getrusage_cpu_total_time applies.
  local raw_influx_filters = common.influxdb_query_filters(common.remove_field(cfg.filters, 'label_pairs_alias'));
  local influx_filters = if raw_influx_filters != '' then raw_influx_filters else 'true';

  getrusage_cpu_common_percentage_graph(
    cfg=cfg,
    title=title,
    description=description,
    prometheus_expr=std.format(
      |||
        sum(rate(%(metrics_prefix)stnt_cpu_user_time{%(filters)s}[$__rate_interval]))
      |||,
      {
        metrics_prefix: cfg.metrics_prefix,
        filters: common.prometheus_query_filters(common.remove_field(cfg.filters, 'alias')),
      }
    ),
    prometheus_legend=title,
    influx_query=std.format(|||
      SELECT non_negative_derivative(SUM("value"), 1s)
      FROM %(measurement_with_policy)s
      WHERE "metric_name" = '%(metric_user_time)s' AND %(filters)s
      AND $timeFilter
      GROUP BY time($__interval)
    |||, {
      // The 'default' policy adds no prefix; any other policy name is quoted
      // and prepended to the quoted measurement name.
      measurement_with_policy: std.format('%(policy_prefix)s"%(measurement)s"', {
        policy_prefix: if cfg.policy == 'default' then '' else std.format('"%(policy)s".', cfg.policy),
        measurement: cfg.measurement,
      }),
      metric_user_time: cfg.metrics_prefix + 'tnt_cpu_user_time',
      filters: influx_filters,
    }),
    influx_alias=title
  ),
241+
// Panel: kernel-mode (system) CPU time summed into a single series.
//
// Parameters:
//   cfg         - dashboard config; reads type, filters, metrics_prefix,
//                 policy and measurement.
//   title       - panel title, also used as the series legend/alias.
//   description - panel description text.
//
// The alias filter is removed for BOTH datasources so that the sum covers
// every instance matched by the remaining filters: 'alias' for Prometheus
// and 'label_pairs_alias' for InfluxDB. Previously only the Prometheus query
// dropped it, so the InfluxDB panel was still narrowed by the alias filter.
getrusage_cpu_total_system_time(
  cfg,
  title='CPU total system time per cluster',
  description=|||
    This is the total share of time
    spent in system mode per cluster.

    Panel minimal requirements: metrics 0.8.0.
  |||,
)::
  // Once the alias filter is removed the filter string can be empty, which
  // would leave a dangling "AND" in the WHERE clause; fall back to 'true',
  // the same substitution getrusage_cpu_total_time applies.
  local raw_influx_filters = common.influxdb_query_filters(common.remove_field(cfg.filters, 'label_pairs_alias'));
  local influx_filters = if raw_influx_filters != '' then raw_influx_filters else 'true';

  getrusage_cpu_common_percentage_graph(
    cfg=cfg,
    title=title,
    description=description,
    prometheus_expr=std.format(
      |||
        sum(rate(%(metrics_prefix)stnt_cpu_system_time{%(filters)s}[$__rate_interval]))
      |||,
      {
        metrics_prefix: cfg.metrics_prefix,
        filters: common.prometheus_query_filters(common.remove_field(cfg.filters, 'alias')),
      }
    ),
    prometheus_legend=title,
    influx_query=std.format(|||
      SELECT non_negative_derivative(SUM("value"), 1s)
      FROM %(measurement_with_policy)s
      WHERE "metric_name" = '%(metric_system_time)s' AND %(filters)s
      AND $timeFilter
      GROUP BY time($__interval)
    |||, {
      // The 'default' policy adds no prefix; any other policy name is quoted
      // and prepended to the quoted measurement name.
      measurement_with_policy: std.format('%(policy_prefix)s"%(measurement)s"', {
        policy_prefix: if cfg.policy == 'default' then '' else std.format('"%(policy)s".', cfg.policy),
        measurement: cfg.measurement,
      }),
      metric_system_time: cfg.metrics_prefix + 'tnt_cpu_system_time',
      filters: influx_filters,
    }),
    influx_alias=title
  ),
281+
282+ // --------------------------------------------------------------------------
63283 local procstat_thread_time_graph(
64284 cfg,
65285 title,
0 commit comments