11local grafana = import 'grafonnet/grafana.libsonnet' ;
22
33local common = import 'dashboard/panels/common.libsonnet' ;
4+ local common_utils = import 'dashboard/panels/common.libsonnet' ;
45local variable = import 'dashboard/variable.libsonnet' ;
56
67local influxdb = grafana.influxdb;
@@ -21,14 +22,14 @@ local prometheus = grafana.prometheus;
2122 format='percentunit' ,
2223 decimalsY1=0 ,
2324 min=0 ,
24- panel_width=12 ,
25+ panel_width=8 ,
2526 ).addTarget(
2627 common.target(cfg, metric_name, rate=true )
2728 ),
2829
29- getrusage_cpu_user_time (
30+ getrusage_cpu_instance_user_time (
3031 cfg,
31- title='CPU user time' ,
32+ title='CPU user time per instance ' ,
3233 description=|||
3334 This is the average share of time
3435 spent by instance process executing in user mode.
@@ -43,9 +44,9 @@ local prometheus = grafana.prometheus;
4344 metric_name='tnt_cpu_user_time' ,
4445 ),
4546
46- getrusage_cpu_system_time (
47+ getrusage_cpu_instance_system_time (
4748 cfg,
48- title='CPU system time' ,
49+ title='CPU system time per instance ' ,
4950 description=|||
5051 This is the average share of time
5152 spent by instance process executing in kernel mode.
@@ -60,6 +61,193 @@ local prometheus = grafana.prometheus;
6061 metric_name='tnt_cpu_system_time' ,
6162 ),
6263
64+ // --------------------------------------------------------------------------
// Private helper: builds a common.default_graph panel (format 'percentunit',
// min=0, panel_width=8) and attaches a single target that combines the
// tnt_cpu_user_time and tnt_cpu_system_time metrics, labelled by instance alias.
//
// Parameters:
//   cfg         - dashboard config; this block reads cfg.type, cfg.metrics_prefix
//                 and cfg.filters.
//   title       - panel title, passed through to common.default_graph.
//   description - panel description, passed through to common.default_graph.
//
// NOTE(review): the Prometheus branch passes cfg.filters through
// common.remove_field(..., 'alias') (presumably dropping the alias filter -
// confirm against common.libsonnet), while the InfluxDB branch explicitly
// filters on "label_pairs_alias" =~ /^$alias$/. Confirm that the two
// datasources are meant to differ here.
65+ local getrusage_cpu_total_percentage_graph(
66+ cfg, title, description,
67+ ) = common.default_graph(
68+ cfg,
69+ title=title,
70+ description=description,
71+ format='percentunit' ,
72+ decimalsY1=0 ,
73+ min=0 ,
74+ panel_width=8 ,
75+ ).addTarget(
// The target is chosen by datasource type.
// NOTE(review): there is no final `else`, so for any other cfg.type this
// expression evaluates to null - confirm addTarget tolerates that.
76+ if cfg.type == variable.datasource_type.prometheus then
77+ prometheus.target(
78+ expr=std.format (
79+ |||
80+ rate(%(metrics_prefix)stnt_cpu_user_time{%(filters)s}[$__rate_interval]) +
81+ rate(%(metrics_prefix)stnt_cpu_system_time{%(filters)s}[$__rate_interval])
82+ ||| ,
83+ {
84+ metrics_prefix: cfg.metrics_prefix,
85+ filters: common.prometheus_query_filters(common.remove_field(cfg.filters, 'alias' )),
86+ }
87+ ),
88+ legendFormat='{{alias}}'
89+ )
90+ else if cfg.type == variable.datasource_type.influxdb then
// Raw InfluxQL: both metric names are selected by one WHERE clause and
// summed together before the derivative, grouped by "label_pairs_alias".
91+ influxdb.target(
92+ rawQuery=true ,
93+ query=|||
94+ SELECT non_negative_derivative(sum("value"), 1s)
95+ FROM "$policy"."$measurement"
96+ WHERE (("metric_name" = 'tnt_cpu_user_time' OR "metric_name" = 'tnt_cpu_system_time') AND "label_pairs_alias" =~ /^$alias$/)
97+ AND $timeFilter
98+ GROUP BY time($__interval), "label_pairs_alias" fill(none)
99+ ||| ,
100+ alias='$tag_label_pairs_alias' ,
101+ )
102+ ),
103+
// Hidden (`::`) panel constructor for the "CPU total time per instance" graph.
// Takes cfg plus an overridable title and description (defaults below) and
// forwards all three unchanged to getrusage_cpu_total_percentage_graph.
104+ getrusage_cpu_instance_total_time(
105+ cfg,
106+ title='CPU total time per instance' ,
107+ description=|||
108+ This is the average share of time spent
109+ by instance process executing.
110+
111+ Panel minimal requirements: metrics 0.8.0.
112+ ||| ,
113+ ):: getrusage_cpu_total_percentage_graph(
114+ cfg=cfg,
115+ title=title,
116+ description=description,
117+ ),
118+
119+ // --------------------------------------------------------------------------
// Private helper: builds a common.default_graph panel (format 'percentunit',
// min=0, panel_width=8) with a single target whose query text is supplied by
// the caller for each datasource type.
//
// Parameters:
//   cfg               - dashboard config; this block reads only cfg.type.
//   title             - panel title.
//   description       - panel description.
//   prometheus_expr   - expression used as `expr` of the Prometheus target.
//   prometheus_legend - value used as `legendFormat` of the Prometheus target.
//   influx_query      - raw query string used for the InfluxDB target.
//   influx_alias      - value used as `alias` of the InfluxDB target.
120+ local getrusage_cpu_common_percentage_graph(
121+ cfg,
122+ title,
123+ description,
124+ prometheus_expr,
125+ prometheus_legend,
126+ influx_query,
127+ influx_alias,
128+ ) = common.default_graph(
129+ cfg,
130+ title=title,
131+ description=description,
132+ format='percentunit' ,
133+ decimalsY1=0 ,
134+ min=0 ,
135+ panel_width=8 ,
136+ ).addTarget(
// Only the branch matching cfg.type is used; the arguments for the other
// datasource are ignored.
// NOTE(review): there is no final `else`, so for any other cfg.type this
// expression evaluates to null - confirm addTarget tolerates that.
137+ if cfg.type == variable.datasource_type.prometheus then
138+ prometheus.target(
139+ expr=prometheus_expr,
140+ legendFormat=prometheus_legend,
141+ )
142+ else if cfg.type == variable.datasource_type.influxdb then
143+ influxdb.target(
144+ rawQuery=true ,
145+ query=influx_query,
146+ alias=influx_alias,
147+ )
148+ ),
149+
// Hidden (`::`) panel constructor for the "CPU total time per cluster" graph.
// Delegates to getrusage_cpu_common_percentage_graph with:
//   - Prometheus: sum of the tnt_cpu_user_time rates plus sum of the
//     tnt_cpu_system_time rates, with 'alias' removed from cfg.filters.
//   - InfluxDB: derivative of SUM("value") over both metric names, grouped by
//     time only.
// The panel title is reused as the legend / alias of the single series.
//
// NOTE(review): unlike getrusage_cpu_total_user_time and
// getrusage_cpu_total_system_time, the InfluxDB query here has no
// "label_pairs_alias" =~ /^$alias$/ condition - confirm which variant is
// intended for the cluster-wide panels.
150+ getrusage_cpu_total_time(
151+ cfg,
152+ title='CPU total time per cluster' ,
153+ description=|||
154+ This is the total share of time spent
155+ by each cluster process executing.
156+
157+ Panel minimal requirements: metrics 0.8.0.
158+ ||| ,
159+ ):: getrusage_cpu_common_percentage_graph(
160+ cfg=cfg,
161+ title=title,
162+ description=description,
163+ prometheus_expr=std.format (
164+ |||
165+ sum(rate(%(metrics_prefix)stnt_cpu_user_time{%(filters)s}[$__rate_interval])) +
166+ sum(rate(%(metrics_prefix)stnt_cpu_system_time{%(filters)s}[$__rate_interval]))
167+ ||| ,
168+ {
169+ metrics_prefix: cfg.metrics_prefix,
170+ filters: common.prometheus_query_filters(common.remove_field(cfg.filters, 'alias' )),
171+ }
172+ ),
173+ prometheus_legend=title,
174+ influx_query=|||
175+ SELECT non_negative_derivative(SUM("value"), 1s) AS total_cpu_time_per_cluster
176+ FROM "$policy"."$measurement"
177+ WHERE ("metric_name" = 'tnt_cpu_user_time' OR "metric_name" = 'tnt_cpu_system_time')
178+ AND $timeFilter
179+ GROUP BY time($__interval)
180+ ||| ,
181+ influx_alias=title
182+ ),
183+
// Hidden (`::`) panel constructor for the "CPU total user time per cluster"
// graph. Delegates to getrusage_cpu_common_percentage_graph with:
//   - Prometheus: sum of the tnt_cpu_user_time rates, with 'alias' removed
//     from cfg.filters.
//   - InfluxDB: derivative of SUM("value") for tnt_cpu_user_time, grouped by
//     time only.
// The panel title is reused as the legend / alias of the single series.
//
// NOTE(review): the InfluxDB query still restricts rows with
// "label_pairs_alias" =~ /^$alias$/ while the Prometheus filters have 'alias'
// removed - confirm whether the cluster total should depend on $alias.
184+ getrusage_cpu_total_user_time(
185+ cfg,
186+ title='CPU total user time per cluster' ,
187+ description=|||
188+ This is the total share of time
189+ spent in user mode per cluster.
190+
191+ Panel minimal requirements: metrics 0.8.0.
192+ ||| ,
193+ ):: getrusage_cpu_common_percentage_graph(
194+ cfg=cfg,
195+ title=title,
196+ description=description,
197+ prometheus_expr=std.format (
198+ |||
199+ sum(rate(%(metrics_prefix)stnt_cpu_user_time{%(filters)s}[$__rate_interval]))
200+ ||| ,
201+ {
202+ metrics_prefix: cfg.metrics_prefix,
203+ filters: common.prometheus_query_filters(common.remove_field(cfg.filters, 'alias' )),
204+ }
205+ ),
206+ prometheus_legend=title,
207+ influx_query=|||
208+ SELECT non_negative_derivative(SUM("value"), 1s) AS total_cpu_user_time
209+ FROM "$policy"."$measurement"
210+ WHERE "metric_name" = 'tnt_cpu_user_time' AND "label_pairs_alias" =~ /^$alias$/
211+ AND $timeFilter
212+ GROUP BY time($__interval)
213+ ||| ,
214+ influx_alias=title
215+ ),
216+
// Hidden (`::`) panel constructor for the "CPU total system time per cluster"
// graph. Delegates to getrusage_cpu_common_percentage_graph with:
//   - Prometheus: sum of the tnt_cpu_system_time rates, with 'alias' removed
//     from cfg.filters.
//   - InfluxDB: derivative of SUM("value") for tnt_cpu_system_time, grouped by
//     time only.
// The panel title is reused as the legend / alias of the single series.
//
// NOTE(review): the InfluxDB query still restricts rows with
// "label_pairs_alias" =~ /^$alias$/ while the Prometheus filters have 'alias'
// removed - confirm whether the cluster total should depend on $alias.
217+ getrusage_cpu_total_system_time(
218+ cfg,
219+ title='CPU total system time per cluster' ,
220+ description=|||
221+ This is the total share of time
222+ spent in system mode per cluster.
223+
224+ Panel minimal requirements: metrics 0.8.0.
225+ ||| ,
226+ ):: getrusage_cpu_common_percentage_graph(
227+ cfg=cfg,
228+ title=title,
229+ description=description,
230+ prometheus_expr=std.format (
231+ |||
232+ sum(rate(%(metrics_prefix)stnt_cpu_system_time{%(filters)s}[$__rate_interval]))
233+ ||| ,
234+ {
235+ metrics_prefix: cfg.metrics_prefix,
236+ filters: common.prometheus_query_filters(common.remove_field(cfg.filters, 'alias' )),
237+ }
238+ ),
239+ prometheus_legend=title,
240+ influx_query=|||
241+ SELECT non_negative_derivative(SUM("value"), 1s) AS total_cpu_system_time
242+ FROM "$policy"."$measurement"
243+ WHERE ("metric_name" = 'tnt_cpu_system_time' AND "label_pairs_alias" =~ /^$alias$/)
244+ AND $timeFilter
245+ GROUP BY time($__interval)
246+ ||| ,
247+ influx_alias=title
248+ ),
249+
250+ // --------------------------------------------------------------------------
63251 local procstat_thread_time_graph(
64252 cfg,
65253 title,
0 commit comments