Skip to content

Commit 8ca51ce

Browse files
update cpu panels
1 parent 2426c0b commit 8ca51ce

File tree

3 files changed

+87
-45
lines changed

3 files changed

+87
-45
lines changed

CHANGELOG.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
88

99
### Added
1010
- Panel with Cartridge configuration checksum (#242)
11-
- Panel with CPU total utilization per instance (#TNTP-4365)
12-
- Panels with memory/virtual memory utilization per instance and total (#TNTP-4365)
11+
- Panels with `CPU/memory/virtual memory` utilization per instance and total (#TNTP-4365)
1312

1413
## [3.2.1] - 2024-12-06
1514
Grafana revisions:

dashboard/panels/cpu.libsonnet

Lines changed: 75 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,9 @@ local prometheus = grafana.prometheus;
2727
common.target(cfg, metric_name, rate=true)
2828
),
2929

30-
getrusage_cpu_user_time(
30+
getrusage_cpu_instance_user_time(
3131
cfg,
32-
title='CPU user time',
32+
title='CPU user time per instance',
3333
description=|||
3434
This is the average share of time
3535
spent by instance process executing in user mode.
@@ -44,9 +44,9 @@ local prometheus = grafana.prometheus;
4444
metric_name='tnt_cpu_user_time',
4545
),
4646

47-
getrusage_cpu_system_time(
47+
getrusage_cpu_instance_system_time(
4848
cfg,
49-
title='CPU system time',
49+
title='CPU system time per instance',
5050
description=|||
5151
This is the average share of time
5252
spent by instance process executing in kernel mode.
@@ -61,8 +61,9 @@ local prometheus = grafana.prometheus;
6161
metric_name='tnt_cpu_system_time',
6262
),
6363

64+
// --------------------------------------------------------------------------
6465
local getrusage_cpu_total_percentage_graph(
65-
cfg, title, description, metric_user, metric_system,
66+
cfg, title, description,
6667
) = common.default_graph(
6768
cfg,
6869
title=title,
@@ -74,41 +75,44 @@ local prometheus = grafana.prometheus;
7475
).addTarget(
7576
if cfg.type == variable.datasource_type.prometheus then
7677
prometheus.target(
77-
expr=std.format(
78-
'rate(%s[$__rate_interval]) + rate(%s[$__rate_interval])',
79-
[
80-
metric_user,
81-
metric_system,
82-
]
83-
),
78+
expr='rate(tnt_cpu_user_time[$__rate_interval]) + rate(tnt_cpu_system_time[$__rate_interval])',
8479
legendFormat='{{alias}}'
8580
)
8681
else if cfg.type == variable.datasource_type.influxdb then
87-
local filters = common_utils.influxdb_query_filters(cfg.filters);
88-
influxdb.target(
89-
rawQuery=true,
90-
query=std.format(|||
91-
SELECT mean("%(metrics_prefix)s%(metric_user)s") + mean("%(metrics_prefix)s%(metric_system)s")
92-
as "total" FROM
93-
(SELECT "value" as "%(metrics_prefix)s%(metric_user)s" FROM %(policy_prefix)s"%(measurement)s"
94-
WHERE ("metric_name" = '%(metrics_prefix)s%(metric_user)s' %(filters)s),
95-
(SELECT "value" as "%(metrics_prefix)s%(metric_system)s" FROM %(policy_prefix)s"%(measurement)s"
96-
WHERE ("metric_name" = '%(metrics_prefix)s%(metric_system)s' %(filters)s))
97-
GROUP BY time($__interval), "label_pairs_alias", "label_pairs_name" fill(none)
98-
|||, {
99-
metrics_prefix: cfg.metrics_prefix,
100-
metric_user: metric_user,
101-
metric_system: metric_system,
102-
policy_prefix: if cfg.policy == 'default' then '' else std.format('"%(policy)s".', cfg.policy),
103-
measurement: cfg.measurement,
104-
filters: if filters == '' then '' else std.format('AND %s', filters),
105-
}),
106-
alias='$tag_label_pairs_name — $tag_label_pairs_alias'
107-
),
82+
influxdb.target()
83+
),
84+
85+
getrusage_cpu_instance_total_time(
86+
cfg,
87+
title='CPU total time per instance',
88+
description=|||
89+
This is the average share of time spent
90+
by instance process executing.
91+
92+
Panel minimal requirements: metrics 0.8.0.
93+
|||,
94+
):: getrusage_cpu_total_percentage_graph(
95+
cfg=cfg,
96+
title=title,
97+
description=description,
98+
),
99+
100+
// --------------------------------------------------------------------------
101+
local getrusage_cpu_common_percentage_graph(
102+
cfg, title, description, expr,
103+
) = common.default_graph(
104+
cfg,
105+
title=title,
106+
description=description,
107+
format='percentunit',
108+
decimalsY1=0,
109+
min=0,
110+
panel_width=8,
108111
).addTarget(
109112
if cfg.type == variable.datasource_type.prometheus then
110113
prometheus.target(
111-
expr='sum(rate(tnt_cpu_user_time{job=~"$job"}[$__rate_interval])) + sum(rate(tnt_cpu_system_time{job=~"$job"}[$__rate_interval]))',
114+
expr=expr,
115+
legendFormat='{{alias}}'
112116
)
113117
else if cfg.type == variable.datasource_type.influxdb then
114118
influxdb.target()
@@ -118,19 +122,51 @@ local prometheus = grafana.prometheus;
118122
cfg,
119123
title='CPU total time',
120124
description=|||
121-
This is the average share of time spent
122-
by instance process executing.
125+
This is the total share of time spent
126+
by each instance process executing.
123127
124128
Panel minimal requirements: metrics 0.8.0.
125129
|||,
126-
):: getrusage_cpu_total_percentage_graph(
130+
):: getrusage_cpu_common_percentage_graph(
131+
cfg=cfg,
132+
title=title,
133+
description=description,
134+
expr='sum(rate(tnt_cpu_user_time{job=~"$job"}[$__rate_interval])) + sum(rate(tnt_cpu_system_time{job=~"$job"}[$__rate_interval]))',
135+
),
136+
137+
getrusage_cpu_total_user_time(
138+
cfg,
139+
title='CPU total user time',
140+
description=|||
141+
This is the total share of time
142+
spent in user mode.
143+
144+
Panel minimal requirements: metrics 0.8.0.
145+
|||,
146+
):: getrusage_cpu_common_percentage_graph(
147+
cfg=cfg,
148+
title=title,
149+
description=description,
150+
expr='sum(rate(tnt_cpu_user_time{job=~"$job"}[$__rate_interval]))',
151+
),
152+
153+
getrusage_cpu_total_system_time(
154+
cfg,
155+
title='CPU total system time',
156+
description=|||
157+
This is the total share of time
158+
spent in system mode.
159+
160+
Panel minimal requirements: metrics 0.8.0.
161+
|||,
162+
):: getrusage_cpu_common_percentage_graph(
127163
cfg=cfg,
128164
title=title,
129165
description=description,
130-
metric_user='tnt_cpu_user_time',
131-
metric_system='tnt_cpu_system_time',
166+
expr='sum(rate(tnt_cpu_system_time{job=~"$job"}[$__rate_interval]))',
132167
),
133168

169+
// --------------------------------------------------------------------------
134170
local procstat_thread_time_graph(
135171
cfg,
136172
title,

dashboard/section.libsonnet

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -223,16 +223,23 @@ local vinyl = import 'dashboard/panels/vinyl.libsonnet';
223223

224224
cpu(cfg):: [
225225
cpu.row,
226+
cpu.getrusage_cpu_instance_total_time(cfg),
227+
cpu.getrusage_cpu_instance_user_time(cfg),
228+
cpu.getrusage_cpu_instance_system_time(cfg),
226229
cpu.getrusage_cpu_total_time(cfg),
227-
cpu.getrusage_cpu_user_time(cfg),
228-
cpu.getrusage_cpu_system_time(cfg),
230+
cpu.getrusage_cpu_total_user_time(cfg),
231+
cpu.getrusage_cpu_total_system_time(cfg),
232+
229233
],
230234

231235
cpu_extended(cfg):: [
232236
cpu.row,
237+
cpu.getrusage_cpu_instance_total_time(cfg),
238+
cpu.getrusage_cpu_instance_user_time(cfg),
239+
cpu.getrusage_cpu_instance_system_time(cfg),
233240
cpu.getrusage_cpu_total_time(cfg),
234-
cpu.getrusage_cpu_user_time(cfg),
235-
cpu.getrusage_cpu_system_time(cfg),
241+
cpu.getrusage_cpu_total_user_time(cfg),
242+
cpu.getrusage_cpu_total_system_time(cfg),
236243
cpu.procstat_thread_user_time(cfg),
237244
cpu.procstat_thread_system_time(cfg),
238245
],

0 commit comments

Comments
 (0)