tarantool
diff --git a/CHANGELOG.md b/CHANGELOG.md
Lines changed: 1 addition & 1 deletion
diff --git a/dashboard/panels/cpu.libsonnet b/dashboard/panels/cpu.libsonnet
Lines changed: 193 additions & 5 deletions
@@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added
 - Panel with Сartridge configuration checksum (#242)
 - Panel with `need schema upgrade` status (#243)
-
+- Panels with `CPU/memory/virtual memory` utilization per instance and total (#245)
 
 ## [3.2.1] - 2024-12-06
 Grafana revisions:
 
@@ -1,6 +1,7 @@
 local grafana = import 'grafonnet/grafana.libsonnet';
 
 local common = import 'dashboard/panels/common.libsonnet';
+local common_utils = import 'dashboard/panels/common.libsonnet';
 local variable = import 'dashboard/variable.libsonnet';
 
 local influxdb = grafana.influxdb;
@@ -21,14 +22,14 @@ local prometheus = grafana.prometheus;
     format='percentunit',
     decimalsY1=0,
     min=0,
-    panel_width=12,
+    panel_width=8,
   ).addTarget(
     common.target(cfg, metric_name, rate=true)
   ),
 
-  getrusage_cpu_user_time(
+  getrusage_cpu_instance_user_time(
     cfg,
-    title='CPU user time',
+    title='CPU user time per instance',
     description=|||
       This is the average share of time
       spent by instance process executing in user mode.
@@ -43,9 +44,9 @@ local prometheus = grafana.prometheus;
     metric_name='tnt_cpu_user_time',
   ),
 
-  getrusage_cpu_system_time(
+  getrusage_cpu_instance_system_time(
     cfg,
-    title='CPU system time',
+    title='CPU system time per instance',
     description=|||
       This is the average share of time
       spent by instance process executing in kernel mode.
@@ -60,6 +61,193 @@ local prometheus = grafana.prometheus;
     metric_name='tnt_cpu_system_time',
   ),
 
+  // --------------------------------------------------------------------------
+  // Builds a 'percentunit' graph panel (width 8) with one series per instance:
+  // the rate of tnt_cpu_user_time plus the rate of tnt_cpu_system_time.
+  //
+  //   cfg         - dashboard config; cfg.type picks the datasource branch,
+  //                 cfg.metrics_prefix and cfg.filters feed the Prometheus query.
+  //   title       - panel title.
+  //   description - panel description.
+  //
+  // NOTE(review): the Prometheus branch removes 'alias' from cfg.filters, while
+  // the InfluxDB branch is a fixed raw query that filters on the $alias
+  // dashboard variable and does not read cfg at all - confirm this asymmetry
+  // is intended.
+  local getrusage_cpu_total_percentage_graph(
+    cfg, title, description,
+  ) =
+    local datasource = variable.datasource_type;
+
+    // Locals are lazy: only the target selected below is ever evaluated.
+    local per_instance_prometheus = prometheus.target(
+      expr=std.format(
+        |||
+          rate(%(metrics_prefix)stnt_cpu_user_time{%(filters)s}[$__rate_interval]) +
+          rate(%(metrics_prefix)stnt_cpu_system_time{%(filters)s}[$__rate_interval])
+        |||,
+        {
+          filters: common.prometheus_query_filters(common.remove_field(cfg.filters, 'alias')),
+          metrics_prefix: cfg.metrics_prefix,
+        }
+      ),
+      legendFormat='{{alias}}'
+    );
+
+    local per_instance_influxdb = influxdb.target(
+      rawQuery=true,
+      query=|||
+        SELECT non_negative_derivative(sum("value"), 1s)
+        FROM "$policy"."$measurement"
+        WHERE (("metric_name" = 'tnt_cpu_user_time' OR "metric_name" = 'tnt_cpu_system_time') AND "label_pairs_alias" =~ /^$alias$/)
+        AND $timeFilter
+        GROUP BY time($__interval), "label_pairs_alias" fill(none)
+      |||,
+      alias='$tag_label_pairs_alias',
+    );
+
+    common.default_graph(
+      cfg,
+      title=title,
+      description=description,
+      format='percentunit',
+      decimalsY1=0,
+      min=0,
+      panel_width=8,
+    ).addTarget(
+      // No trailing else: any other cfg.type evaluates to null here.
+      if cfg.type == datasource.prometheus then
+        per_instance_prometheus
+      else if cfg.type == datasource.influxdb then
+        per_instance_influxdb
+    ),
+
+  // Panel: total CPU time per instance.
+  // Supplies the default title and description and hands all three arguments
+  // unchanged to getrusage_cpu_total_percentage_graph, which builds the panel.
+  getrusage_cpu_instance_total_time(
+    cfg,
+    title='CPU total time per instance',
+    description=|||
+      This is the average share of time spent
+      by instance process executing.
+
+      Panel minimal requirements: metrics 0.8.0.
+    |||,
+  ):: getrusage_cpu_total_percentage_graph(cfg, title, description),
+
+  // --------------------------------------------------------------------------
+  // Builds a 'percentunit' graph panel (width 8) with a single target whose
+  // query text is supplied by the caller for each datasource.
+  //
+  //   cfg               - dashboard config; cfg.type picks the datasource.
+  //   title             - panel title.
+  //   description       - panel description.
+  //   prometheus_expr   - expression used when cfg.type is Prometheus.
+  //   prometheus_legend - legend format for the Prometheus target.
+  //   influx_query      - raw query used when cfg.type is InfluxDB.
+  //   influx_alias      - series alias for the InfluxDB target.
+  local getrusage_cpu_common_percentage_graph(
+    cfg,
+    title,
+    description,
+    prometheus_expr,
+    prometheus_legend,
+    influx_query,
+    influx_alias,
+  ) =
+    local datasource = variable.datasource_type;
+
+    local panel = common.default_graph(
+      cfg,
+      title=title,
+      description=description,
+      format='percentunit',
+      decimalsY1=0,
+      min=0,
+      panel_width=8,
+    );
+
+    // No trailing else: any other cfg.type leaves query_target as null.
+    local query_target =
+      if cfg.type == datasource.prometheus then
+        prometheus.target(
+          expr=prometheus_expr,
+          legendFormat=prometheus_legend,
+        )
+      else if cfg.type == datasource.influxdb then
+        influxdb.target(
+          rawQuery=true,
+          query=influx_query,
+          alias=influx_alias,
+        );
+
+    panel.addTarget(query_target),
+
+  // Panel: summed user + system CPU time rate across instances, drawn as one
+  // series whose legend/alias is the panel title.
+  //
+  // The Prometheus expression uses cfg.filters with the 'alias' entry removed;
+  // the InfluxDB query has no alias condition.
+  getrusage_cpu_total_time(
+    cfg,
+    title='CPU total time per cluster',
+    description=|||
+      This is the total share of time spent
+      by each cluster process executing.
+
+      Panel minimal requirements: metrics 0.8.0.
+    |||,
+  )::
+    local cluster_filters = common.prometheus_query_filters(
+      common.remove_field(cfg.filters, 'alias')
+    );
+
+    // `str % obj` is Jsonnet's operator form of std.format(str, obj).
+    local promql = |||
+      sum(rate(%(metrics_prefix)stnt_cpu_user_time{%(filters)s}[$__rate_interval])) +
+      sum(rate(%(metrics_prefix)stnt_cpu_system_time{%(filters)s}[$__rate_interval]))
+    ||| % {
+      filters: cluster_filters,
+      metrics_prefix: cfg.metrics_prefix,
+    };
+
+    local influxql = |||
+      SELECT non_negative_derivative(SUM("value"), 1s) AS total_cpu_time_per_cluster
+      FROM "$policy"."$measurement"
+      WHERE ("metric_name" = 'tnt_cpu_user_time' OR "metric_name" = 'tnt_cpu_system_time')
+      AND $timeFilter
+      GROUP BY time($__interval)
+    |||;
+
+    getrusage_cpu_common_percentage_graph(
+      cfg=cfg,
+      title=title,
+      description=description,
+      prometheus_expr=promql,
+      prometheus_legend=title,
+      influx_query=influxql,
+      influx_alias=title
+    ),
+
+  // Panel: cluster-wide user-mode CPU time rate, drawn as one series whose
+  // legend/alias is the panel title.
+  //
+  //   cfg         - dashboard config (cfg.type, cfg.metrics_prefix, cfg.filters).
+  //   title       - panel title, also used as the series legend/alias.
+  //   description - panel description.
+  //
+  // The Prometheus expression drops 'alias' from cfg.filters before summing,
+  // so the total is not narrowed by the alias selection. The InfluxDB query
+  // therefore carries no "label_pairs_alias" condition either; otherwise the
+  // same "per cluster" panel would shrink with $alias on InfluxDB only.
+  getrusage_cpu_total_user_time(
+    cfg,
+    title='CPU total user time per cluster',
+    description=|||
+      This is the total share of time
+      spent in user mode per cluster.
+
+      Panel minimal requirements: metrics 0.8.0.
+    |||,
+  ):: getrusage_cpu_common_percentage_graph(
+    cfg=cfg,
+    title=title,
+    description=description,
+    prometheus_expr=std.format(
+      |||
+        sum(rate(%(metrics_prefix)stnt_cpu_user_time{%(filters)s}[$__rate_interval]))
+      |||,
+      {
+        metrics_prefix: cfg.metrics_prefix,
+        filters: common.prometheus_query_filters(common.remove_field(cfg.filters, 'alias')),
+      }
+    ),
+    prometheus_legend=title,
+    influx_query=|||
+      SELECT non_negative_derivative(SUM("value"), 1s) AS total_cpu_user_time
+      FROM "$policy"."$measurement"
+      WHERE "metric_name" = 'tnt_cpu_user_time'
+      AND $timeFilter
+      GROUP BY time($__interval)
+    |||,
+    influx_alias=title
+  ),
+
+  // Panel: cluster-wide kernel-mode (system) CPU time rate, drawn as one
+  // series whose legend/alias is the panel title.
+  //
+  //   cfg         - dashboard config (cfg.type, cfg.metrics_prefix, cfg.filters).
+  //   title       - panel title, also used as the series legend/alias.
+  //   description - panel description.
+  //
+  // The Prometheus expression drops 'alias' from cfg.filters before summing,
+  // so the total is not narrowed by the alias selection. The InfluxDB query
+  // therefore carries no "label_pairs_alias" condition either; otherwise the
+  // same "per cluster" panel would shrink with $alias on InfluxDB only.
+  getrusage_cpu_total_system_time(
+    cfg,
+    title='CPU total system time per cluster',
+    description=|||
+      This is the total share of time
+      spent in system mode per cluster.
+
+      Panel minimal requirements: metrics 0.8.0.
+    |||,
+  ):: getrusage_cpu_common_percentage_graph(
+    cfg=cfg,
+    title=title,
+    description=description,
+    prometheus_expr=std.format(
+      |||
+        sum(rate(%(metrics_prefix)stnt_cpu_system_time{%(filters)s}[$__rate_interval]))
+      |||,
+      {
+        metrics_prefix: cfg.metrics_prefix,
+        filters: common.prometheus_query_filters(common.remove_field(cfg.filters, 'alias')),
+      }
+    ),
+    prometheus_legend=title,
+    influx_query=|||
+      SELECT non_negative_derivative(SUM("value"), 1s) AS total_cpu_system_time
+      FROM "$policy"."$measurement"
+      WHERE ("metric_name" = 'tnt_cpu_system_time')
+      AND   $timeFilter
+      GROUP BY time($__interval)
+    |||,
+    influx_alias=title
+  ),
+
+  // --------------------------------------------------------------------------
   local procstat_thread_time_graph(
     cfg,
     title,