Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
vendor
jsonnetfile.lock.json
*.zip
.worktrees
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
{
prometheusAlerts+:: {
groups+: [
new(this): {
groups: [
{
name: 'bigip-alerts',
name: this.config.uid + '-alerts',
rules: [
{
alert: 'BigIPLowNodeAvailabilityStatus',
expr: |||
100 * (sum(bigip_node_status_availability_state) / clamp_min(count(bigip_node_status_availability_state), 1)) < %(alertsCriticalNodeAvailability)s
||| % $._config,
||| % this.config,
'for': '5m',
labels: {
severity: 'critical',
Expand All @@ -19,14 +19,14 @@
(
'{{ printf "%%.0f" $value }} percent of available nodes, ' +
'which is below the threshold of %(alertsCriticalNodeAvailability)s.'
) % $._config,
) % this.config,
},
},
{
alert: 'BigIPServerSideConnectionLimit',
expr: |||
max without(instance, job) (100 * bigip_node_serverside_cur_conns / clamp_min(bigip_node_serverside_max_conns, 1)) > %(alertsWarningServerSideConnectionLimit)s
||| % $._config,
||| % this.config,
'for': '5m',
labels: {
severity: 'warning',
Expand All @@ -37,14 +37,14 @@
(
'{{ printf "%%.0f" $value }} percent of the max number of connections in use on node {{$labels.node}}, ' +
'which is above the threshold of %(alertsWarningServerSideConnectionLimit)s percent.'
) % $._config,
) % this.config,
},
},
{
alert: 'BigIPHighRequestRate',
expr: |||
max without(instance, job) (100 * rate(bigip_pool_tot_requests[10m]) / clamp_min(rate(bigip_pool_tot_requests[50m] offset 10m), 1)) > %(alertsCriticalHighRequestRate)s
||| % $._config,
||| % this.config,
'for': '10m',
labels: {
severity: 'warning',
Expand All @@ -55,14 +55,14 @@
(
'{{ printf "%%.0f" $value }} percent increase in requests on pool {{$labels.pool}}, ' +
'which is above the threshold of %(alertsCriticalHighRequestRate)s.'
) % $._config,
) % this.config,
},
},
{
alert: 'BigIPHighConnectionQueueDepth',
expr: |||
max without(instance, job) (100 * rate(bigip_pool_connq_depth[5m])) / clamp_min(rate(bigip_pool_connq_depth[50m] offset 10m), 1) > %(alertsCriticalHighConnectionQueueDepth)s
||| % $._config,
||| % this.config,
'for': '5m',
labels: {
severity: 'warning',
Expand All @@ -73,7 +73,7 @@
(
'{{ printf "%%.0f" $value }} percent increase in connection queue depth on node {{$labels.pool}}, ' +
'which is above the threshold of %(alertsCriticalHighConnectionQueueDepth)s.'
) % $._config,
) % this.config,
},
},
],
Expand Down
47 changes: 35 additions & 12 deletions f5-bigip-mixin/config.libsonnet
Original file line number Diff line number Diff line change
@@ -1,17 +1,40 @@
{
_config+:: {
dashboardTags: ['f5-bigip-mixin'],
dashboardPeriod: 'now-30m',
dashboardTimezone: 'default',
dashboardRefresh: '1m',
local this = self,

// alerts thresholds
alertsCriticalNodeAvailability: 95, // %
alertsWarningServerSideConnectionLimit: 80, // %
alertsCriticalHighRequestRate: 150, // %
alertsCriticalHighConnectionQueueDepth: 75, // %
// Basic filtering
filteringSelector: 'job=~"$job", instance=~"$instance"',
groupLabels: ['job'],
instanceLabels: ['instance'],

enableLokiLogs: false,
filterSelector: 'job=~"syslog"',
// Dashboard settings
dashboardTags: ['f5-bigip-mixin'],
uid: 'f5-bigip',
dashboardNamePrefix: 'F5 BIG-IP',
dashboardRefresh: '1m',
dashboardPeriod: 'now-30m',
dashboardTimezone: 'default',

// Logs configuration
enableLokiLogs: true,
filterSelector: 'job=~"syslog"',
logLabels: ['job', 'host', 'syslog_facility', 'level'],
extraLogLabels: [],
logsVolumeGroupBy: 'level',
showLogsVolume: true,

// Alert thresholds
alertsCriticalNodeAvailability: 95, // %
alertsWarningServerSideConnectionLimit: 80, // %
alertsCriticalHighRequestRate: 150, // %
alertsCriticalHighConnectionQueueDepth: 75, // %

// Metrics source
metricsSource: 'prometheus',

signals+: {
system: (import './signals/system.libsonnet')(this),
virtualServers: (import './signals/virtual-servers.libsonnet')(this),
pools: (import './signals/pools.libsonnet')(this),
nodes: (import './signals/nodes.libsonnet')(this),
},
}
225 changes: 225 additions & 0 deletions f5-bigip-mixin/dashboards.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
local g = import './g.libsonnet';
local logslib = import 'logs-lib/logs/main.libsonnet';

{
local root = self,
new(this)::
  // Builds the F5 BIG-IP dashboards as an object keyed by output file name.
  //
  // `this` must expose:
  //   - this.config: dashboardNamePrefix, dashboardTags, uid, dashboardRefresh,
  //     dashboardPeriod, dashboardTimezone, enableLokiLogs and, when logs are
  //     enabled, filterSelector, logLabels, showLogsVolume, logsVolumeGroupBy.
  //   - this.grafana: links, variables, annotations and rows.
  //
  // Returns the four overview dashboards, plus 'f5-bigip-logs.json' when
  // this.config.enableLokiLogs is true.
  local prefix = this.config.dashboardNamePrefix;
  local links = this.grafana.links;
  local tags = this.config.dashboardTags;
  local uid = this.config.uid;
  local vars = this.grafana.variables;
  local annotations = this.grafana.annotations;
  local refresh = this.config.dashboardRefresh;
  local period = this.config.dashboardPeriod;
  local timezone = this.config.dashboardTimezone;

  // Matchers shared by every variable query on these dashboards.
  local baseSelector = 'job=~"$job", instance=~"$instance"';

  // Renders `metric{<baseSelector>[, <extra matchers>...]}`.
  local selector(metric, extraMatchers=[]) =
    '%s{%s}' % [metric, std.join(', ', [baseSelector] + extraMatchers)];

  // Multi-select, include-all query variable listing the values of `labelName`
  // found on `metricSelector`.
  // NOTE(review): both refresh mixins are applied, exactly as before this
  // refactor; presumably the later one (onTime) takes effect - confirm.
  local labelValuesVar(name, title, labelName, metricSelector) =
    g.dashboard.variable.query.new(name)
    + g.dashboard.variable.query.withDatasourceFromVariable(vars.datasources.prometheus)
    + g.dashboard.variable.query.queryTypes.withLabelValues(labelName, metricSelector)
    + g.dashboard.variable.query.generalOptions.withLabel(title)
    + g.dashboard.variable.query.selectionOptions.withMulti(true)
    + g.dashboard.variable.query.selectionOptions.withIncludeAll(true, '.+')
    + g.dashboard.variable.query.refresh.onLoad()
    + g.dashboard.variable.query.refresh.onTime();

  // The partition variable is the same on every dashboard apart from the
  // metric selector it is derived from.
  local partitionVar(metricSelector) =
    labelValuesVar('bigip_partition', 'BIG-IP partition', 'partition', metricSelector);

  // Assembles one overview dashboard: title, description, row layout, then the
  // settings shared by all dashboards (see applyCommon).
  local overview(title, description, rows, extraVars, dashboardUid, dashboardLinks) =
    g.dashboard.new(prefix + title)
    + g.dashboard.withDescription(description)
    + g.dashboard.withPanels(
      g.util.panel.resolveCollapsedFlagOnRows(
        g.util.grid.wrapPanels(rows)
      )
    )
    + root.applyCommon(
      vars.multiInstance + extraVars,
      dashboardUid,
      tags,
      dashboardLinks,
      annotations,
      timezone,
      refresh,
      period
    );

  local nodeMetric = 'bigip_node_status_availability_state';
  local poolMetric = 'bigip_pool_status_availability_state';
  local vsMetric = 'bigip_vs_status_availability_state';

  {
    // Cluster Overview Dashboard
    'f5-bigip-cluster-overview.json':
      overview(
        ' cluster overview',
        'Overview of F5 BIG-IP cluster health and top metrics.',
        [
          this.grafana.rows.clusterOverviewRow,
        ],
        [
          // k (top count) variable
          g.dashboard.variable.custom.new('k', values=['5', '10', '20', '50'])
          + g.dashboard.variable.custom.generalOptions.withLabel('Top node count')
          + g.dashboard.variable.custom.generalOptions.withCurrent('5'),
          // bigip_partition variable. The query must not filter on
          // $bigip_partition itself: a self-referencing variable restricts its
          // own option list to whatever is currently selected.
          partitionVar(selector(nodeMetric)),
        ],
        uid + '-cluster-overview',
        // Each dashboard hides the link that points at itself.
        links { clusterOverview+:: {} }
      ),

    // Node Overview Dashboard
    'f5-bigip-node-overview.json':
      overview(
        ' node overview',
        'Detailed view of F5 BIG-IP node metrics and status.',
        [
          this.grafana.rows.nodeStatusRow,
          this.grafana.rows.nodeMetricsRow,
        ],
        [
          // bigip_node variable
          labelValuesVar('bigip_node', 'BIG-IP node', 'node', selector(nodeMetric)),
          // bigip_partition variable, narrowed by the selected nodes
          partitionVar(selector(nodeMetric, ['node=~"$bigip_node"'])),
        ],
        uid + '-node-overview',
        links { nodeOverview+:: {} }
      ),

    // Pool Overview Dashboard
    'f5-bigip-pool-overview.json':
      overview(
        ' pool overview',
        'Detailed view of F5 BIG-IP pool metrics and status.',
        [
          this.grafana.rows.poolStatusRow,
          this.grafana.rows.poolMetricsRow,
        ],
        [
          // bigip_pool variable
          labelValuesVar('bigip_pool', 'BIG-IP pool', 'pool', selector(poolMetric)),
          // bigip_partition variable, narrowed by the selected pools
          partitionVar(selector(poolMetric, ['pool=~"$bigip_pool"'])),
        ],
        uid + '-pool-overview',
        links { poolOverview+:: {} }
      ),

    // Virtual Server Overview Dashboard
    'f5-bigip-virtual-server-overview.json':
      overview(
        ' virtual server overview',
        'Detailed view of F5 BIG-IP virtual server metrics and status.',
        [
          this.grafana.rows.virtualServerStatusRow,
          this.grafana.rows.virtualServerClientsideMetricsRow,
          this.grafana.rows.virtualServerEphemeralMetricsRow,
        ],
        [
          // bigip_virtual_server variable
          labelValuesVar('bigip_virtual_server', 'BIG-IP virtual server', 'virtual_server', selector(vsMetric)),
          // bigip_partition variable, narrowed by the selected virtual servers
          partitionVar(selector(vsMetric, ['virtual_server=~"$bigip_virtual_server"'])),
        ],
        uid + '-virtual-server-overview',
        links { virtualServerOverview+:: {} }
      ),
  }
  +
  // The logs dashboard is only emitted when Loki logs are enabled.
  if this.config.enableLokiLogs then
    {
      // NOTE(review): only config.logLabels is passed below; confirm whether
      // extraLogLabels (if configured) is meant to be appended here.
      'f5-bigip-logs.json':
        logslib.new(
          prefix + ' logs',
          datasourceName=vars.datasources.loki.name,
          datasourceRegex=vars.datasources.loki.regex,
          filterSelector=this.config.filterSelector,
          labels=this.config.logLabels,
          formatParser=null,
          showLogsVolume=this.config.showLogsVolume,
          logsVolumeGroupBy=this.config.logsVolumeGroupBy,
        )
        {
          // Patch the dashboard produced by logslib, then extract it.
          dashboards+:
            {
              logs+:
                g.dashboard.withUid(uid + '-logs')
                + g.dashboard.withTags(tags)
                + g.dashboard.withRefresh(refresh)
                + g.dashboard.withLinks(std.objectValues(links { logs+:: {} }))
                + g.panel.logs.options.withShowTime(false),
            },
        }.dashboards.logs,
    } else {},

applyCommon(vars, uid, tags, links, annotations, timezone, refresh, period):
  // Returns a dashboard mixin carrying the settings shared by every dashboard:
  // tags, uid, links, timezone, refresh interval, default time range start,
  // template variables and annotations.
  //
  // `links` and `annotations` are objects; only their visible field values are
  // attached, so callers can drop an entry by hiding its field.
  local linkList = std.objectValues(links);
  local annotationList = std.objectValues(annotations);

  local identity =
    g.dashboard.withTags(tags)
    + g.dashboard.withUid(uid)
    + g.dashboard.withLinks(linkList);

  local timeSettings =
    g.dashboard.withTimezone(timezone)
    + g.dashboard.withRefresh(refresh)
    + g.dashboard.time.withFrom(period);

  identity
  + timeSettings
  + g.dashboard.withVariables(vars)
  + g.dashboard.withAnnotations(annotationList),
}
Loading
Loading