Skip to content

Commit e496da8

Browse files
committed
feat: add support for mining the NVMe Volatile Memory Backup failure status
This PR adds a boolean metric that reports when NVMe PLP (power-loss protection, i.e. the Volatile Memory Backup device) has failed. Further use of such a drive is unsafe: if the power fails, data may be lost. Healthy device ```console [root@host]# smartctl --json --info --capabilities --health --attributes --tolerance=verypermissive --nocheck=standby --format=brief --log=error --device=nvme /dev/nvme1 | jq .smart_status { "passed": true, "nvme": { "value": 0 } } ``` PLP failed ```console [root@host]# smartctl --json --info --capabilities --health --attributes --tolerance=verypermissive --nocheck=standby --format=brief --log=error --device=nvme /dev/nvme0 | jq .smart_status { "passed": false, "nvme": { "value": 16, "spare_below_threshold": false, "temperature_above_or_below_threshold": false, "reliability_degraded": false, "media_read_only": false, "volatile_memory_backup_failed": true, "persistent_memory_region_unreliable": false, "other": 0 } } ``` Signed-off-by: Konstantin Shalygin <[email protected]>
1 parent ef5c03d commit e496da8

File tree

2 files changed

+25
-1
lines changed

2 files changed

+25
-1
lines changed

metrics.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,14 @@ var (
194194
},
195195
nil,
196196
)
197+
metricDeviceVolatileMemoryBackupFailed = prometheus.NewDesc(
198+
"smartctl_device_volatile_memory_backup_failed",
199+
"Indicates that Volatile Memory Backup (NVMe PLP) is failed",
200+
[]string{
201+
"device",
202+
},
203+
nil,
204+
)
197205
metricDeviceBytesRead = prometheus.NewDesc(
198206
"smartctl_device_bytes_read",
199207
"",

smartctl.go

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ func (smart *SMARTctl) Collect() {
109109
smart.mineNvmeCriticalWarning()
110110
smart.mineNvmeMediaErrors()
111111
smart.mineNvmeNumErrLogEntries()
112+
smart.mineNvmeVolatileMemoryBackupFailed()
112113
smart.mineNvmeBytesRead()
113114
smart.mineNvmeBytesWritten()
114115
}
@@ -382,6 +383,21 @@ func (smart *SMARTctl) mineNvmeNumErrLogEntries() {
382383
)
383384
}
384385

386+
func (smart *SMARTctl) mineNvmeVolatileMemoryBackupFailed() {
387+
nvmeStatus := smart.json.Get("smart_status.nvme")
388+
if nvmeStatus.Exists() {
389+
volatileMemoryBackupFailed := nvmeStatus.Get("volatile_memory_backup_failed")
390+
if volatileMemoryBackupFailed.Exists() {
391+
smart.ch <- prometheus.MustNewConstMetric(
392+
metricDeviceVolatileMemoryBackupFailed,
393+
prometheus.CounterValue,
394+
volatileMemoryBackupFailed.Float(),
395+
smart.device.device,
396+
)
397+
}
398+
}
399+
}
400+
385401
// https://nvmexpress.org/wp-content/uploads/NVM-Express-NVM-Command-Set-Specification-1.0d-2023.12.28-Ratified.pdf
386402
// 4.1.4.2 SMART / Health Information (02h)
387403
// The SMART / Health Information log page is as defined in the NVM Express Base Specification. For the
@@ -472,7 +488,7 @@ func (smart *SMARTctl) mineSmartStatus() {
472488
if smartStatus.Exists() {
473489
smart.ch <- prometheus.MustNewConstMetric(
474490
metricDeviceSmartStatus,
475-
prometheus.GaugeValue,
491+
prometheus.CounterValue,
476492
smartStatus.Get("passed").Float(),
477493
smart.device.device,
478494
)

0 commit comments

Comments
 (0)