* split block size mining out of mineCapacity() into a separate mineBlockSize()

* remove redundant meta labels from SCSI metrics
* added `smartctl_device_nvme_capacity_bytes` metric
* for some devices, such as 2.5" Intel & Micron NVMe drives, the `family` field may be empty

The `.user_capacity` field exists only when the NVMe device has a single namespace.
Otherwise, for NVMe devices with multiple namespaces, when the device name is used
without a namespace number (the exporter case), `.user_capacity` will be absent

```
smartctl --info --health --attributes \
--tolerance=verypermissive --nocheck=standby --format=brief --log=error \
/dev/nvme11 --json | jq '.user_capacity'

null

smartctl --info --health --attributes \
--tolerance=verypermissive --nocheck=standby --format=brief --log=error \
/dev/nvme11 --json | jq '.nvme_total_capacity'

3840755982336
```

Signed-off-by: Konstantin Shalygin <k0ste@k0ste.ru>
This commit is contained in:
Konstantin Shalygin 2023-08-24 14:50:04 +03:00
parent 8ab045d1be
commit 1ab518e696
No known key found for this signature in database
GPG key ID: 3C160886BF25D873
2 changed files with 40 additions and 53 deletions

View file

@ -69,6 +69,14 @@ var (
}, },
nil, nil,
) )
metricDeviceTotalCapacityBytes = prometheus.NewDesc(
"smartctl_device_nvme_capacity_bytes",
"NVMe device total capacity bytes",
[]string{
"device",
},
nil,
)
metricDeviceBlockSize = prometheus.NewDesc( metricDeviceBlockSize = prometheus.NewDesc(
"smartctl_device_block_size", "smartctl_device_block_size",
"Device block size", "Device block size",
@ -274,9 +282,6 @@ var (
"Device SCSI grown defect list counter", "Device SCSI grown defect list counter",
[]string{ []string{
"device", "device",
"model_family",
"model_name",
"serial_number",
}, },
nil, nil,
) )
@ -285,9 +290,6 @@ var (
"Read Errors Corrected by ReReads/ReWrites", "Read Errors Corrected by ReReads/ReWrites",
[]string{ []string{
"device", "device",
"model_family",
"model_name",
"serial_number",
}, },
nil, nil,
) )
@ -296,9 +298,6 @@ var (
"Read Total Uncorrected Errors", "Read Total Uncorrected Errors",
[]string{ []string{
"device", "device",
"model_family",
"model_name",
"serial_number",
}, },
nil, nil,
) )
@ -307,9 +306,6 @@ var (
"Write Errors Corrected by ReReads/ReWrites", "Write Errors Corrected by ReReads/ReWrites",
[]string{ []string{
"device", "device",
"model_family",
"model_name",
"serial_number",
}, },
nil, nil,
) )
@ -318,9 +314,6 @@ var (
"Write Total Uncorrected Errors", "Write Total Uncorrected Errors",
[]string{ []string{
"device", "device",
"model_family",
"model_name",
"serial_number",
}, },
nil, nil,
) )

View file

@ -48,7 +48,7 @@ func NewSMARTctl(logger log.Logger, json gjson.Result, ch chan<- prometheus.Metr
device: SMARTDevice{ device: SMARTDevice{
device: strings.TrimPrefix(strings.TrimSpace(json.Get("device.name").String()), "/dev/"), device: strings.TrimPrefix(strings.TrimSpace(json.Get("device.name").String()), "/dev/"),
serial: strings.TrimSpace(json.Get("serial_number").String()), serial: strings.TrimSpace(json.Get("serial_number").String()),
family: strings.TrimSpace(json.Get("model_family").String()), family: strings.TrimSpace(GetStringIfExists(json, "model_family", "unknown")),
model: strings.TrimSpace(json.Get("model_name").String()), model: strings.TrimSpace(json.Get("model_name").String()),
}, },
} }
@ -60,6 +60,7 @@ func (smart *SMARTctl) Collect() {
smart.mineExitStatus() smart.mineExitStatus()
smart.mineDevice() smart.mineDevice()
smart.mineCapacity() smart.mineCapacity()
smart.mineBlockSize()
smart.mineInterfaceSpeed() smart.mineInterfaceSpeed()
smart.mineDeviceAttribute() smart.mineDeviceAttribute()
smart.minePowerOnSeconds() smart.minePowerOnSeconds()
@ -71,14 +72,14 @@ func (smart *SMARTctl) Collect() {
smart.mineDeviceErrorLog() smart.mineDeviceErrorLog()
smart.mineDeviceSelfTestLog() smart.mineDeviceSelfTestLog()
smart.mineDeviceERC() smart.mineDeviceERC()
smart.minePercentageUsed() smart.mineNvmePercentageUsed()
smart.mineAvailableSpare() smart.mineNvmeAvailableSpare()
smart.mineAvailableSpareThreshold() smart.mineNvmeAvailableSpareThreshold()
smart.mineCriticalWarning() smart.mineNvmeCriticalWarning()
smart.mineMediaErrors() smart.mineNvmeMediaErrors()
smart.mineNumErrLogEntries() smart.mineNvmeNumErrLogEntries()
smart.mineBytesRead() smart.mineNvmeBytesRead()
smart.mineBytesWritten() smart.mineNvmeBytesWritten()
smart.mineSmartStatus() smart.mineSmartStatus()
smart.mineSCSIGrownDefectList() smart.mineSCSIGrownDefectList()
smart.mineSCSIErrorCounterLog() smart.mineSCSIErrorCounterLog()
@ -114,19 +115,30 @@ func (smart *SMARTctl) mineDevice() {
} }
func (smart *SMARTctl) mineCapacity() { func (smart *SMARTctl) mineCapacity() {
capacity := smart.json.Get("user_capacity") // The user_capacity exists only when NVMe have single namespace. Otherwise,
// for NVMe devices with multiple namespaces, when device name used without
// namespace number (exporter case) user_capacity will be absent
smart.ch <- prometheus.MustNewConstMetric( smart.ch <- prometheus.MustNewConstMetric(
metricDeviceCapacityBlocks, metricDeviceCapacityBlocks,
prometheus.GaugeValue, prometheus.GaugeValue,
capacity.Get("blocks").Float(), smart.json.Get("user_capacity.blocks").Float(),
smart.device.device, smart.device.device,
) )
smart.ch <- prometheus.MustNewConstMetric( smart.ch <- prometheus.MustNewConstMetric(
metricDeviceCapacityBytes, metricDeviceCapacityBytes,
prometheus.GaugeValue, prometheus.GaugeValue,
capacity.Get("bytes").Float(), smart.json.Get("user_capacity.bytes").Float(),
smart.device.device, smart.device.device,
) )
smart.ch <- prometheus.MustNewConstMetric(
metricDeviceTotalCapacityBytes,
prometheus.GaugeValue,
smart.json.Get("nvme_total_capacity").Float(),
smart.device.device,
)
}
func (smart *SMARTctl) mineBlockSize() {
for _, blockType := range []string{"logical", "physical"} { for _, blockType := range []string{"logical", "physical"} {
smart.ch <- prometheus.MustNewConstMetric( smart.ch <- prometheus.MustNewConstMetric(
metricDeviceBlockSize, metricDeviceBlockSize,
@ -245,7 +257,7 @@ func (smart *SMARTctl) mineDeviceSCTStatus() {
} }
} }
func (smart *SMARTctl) minePercentageUsed() { func (smart *SMARTctl) mineNvmePercentageUsed() {
smart.ch <- prometheus.MustNewConstMetric( smart.ch <- prometheus.MustNewConstMetric(
metricDevicePercentageUsed, metricDevicePercentageUsed,
prometheus.CounterValue, prometheus.CounterValue,
@ -254,7 +266,7 @@ func (smart *SMARTctl) minePercentageUsed() {
) )
} }
func (smart *SMARTctl) mineAvailableSpare() { func (smart *SMARTctl) mineNvmeAvailableSpare() {
smart.ch <- prometheus.MustNewConstMetric( smart.ch <- prometheus.MustNewConstMetric(
metricDeviceAvailableSpare, metricDeviceAvailableSpare,
prometheus.CounterValue, prometheus.CounterValue,
@ -263,7 +275,7 @@ func (smart *SMARTctl) mineAvailableSpare() {
) )
} }
func (smart *SMARTctl) mineAvailableSpareThreshold() { func (smart *SMARTctl) mineNvmeAvailableSpareThreshold() {
smart.ch <- prometheus.MustNewConstMetric( smart.ch <- prometheus.MustNewConstMetric(
metricDeviceAvailableSpareThreshold, metricDeviceAvailableSpareThreshold,
prometheus.CounterValue, prometheus.CounterValue,
@ -272,7 +284,7 @@ func (smart *SMARTctl) mineAvailableSpareThreshold() {
) )
} }
func (smart *SMARTctl) mineCriticalWarning() { func (smart *SMARTctl) mineNvmeCriticalWarning() {
smart.ch <- prometheus.MustNewConstMetric( smart.ch <- prometheus.MustNewConstMetric(
metricDeviceCriticalWarning, metricDeviceCriticalWarning,
prometheus.CounterValue, prometheus.CounterValue,
@ -281,7 +293,7 @@ func (smart *SMARTctl) mineCriticalWarning() {
) )
} }
func (smart *SMARTctl) mineMediaErrors() { func (smart *SMARTctl) mineNvmeMediaErrors() {
smart.ch <- prometheus.MustNewConstMetric( smart.ch <- prometheus.MustNewConstMetric(
metricDeviceMediaErrors, metricDeviceMediaErrors,
prometheus.CounterValue, prometheus.CounterValue,
@ -290,7 +302,7 @@ func (smart *SMARTctl) mineMediaErrors() {
) )
} }
func (smart *SMARTctl) mineNumErrLogEntries() { func (smart *SMARTctl) mineNvmeNumErrLogEntries() {
smart.ch <- prometheus.MustNewConstMetric( smart.ch <- prometheus.MustNewConstMetric(
metricDeviceNumErrLogEntries, metricDeviceNumErrLogEntries,
prometheus.CounterValue, prometheus.CounterValue,
@ -299,7 +311,7 @@ func (smart *SMARTctl) mineNumErrLogEntries() {
) )
} }
func (smart *SMARTctl) mineBytesRead() { func (smart *SMARTctl) mineNvmeBytesRead() {
blockSize := smart.json.Get("logical_block_size").Float() blockSize := smart.json.Get("logical_block_size").Float()
smart.ch <- prometheus.MustNewConstMetric( smart.ch <- prometheus.MustNewConstMetric(
metricDeviceBytesRead, metricDeviceBytesRead,
@ -311,7 +323,7 @@ func (smart *SMARTctl) mineBytesRead() {
) )
} }
func (smart *SMARTctl) mineBytesWritten() { func (smart *SMARTctl) mineNvmeBytesWritten() {
blockSize := smart.json.Get("logical_block_size").Float() blockSize := smart.json.Get("logical_block_size").Float()
smart.ch <- prometheus.MustNewConstMetric( smart.ch <- prometheus.MustNewConstMetric(
metricDeviceBytesWritten, metricDeviceBytesWritten,
@ -346,9 +358,6 @@ func (smart *SMARTctl) mineDeviceStatistics() {
prometheus.GaugeValue, prometheus.GaugeValue,
statistic.Get("value").Float(), statistic.Get("value").Float(),
smart.device.device, smart.device.device,
smart.device.family,
smart.device.model,
smart.device.serial,
table, table,
strings.TrimSpace(statistic.Get("name").String()), strings.TrimSpace(statistic.Get("name").String()),
strings.TrimSpace(statistic.Get("flags.string").String()), strings.TrimSpace(statistic.Get("flags.string").String()),
@ -438,9 +447,6 @@ func (smart *SMARTctl) mineSCSIGrownDefectList() {
prometheus.GaugeValue, prometheus.GaugeValue,
scsi_grown_defect_list.Float(), scsi_grown_defect_list.Float(),
smart.device.device, smart.device.device,
smart.device.family,
smart.device.model,
smart.device.serial,
) )
} }
} }
@ -453,36 +459,24 @@ func (smart *SMARTctl) mineSCSIErrorCounterLog() {
prometheus.GaugeValue, prometheus.GaugeValue,
SCSIHealth.Get("read.errors_corrected_by_rereads_rewrites").Float(), SCSIHealth.Get("read.errors_corrected_by_rereads_rewrites").Float(),
smart.device.device, smart.device.device,
smart.device.family,
smart.device.model,
smart.device.serial,
) )
smart.ch <- prometheus.MustNewConstMetric( smart.ch <- prometheus.MustNewConstMetric(
metricReadTotalUncorrectedErrors, metricReadTotalUncorrectedErrors,
prometheus.GaugeValue, prometheus.GaugeValue,
SCSIHealth.Get("read.total_uncorrected_errors").Float(), SCSIHealth.Get("read.total_uncorrected_errors").Float(),
smart.device.device, smart.device.device,
smart.device.family,
smart.device.model,
smart.device.serial,
) )
smart.ch <- prometheus.MustNewConstMetric( smart.ch <- prometheus.MustNewConstMetric(
metricWriteErrorsCorrectedByRereadsRewrites, metricWriteErrorsCorrectedByRereadsRewrites,
prometheus.GaugeValue, prometheus.GaugeValue,
SCSIHealth.Get("write.errors_corrected_by_rereads_rewrites").Float(), SCSIHealth.Get("write.errors_corrected_by_rereads_rewrites").Float(),
smart.device.device, smart.device.device,
smart.device.family,
smart.device.model,
smart.device.serial,
) )
smart.ch <- prometheus.MustNewConstMetric( smart.ch <- prometheus.MustNewConstMetric(
metricWriteTotalUncorrectedErrors, metricWriteTotalUncorrectedErrors,
prometheus.GaugeValue, prometheus.GaugeValue,
SCSIHealth.Get("write.total_uncorrected_errors").Float(), SCSIHealth.Get("write.total_uncorrected_errors").Float(),
smart.device.device, smart.device.device,
smart.device.family,
smart.device.model,
smart.device.serial,
) )
} }
} }