From 1ab518e69623937f7054b4138d0dc8f221476796 Mon Sep 17 00:00:00 2001 From: Konstantin Shalygin Date: Thu, 24 Aug 2023 14:50:04 +0300 Subject: [PATCH] * split block mine to mineBlockSize() from mineCapacity() * remove redundant meta labels from SCSI metrics * added `smartctl_device_nvme_capacity_bytes` metric * for some devices, such as 2.5" NVMe Intel & Micron the `family` field may be empty The `.user_capacity` exists only when the NVMe device has a single namespace. Otherwise, for NVMe devices with multiple namespaces, when the device name is used without a namespace number (exporter case) `.user_capacity` will be absent ``` smartctl --info --health --attributes \ --tolerance=verypermissive --nocheck=standby --format=brief --log=error \ /dev/nvme11 --json | jq '.user_capacity' null smartctl --info --health --attributes \ --tolerance=verypermissive --nocheck=standby --format=brief --log=error \ /dev/nvme11 --json | jq '.nvme_total_capacity' 3840755982336 ``` Signed-off-by: Konstantin Shalygin --- metrics.go | 23 ++++++------------ smartctl.go | 70 ++++++++++++++++++++++++----------------------------- 2 files changed, 40 insertions(+), 53 deletions(-) diff --git a/metrics.go b/metrics.go index 91c16a1..c675fd0 100644 --- a/metrics.go +++ b/metrics.go @@ -69,6 +69,14 @@ var ( }, nil, ) + metricDeviceTotalCapacityBytes = prometheus.NewDesc( + "smartctl_device_nvme_capacity_bytes", + "NVMe device total capacity bytes", + []string{ + "device", + }, + nil, + ) metricDeviceBlockSize = prometheus.NewDesc( "smartctl_device_block_size", "Device block size", @@ -274,9 +282,6 @@ var ( "Device SCSI grown defect list counter", []string{ "device", - "model_family", - "model_name", - "serial_number", }, nil, ) @@ -285,9 +290,6 @@ var ( "Read Errors Corrected by ReReads/ReWrites", []string{ "device", - "model_family", - "model_name", - "serial_number", }, nil, ) @@ -296,9 +298,6 @@ var ( "Read Total Uncorrected Errors", []string{ "device", - "model_family", - "model_name", - "serial_number", }, nil, 
) @@ -307,9 +306,6 @@ var ( "Write Errors Corrected by ReReads/ReWrites", []string{ "device", - "model_family", - "model_name", - "serial_number", }, nil, ) @@ -318,9 +314,6 @@ var ( "Write Total Uncorrected Errors", []string{ "device", - "model_family", - "model_name", - "serial_number", }, nil, ) diff --git a/smartctl.go b/smartctl.go index 792d0c7..23a374b 100644 --- a/smartctl.go +++ b/smartctl.go @@ -48,7 +48,7 @@ func NewSMARTctl(logger log.Logger, json gjson.Result, ch chan<- prometheus.Metr device: SMARTDevice{ device: strings.TrimPrefix(strings.TrimSpace(json.Get("device.name").String()), "/dev/"), serial: strings.TrimSpace(json.Get("serial_number").String()), - family: strings.TrimSpace(json.Get("model_family").String()), + family: strings.TrimSpace(GetStringIfExists(json, "model_family", "unknown")), model: strings.TrimSpace(json.Get("model_name").String()), }, } @@ -60,6 +60,7 @@ func (smart *SMARTctl) Collect() { smart.mineExitStatus() smart.mineDevice() smart.mineCapacity() + smart.mineBlockSize() smart.mineInterfaceSpeed() smart.mineDeviceAttribute() smart.minePowerOnSeconds() @@ -71,14 +72,14 @@ func (smart *SMARTctl) Collect() { smart.mineDeviceErrorLog() smart.mineDeviceSelfTestLog() smart.mineDeviceERC() - smart.minePercentageUsed() - smart.mineAvailableSpare() - smart.mineAvailableSpareThreshold() - smart.mineCriticalWarning() - smart.mineMediaErrors() - smart.mineNumErrLogEntries() - smart.mineBytesRead() - smart.mineBytesWritten() + smart.mineNvmePercentageUsed() + smart.mineNvmeAvailableSpare() + smart.mineNvmeAvailableSpareThreshold() + smart.mineNvmeCriticalWarning() + smart.mineNvmeMediaErrors() + smart.mineNvmeNumErrLogEntries() + smart.mineNvmeBytesRead() + smart.mineNvmeBytesWritten() smart.mineSmartStatus() smart.mineSCSIGrownDefectList() smart.mineSCSIErrorCounterLog() @@ -114,19 +115,30 @@ func (smart *SMARTctl) mineDevice() { } func (smart *SMARTctl) mineCapacity() { - capacity := smart.json.Get("user_capacity") + // The 
user_capacity exists only when NVMe have single namespace. Otherwise, + // for NVMe devices with multiple namespaces, when device name used without + // namespace number (exporter case) user_capacity will be absent smart.ch <- prometheus.MustNewConstMetric( metricDeviceCapacityBlocks, prometheus.GaugeValue, - capacity.Get("blocks").Float(), + smart.json.Get("user_capacity.blocks").Float(), smart.device.device, ) smart.ch <- prometheus.MustNewConstMetric( metricDeviceCapacityBytes, prometheus.GaugeValue, - capacity.Get("bytes").Float(), + smart.json.Get("user_capacity.bytes").Float(), smart.device.device, ) + smart.ch <- prometheus.MustNewConstMetric( + metricDeviceTotalCapacityBytes, + prometheus.GaugeValue, + smart.json.Get("nvme_total_capacity").Float(), + smart.device.device, + ) +} + +func (smart *SMARTctl) mineBlockSize() { for _, blockType := range []string{"logical", "physical"} { smart.ch <- prometheus.MustNewConstMetric( metricDeviceBlockSize, @@ -245,7 +257,7 @@ func (smart *SMARTctl) mineDeviceSCTStatus() { } } -func (smart *SMARTctl) minePercentageUsed() { +func (smart *SMARTctl) mineNvmePercentageUsed() { smart.ch <- prometheus.MustNewConstMetric( metricDevicePercentageUsed, prometheus.CounterValue, @@ -254,7 +266,7 @@ func (smart *SMARTctl) minePercentageUsed() { ) } -func (smart *SMARTctl) mineAvailableSpare() { +func (smart *SMARTctl) mineNvmeAvailableSpare() { smart.ch <- prometheus.MustNewConstMetric( metricDeviceAvailableSpare, prometheus.CounterValue, @@ -263,7 +275,7 @@ func (smart *SMARTctl) mineAvailableSpare() { ) } -func (smart *SMARTctl) mineAvailableSpareThreshold() { +func (smart *SMARTctl) mineNvmeAvailableSpareThreshold() { smart.ch <- prometheus.MustNewConstMetric( metricDeviceAvailableSpareThreshold, prometheus.CounterValue, @@ -272,7 +284,7 @@ func (smart *SMARTctl) mineAvailableSpareThreshold() { ) } -func (smart *SMARTctl) mineCriticalWarning() { +func (smart *SMARTctl) mineNvmeCriticalWarning() { smart.ch <- 
prometheus.MustNewConstMetric( metricDeviceCriticalWarning, prometheus.CounterValue, @@ -281,7 +293,7 @@ func (smart *SMARTctl) mineCriticalWarning() { ) } -func (smart *SMARTctl) mineMediaErrors() { +func (smart *SMARTctl) mineNvmeMediaErrors() { smart.ch <- prometheus.MustNewConstMetric( metricDeviceMediaErrors, prometheus.CounterValue, @@ -290,7 +302,7 @@ func (smart *SMARTctl) mineMediaErrors() { ) } -func (smart *SMARTctl) mineNumErrLogEntries() { +func (smart *SMARTctl) mineNvmeNumErrLogEntries() { smart.ch <- prometheus.MustNewConstMetric( metricDeviceNumErrLogEntries, prometheus.CounterValue, @@ -299,7 +311,7 @@ func (smart *SMARTctl) mineNumErrLogEntries() { ) } -func (smart *SMARTctl) mineBytesRead() { +func (smart *SMARTctl) mineNvmeBytesRead() { blockSize := smart.json.Get("logical_block_size").Float() smart.ch <- prometheus.MustNewConstMetric( metricDeviceBytesRead, @@ -311,7 +323,7 @@ func (smart *SMARTctl) mineBytesRead() { ) } -func (smart *SMARTctl) mineBytesWritten() { +func (smart *SMARTctl) mineNvmeBytesWritten() { blockSize := smart.json.Get("logical_block_size").Float() smart.ch <- prometheus.MustNewConstMetric( metricDeviceBytesWritten, @@ -346,9 +358,6 @@ func (smart *SMARTctl) mineDeviceStatistics() { prometheus.GaugeValue, statistic.Get("value").Float(), smart.device.device, - smart.device.family, - smart.device.model, - smart.device.serial, table, strings.TrimSpace(statistic.Get("name").String()), strings.TrimSpace(statistic.Get("flags.string").String()), @@ -438,9 +447,6 @@ func (smart *SMARTctl) mineSCSIGrownDefectList() { prometheus.GaugeValue, scsi_grown_defect_list.Float(), smart.device.device, - smart.device.family, - smart.device.model, - smart.device.serial, ) } } @@ -453,36 +459,24 @@ func (smart *SMARTctl) mineSCSIErrorCounterLog() { prometheus.GaugeValue, SCSIHealth.Get("read.errors_corrected_by_rereads_rewrites").Float(), smart.device.device, - smart.device.family, - smart.device.model, - smart.device.serial, ) smart.ch <- 
prometheus.MustNewConstMetric( metricReadTotalUncorrectedErrors, prometheus.GaugeValue, SCSIHealth.Get("read.total_uncorrected_errors").Float(), smart.device.device, - smart.device.family, - smart.device.model, - smart.device.serial, ) smart.ch <- prometheus.MustNewConstMetric( metricWriteErrorsCorrectedByRereadsRewrites, prometheus.GaugeValue, SCSIHealth.Get("write.errors_corrected_by_rereads_rewrites").Float(), smart.device.device, - smart.device.family, - smart.device.model, - smart.device.serial, ) smart.ch <- prometheus.MustNewConstMetric( metricWriteTotalUncorrectedErrors, prometheus.GaugeValue, SCSIHealth.Get("write.total_uncorrected_errors").Float(), smart.device.device, - smart.device.family, - smart.device.model, - smart.device.serial, ) } }