From 637ad4223bf91857acb873a4e0f9ad9a0d5c1c3f Mon Sep 17 00:00:00 2001
From: Denys Lemeshko
Date: Sun, 18 Apr 2021 22:07:05 +0000
Subject: [PATCH] Critical metrics for SCSI disks added

Signed-off-by: Denys Lemeshko
---
 metrics.go  | 55 ++++++++++++++++++++++++++++++++++++++++++++++++
 smartctl.go | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 114 insertions(+), 1 deletion(-)

diff --git a/metrics.go b/metrics.go
index 27cf0cc..09039f6 100644
--- a/metrics.go
+++ b/metrics.go
@@ -271,4 +271,59 @@ var (
 		},
 		nil,
 	)
+	metricSCSIGrownDefectList = prometheus.NewDesc(
+		"smartctl_scsi_grown_defect_list",
+		"Device SCSI grown defect list counter",
+		[]string{
+			"device",
+			"model_family",
+			"model_name",
+			"serial_number",
+		},
+		nil,
+	)
+	metricReadErrorsCorrectedByRereadsRewrites = prometheus.NewDesc(
+		"smartctl_read_errors_corrected_by_rereads_rewrites",
+		"Read Errors Corrected by ReReads/ReWrites",
+		[]string{
+			"device",
+			"model_family",
+			"model_name",
+			"serial_number",
+		},
+		nil,
+	)
+	metricReadTotalUncorrectedErrors = prometheus.NewDesc(
+		"smartctl_read_total_uncorrected_errors",
+		"Read Total Uncorrected Errors",
+		[]string{
+			"device",
+			"model_family",
+			"model_name",
+			"serial_number",
+		},
+		nil,
+	)
+	metricWriteErrorsCorrectedByRereadsRewrites = prometheus.NewDesc(
+		"smartctl_write_errors_corrected_by_rereads_rewrites",
+		"Write Errors Corrected by ReReads/ReWrites",
+		[]string{
+			"device",
+			"model_family",
+			"model_name",
+			"serial_number",
+		},
+		nil,
+	)
+	metricWriteTotalUncorrectedErrors = prometheus.NewDesc(
+		"smartctl_write_total_uncorrected_errors",
+		"Write Total Uncorrected Errors",
+		[]string{
+			"device",
+			"model_family",
+			"model_name",
+			"serial_number",
+		},
+		nil,
+	)
 )
diff --git a/smartctl.go b/smartctl.go
index 7ffb25e..1f23b95 100644
--- a/smartctl.go
+++ b/smartctl.go
@@ -81,7 +81,8 @@ func (smart *SMARTctl) Collect() {
 	smart.mineBytesRead()
 	smart.mineBytesWritten()
 	smart.mineSmartStatus()
-
+	smart.mineSCSIGrownDefectList()
+	smart.mineSCSIErrorCounterLog()
 }
 
 func (smart *SMARTctl) mineExitStatus() {
@@ -435,3 +436,60 @@ func (smart *SMARTctl) mineDeviceERC() {
 		)
 	}
 }
+
+func (smart *SMARTctl) mineSCSIGrownDefectList() {
+	scsi_grown_defect_list := smart.json.Get("scsi_grown_defect_list")
+	if scsi_grown_defect_list.Exists() {
+		smart.ch <- prometheus.MustNewConstMetric(
+			metricSCSIGrownDefectList,
+			prometheus.CounterValue,
+			scsi_grown_defect_list.Float(),
+			smart.device.device,
+			smart.device.family,
+			smart.device.model,
+			smart.device.serial,
+		)
+	}
+}
+
+func (smart *SMARTctl) mineSCSIErrorCounterLog() {
+	SCSIHealth := smart.json.Get("scsi_error_counter_log")
+	if SCSIHealth.Exists() {
+		smart.ch <- prometheus.MustNewConstMetric(
+			metricReadErrorsCorrectedByRereadsRewrites,
+			prometheus.CounterValue,
+			SCSIHealth.Get("read.errors_corrected_by_rereads_rewrites").Float(),
+			smart.device.device,
+			smart.device.family,
+			smart.device.model,
+			smart.device.serial,
+		)
+		smart.ch <- prometheus.MustNewConstMetric(
+			metricReadTotalUncorrectedErrors,
+			prometheus.CounterValue,
+			SCSIHealth.Get("read.total_uncorrected_errors").Float(),
+			smart.device.device,
+			smart.device.family,
+			smart.device.model,
+			smart.device.serial,
+		)
+		smart.ch <- prometheus.MustNewConstMetric(
+			metricWriteErrorsCorrectedByRereadsRewrites,
+			prometheus.CounterValue,
+			SCSIHealth.Get("write.errors_corrected_by_rereads_rewrites").Float(),
+			smart.device.device,
+			smart.device.family,
+			smart.device.model,
+			smart.device.serial,
+		)
+		smart.ch <- prometheus.MustNewConstMetric(
+			metricWriteTotalUncorrectedErrors,
+			prometheus.CounterValue,
+			SCSIHealth.Get("write.total_uncorrected_errors").Float(),
+			smart.device.device,
+			smart.device.family,
+			smart.device.model,
+			smart.device.serial,
+		)
+	}
+}