mirror of
https://github.com/prometheus-community/smartctl_exporter.git
synced 2024-11-23 01:43:07 +01:00
feat: Better SCSI/SAS support
Fix the following metrics, which were exported as zero because the exporter did not know how to read them for SCSI devices:
- smartctl_device_bytes_read
- smartctl_device_bytes_written
- smartctl_device_power_cycle_count

New metrics:
- smartctl_read_errors_corrected_by_eccdelayed
- smartctl_read_errors_corrected_by_eccfast
- smartctl_write_errors_corrected_by_eccdelayed
- smartctl_write_errors_corrected_by_eccfast

Fix labels:
- smartctl_device{model_name} is now populated for SCSI/SAS, using scsi_model_name.

New labels:
- smartctl_device{} gains: scsi_product, scsi_revision, scsi_vendor, scsi_version

Signed-off-by: Robin H. Johnson <rjohnson@coreweave.com>
This commit is contained in:
parent
d90594ac23
commit
9113c6cf0f
2 changed files with 115 additions and 0 deletions
37
metrics.go
37
metrics.go
|
@ -44,6 +44,11 @@ var (
|
||||||
"ata_version",
|
"ata_version",
|
||||||
"sata_version",
|
"sata_version",
|
||||||
"form_factor",
|
"form_factor",
|
||||||
|
// scsi_model_name is mapped into model_name
|
||||||
|
"scsi_vendor",
|
||||||
|
"scsi_product",
|
||||||
|
"scsi_revision",
|
||||||
|
"scsi_version",
|
||||||
},
|
},
|
||||||
nil,
|
nil,
|
||||||
)
|
)
|
||||||
|
@ -293,6 +298,22 @@ var (
|
||||||
},
|
},
|
||||||
nil,
|
nil,
|
||||||
)
|
)
|
||||||
|
metricReadErrorsCorrectedByEccFast = prometheus.NewDesc(
|
||||||
|
"smartctl_read_errors_corrected_by_eccfast",
|
||||||
|
"Read Errors Corrected by ECC Fast",
|
||||||
|
[]string{
|
||||||
|
"device",
|
||||||
|
},
|
||||||
|
nil,
|
||||||
|
)
|
||||||
|
metricReadErrorsCorrectedByEccDelayed = prometheus.NewDesc(
|
||||||
|
"smartctl_read_errors_corrected_by_eccdelayed",
|
||||||
|
"Read Errors Corrected by ECC Delayed",
|
||||||
|
[]string{
|
||||||
|
"device",
|
||||||
|
},
|
||||||
|
nil,
|
||||||
|
)
|
||||||
metricReadTotalUncorrectedErrors = prometheus.NewDesc(
|
metricReadTotalUncorrectedErrors = prometheus.NewDesc(
|
||||||
"smartctl_read_total_uncorrected_errors",
|
"smartctl_read_total_uncorrected_errors",
|
||||||
"Read Total Uncorrected Errors",
|
"Read Total Uncorrected Errors",
|
||||||
|
@ -309,6 +330,22 @@ var (
|
||||||
},
|
},
|
||||||
nil,
|
nil,
|
||||||
)
|
)
|
||||||
|
metricWriteErrorsCorrectedByEccFast = prometheus.NewDesc(
|
||||||
|
"smartctl_write_errors_corrected_by_eccfast",
|
||||||
|
"Write Errors Corrected by ECC Fast",
|
||||||
|
[]string{
|
||||||
|
"device",
|
||||||
|
},
|
||||||
|
nil,
|
||||||
|
)
|
||||||
|
metricWriteErrorsCorrectedByEccDelayed = prometheus.NewDesc(
|
||||||
|
"smartctl_write_errors_corrected_by_eccdelayed",
|
||||||
|
"Write Errors Corrected by ECC Delayed",
|
||||||
|
[]string{
|
||||||
|
"device",
|
||||||
|
},
|
||||||
|
nil,
|
||||||
|
)
|
||||||
metricWriteTotalUncorrectedErrors = prometheus.NewDesc(
|
metricWriteTotalUncorrectedErrors = prometheus.NewDesc(
|
||||||
"smartctl_write_total_uncorrected_errors",
|
"smartctl_write_total_uncorrected_errors",
|
||||||
"Write Total Uncorrected Errors",
|
"Write Total Uncorrected Errors",
|
||||||
|
|
78
smartctl.go
78
smartctl.go
|
@ -47,6 +47,8 @@ func NewSMARTctl(logger log.Logger, json gjson.Result, ch chan<- prometheus.Metr
|
||||||
var model_name string
|
var model_name string
|
||||||
if obj := json.Get("model_name"); obj.Exists() {
|
if obj := json.Get("model_name"); obj.Exists() {
|
||||||
model_name = obj.String()
|
model_name = obj.String()
|
||||||
|
} else if obj := json.Get("scsi_model_name"); obj.Exists() {
|
||||||
|
model_name = obj.String()
|
||||||
}
|
}
|
||||||
// If the drive returns an empty model name, replace that with unknown.
|
// If the drive returns an empty model name, replace that with unknown.
|
||||||
if model_name == "" {
|
if model_name == "" {
|
||||||
|
@ -102,6 +104,8 @@ func (smart *SMARTctl) Collect() {
|
||||||
if smart.device.interface_ == "scsi" {
|
if smart.device.interface_ == "scsi" {
|
||||||
smart.mineSCSIGrownDefectList()
|
smart.mineSCSIGrownDefectList()
|
||||||
smart.mineSCSIErrorCounterLog()
|
smart.mineSCSIErrorCounterLog()
|
||||||
|
smart.mineSCSIBytesRead()
|
||||||
|
smart.mineSCSIBytesWritten()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -130,6 +134,11 @@ func (smart *SMARTctl) mineDevice() {
|
||||||
smart.json.Get("ata_version.string").String(),
|
smart.json.Get("ata_version.string").String(),
|
||||||
smart.json.Get("sata_version.string").String(),
|
smart.json.Get("sata_version.string").String(),
|
||||||
smart.json.Get("form_factor.name").String(),
|
smart.json.Get("form_factor.name").String(),
|
||||||
|
// scsi_model_name is mapped into model_name
|
||||||
|
smart.json.Get("scsi_vendor").String(),
|
||||||
|
smart.json.Get("scsi_product").String(),
|
||||||
|
smart.json.Get("scsi_revision").String(),
|
||||||
|
smart.json.Get("scsi_version").String(),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -173,6 +182,7 @@ func (smart *SMARTctl) mineBlockSize() {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (smart *SMARTctl) mineInterfaceSpeed() {
|
func (smart *SMARTctl) mineInterfaceSpeed() {
|
||||||
|
// TODO: Support scsi_sas_port_[01].phy_N.negotiated_logical_link_rate
|
||||||
iSpeed := smart.json.Get("interface_speed")
|
iSpeed := smart.json.Get("interface_speed")
|
||||||
if iSpeed.Exists() {
|
if iSpeed.Exists() {
|
||||||
for _, speedType := range []string{"max", "current"} {
|
for _, speedType := range []string{"max", "current"} {
|
||||||
|
@ -253,6 +263,7 @@ func (smart *SMARTctl) mineRotationRate() {
|
||||||
|
|
||||||
func (smart *SMARTctl) mineTemperatures() {
|
func (smart *SMARTctl) mineTemperatures() {
|
||||||
temperatures := smart.json.Get("temperature")
|
temperatures := smart.json.Get("temperature")
|
||||||
|
// TODO: Implement scsi_environmental_reports
|
||||||
if temperatures.Exists() {
|
if temperatures.Exists() {
|
||||||
temperatures.ForEach(func(key, value gjson.Result) bool {
|
temperatures.ForEach(func(key, value gjson.Result) bool {
|
||||||
smart.ch <- prometheus.MustNewConstMetric(
|
smart.ch <- prometheus.MustNewConstMetric(
|
||||||
|
@ -279,6 +290,18 @@ func (smart *SMARTctl) minePowerCycleCount() {
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SCSI
|
||||||
|
powerCycleCount = smart.json.Get("scsi_start_stop_cycle_counter.accumulated_start_stop_cycles")
|
||||||
|
if powerCycleCount.Exists() {
|
||||||
|
smart.ch <- prometheus.MustNewConstMetric(
|
||||||
|
metricDevicePowerCycleCount,
|
||||||
|
prometheus.CounterValue,
|
||||||
|
powerCycleCount.Float(),
|
||||||
|
smart.device.device,
|
||||||
|
)
|
||||||
|
return
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (smart *SMARTctl) mineDeviceSCTStatus() {
|
func (smart *SMARTctl) mineDeviceSCTStatus() {
|
||||||
|
@ -379,6 +402,36 @@ func (smart *SMARTctl) mineNvmeBytesWritten() {
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (smart *SMARTctl) mineSCSIBytesRead() {
|
||||||
|
SCSIHealth := smart.json.Get("scsi_error_counter_log")
|
||||||
|
if SCSIHealth.Exists() {
|
||||||
|
smart.ch <- prometheus.MustNewConstMetric(
|
||||||
|
metricDeviceBytesRead,
|
||||||
|
prometheus.CounterValue,
|
||||||
|
// This value is reported by SMARTctl in GB [10^9].
|
||||||
|
// It is possible that some drives mis-report the value, but
|
||||||
|
// that is not the responsibility of the exporter or smartctl
|
||||||
|
SCSIHealth.Get("read.gigabytes_processed").Float()*1e9,
|
||||||
|
smart.device.device,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (smart *SMARTctl) mineSCSIBytesWritten() {
|
||||||
|
SCSIHealth := smart.json.Get("scsi_error_counter_log")
|
||||||
|
if SCSIHealth.Exists() {
|
||||||
|
smart.ch <- prometheus.MustNewConstMetric(
|
||||||
|
metricDeviceBytesWritten,
|
||||||
|
prometheus.CounterValue,
|
||||||
|
// This value is reported by SMARTctl in GB [10^9].
|
||||||
|
// It is possible that some drives mis-report the value, but
|
||||||
|
// that is not the responsibility of the exporter or smartctl
|
||||||
|
SCSIHealth.Get("write.gigabytes_processed").Float()*1e9,
|
||||||
|
smart.device.device,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (smart *SMARTctl) mineSmartStatus() {
|
func (smart *SMARTctl) mineSmartStatus() {
|
||||||
smart.ch <- prometheus.MustNewConstMetric(
|
smart.ch <- prometheus.MustNewConstMetric(
|
||||||
metricDeviceSmartStatus,
|
metricDeviceSmartStatus,
|
||||||
|
@ -504,6 +557,18 @@ func (smart *SMARTctl) mineSCSIErrorCounterLog() {
|
||||||
SCSIHealth.Get("read.errors_corrected_by_rereads_rewrites").Float(),
|
SCSIHealth.Get("read.errors_corrected_by_rereads_rewrites").Float(),
|
||||||
smart.device.device,
|
smart.device.device,
|
||||||
)
|
)
|
||||||
|
smart.ch <- prometheus.MustNewConstMetric(
|
||||||
|
metricReadErrorsCorrectedByEccFast,
|
||||||
|
prometheus.GaugeValue,
|
||||||
|
SCSIHealth.Get("read.errors_corrected_by_eccfast").Float(),
|
||||||
|
smart.device.device,
|
||||||
|
)
|
||||||
|
smart.ch <- prometheus.MustNewConstMetric(
|
||||||
|
metricReadErrorsCorrectedByEccDelayed,
|
||||||
|
prometheus.GaugeValue,
|
||||||
|
SCSIHealth.Get("read.errors_corrected_by_eccdelayed").Float(),
|
||||||
|
smart.device.device,
|
||||||
|
)
|
||||||
smart.ch <- prometheus.MustNewConstMetric(
|
smart.ch <- prometheus.MustNewConstMetric(
|
||||||
metricReadTotalUncorrectedErrors,
|
metricReadTotalUncorrectedErrors,
|
||||||
prometheus.GaugeValue,
|
prometheus.GaugeValue,
|
||||||
|
@ -516,11 +581,24 @@ func (smart *SMARTctl) mineSCSIErrorCounterLog() {
|
||||||
SCSIHealth.Get("write.errors_corrected_by_rereads_rewrites").Float(),
|
SCSIHealth.Get("write.errors_corrected_by_rereads_rewrites").Float(),
|
||||||
smart.device.device,
|
smart.device.device,
|
||||||
)
|
)
|
||||||
|
smart.ch <- prometheus.MustNewConstMetric(
|
||||||
|
metricWriteErrorsCorrectedByEccFast,
|
||||||
|
prometheus.GaugeValue,
|
||||||
|
SCSIHealth.Get("write.errors_corrected_by_eccfast").Float(),
|
||||||
|
smart.device.device,
|
||||||
|
)
|
||||||
|
smart.ch <- prometheus.MustNewConstMetric(
|
||||||
|
metricWriteErrorsCorrectedByEccDelayed,
|
||||||
|
prometheus.GaugeValue,
|
||||||
|
SCSIHealth.Get("write.errors_corrected_by_eccdelayed").Float(),
|
||||||
|
smart.device.device,
|
||||||
|
)
|
||||||
smart.ch <- prometheus.MustNewConstMetric(
|
smart.ch <- prometheus.MustNewConstMetric(
|
||||||
metricWriteTotalUncorrectedErrors,
|
metricWriteTotalUncorrectedErrors,
|
||||||
prometheus.GaugeValue,
|
prometheus.GaugeValue,
|
||||||
SCSIHealth.Get("write.total_uncorrected_errors").Float(),
|
SCSIHealth.Get("write.total_uncorrected_errors").Float(),
|
||||||
smart.device.device,
|
smart.device.device,
|
||||||
)
|
)
|
||||||
|
// TODO: Should we also export the verify category?
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue