mirror of
https://github.com/prometheus-community/smartctl_exporter.git
synced 2024-12-21 02:21:55 +01:00
feat: Better SCSI/SAS support
Fix the following metrics that were exported as zero because the exporter did not know how to read them for SCSI devices:
- smartctl_device_bytes_read
- smartctl_device_bytes_written
- smartctl_device_power_cycle_count

New metrics:
- smartctl_read_errors_corrected_by_eccdelayed
- smartctl_read_errors_corrected_by_eccfast
- smartctl_write_errors_corrected_by_eccdelayed
- smartctl_write_errors_corrected_by_eccfast

Fix labels:
- smartctl_device{model_name} is now populated for SCSI/SAS, using scsi_model_name.

New labels:
- smartctl_device{} gains: scsi_product, scsi_revision, scsi_vendor, scsi_version

Signed-off-by: Robin H. Johnson <rjohnson@coreweave.com>
This commit is contained in:
parent
d90594ac23
commit
9113c6cf0f
2 changed files with 115 additions and 0 deletions
37
metrics.go
37
metrics.go
|
@ -44,6 +44,11 @@ var (
|
|||
"ata_version",
|
||||
"sata_version",
|
||||
"form_factor",
|
||||
// scsi_model_name is mapped into model_name
|
||||
"scsi_vendor",
|
||||
"scsi_product",
|
||||
"scsi_revision",
|
||||
"scsi_version",
|
||||
},
|
||||
nil,
|
||||
)
|
||||
|
@ -293,6 +298,22 @@ var (
|
|||
},
|
||||
nil,
|
||||
)
|
||||
metricReadErrorsCorrectedByEccFast = prometheus.NewDesc(
|
||||
"smartctl_read_errors_corrected_by_eccfast",
|
||||
"Read Errors Corrected by ECC Fast",
|
||||
[]string{
|
||||
"device",
|
||||
},
|
||||
nil,
|
||||
)
|
||||
metricReadErrorsCorrectedByEccDelayed = prometheus.NewDesc(
|
||||
"smartctl_read_errors_corrected_by_eccdelayed",
|
||||
"Read Errors Corrected by ECC Delayed",
|
||||
[]string{
|
||||
"device",
|
||||
},
|
||||
nil,
|
||||
)
|
||||
metricReadTotalUncorrectedErrors = prometheus.NewDesc(
|
||||
"smartctl_read_total_uncorrected_errors",
|
||||
"Read Total Uncorrected Errors",
|
||||
|
@ -309,6 +330,22 @@ var (
|
|||
},
|
||||
nil,
|
||||
)
|
||||
metricWriteErrorsCorrectedByEccFast = prometheus.NewDesc(
|
||||
"smartctl_write_errors_corrected_by_eccfast",
|
||||
"Write Errors Corrected by ECC Fast",
|
||||
[]string{
|
||||
"device",
|
||||
},
|
||||
nil,
|
||||
)
|
||||
metricWriteErrorsCorrectedByEccDelayed = prometheus.NewDesc(
|
||||
"smartctl_write_errors_corrected_by_eccdelayed",
|
||||
"Write Errors Corrected by ECC Delayed",
|
||||
[]string{
|
||||
"device",
|
||||
},
|
||||
nil,
|
||||
)
|
||||
metricWriteTotalUncorrectedErrors = prometheus.NewDesc(
|
||||
"smartctl_write_total_uncorrected_errors",
|
||||
"Write Total Uncorrected Errors",
|
||||
|
|
78
smartctl.go
78
smartctl.go
|
@ -47,6 +47,8 @@ func NewSMARTctl(logger log.Logger, json gjson.Result, ch chan<- prometheus.Metr
|
|||
var model_name string
|
||||
if obj := json.Get("model_name"); obj.Exists() {
|
||||
model_name = obj.String()
|
||||
} else if obj := json.Get("scsi_model_name"); obj.Exists() {
|
||||
model_name = obj.String()
|
||||
}
|
||||
// If the drive returns an empty model name, replace that with unknown.
|
||||
if model_name == "" {
|
||||
|
@ -102,6 +104,8 @@ func (smart *SMARTctl) Collect() {
|
|||
if smart.device.interface_ == "scsi" {
|
||||
smart.mineSCSIGrownDefectList()
|
||||
smart.mineSCSIErrorCounterLog()
|
||||
smart.mineSCSIBytesRead()
|
||||
smart.mineSCSIBytesWritten()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -130,6 +134,11 @@ func (smart *SMARTctl) mineDevice() {
|
|||
smart.json.Get("ata_version.string").String(),
|
||||
smart.json.Get("sata_version.string").String(),
|
||||
smart.json.Get("form_factor.name").String(),
|
||||
// scsi_model_name is mapped into model_name
|
||||
smart.json.Get("scsi_vendor").String(),
|
||||
smart.json.Get("scsi_product").String(),
|
||||
smart.json.Get("scsi_revision").String(),
|
||||
smart.json.Get("scsi_version").String(),
|
||||
)
|
||||
}
|
||||
|
||||
|
@ -173,6 +182,7 @@ func (smart *SMARTctl) mineBlockSize() {
|
|||
}
|
||||
|
||||
func (smart *SMARTctl) mineInterfaceSpeed() {
|
||||
// TODO: Support scsi_sas_port_[01].phy_N.negotiated_logical_link_rate
|
||||
iSpeed := smart.json.Get("interface_speed")
|
||||
if iSpeed.Exists() {
|
||||
for _, speedType := range []string{"max", "current"} {
|
||||
|
@ -253,6 +263,7 @@ func (smart *SMARTctl) mineRotationRate() {
|
|||
|
||||
func (smart *SMARTctl) mineTemperatures() {
|
||||
temperatures := smart.json.Get("temperature")
|
||||
// TODO: Implement scsi_environmental_reports
|
||||
if temperatures.Exists() {
|
||||
temperatures.ForEach(func(key, value gjson.Result) bool {
|
||||
smart.ch <- prometheus.MustNewConstMetric(
|
||||
|
@ -279,6 +290,18 @@ func (smart *SMARTctl) minePowerCycleCount() {
|
|||
)
|
||||
return
|
||||
}
|
||||
|
||||
// SCSI
|
||||
powerCycleCount = smart.json.Get("scsi_start_stop_cycle_counter.accumulated_start_stop_cycles")
|
||||
if powerCycleCount.Exists() {
|
||||
smart.ch <- prometheus.MustNewConstMetric(
|
||||
metricDevicePowerCycleCount,
|
||||
prometheus.CounterValue,
|
||||
powerCycleCount.Float(),
|
||||
smart.device.device,
|
||||
)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
func (smart *SMARTctl) mineDeviceSCTStatus() {
|
||||
|
@ -379,6 +402,36 @@ func (smart *SMARTctl) mineNvmeBytesWritten() {
|
|||
)
|
||||
}
|
||||
|
||||
func (smart *SMARTctl) mineSCSIBytesRead() {
|
||||
SCSIHealth := smart.json.Get("scsi_error_counter_log")
|
||||
if SCSIHealth.Exists() {
|
||||
smart.ch <- prometheus.MustNewConstMetric(
|
||||
metricDeviceBytesRead,
|
||||
prometheus.CounterValue,
|
||||
// This value is reported by SMARTctl in GB [10^9].
|
||||
// It is possible that some drives mis-report the value, but
|
||||
// that is not the responsibility of the exporter or smartctl
|
||||
SCSIHealth.Get("read.gigabytes_processed").Float()*1e9,
|
||||
smart.device.device,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
func (smart *SMARTctl) mineSCSIBytesWritten() {
|
||||
SCSIHealth := smart.json.Get("scsi_error_counter_log")
|
||||
if SCSIHealth.Exists() {
|
||||
smart.ch <- prometheus.MustNewConstMetric(
|
||||
metricDeviceBytesWritten,
|
||||
prometheus.CounterValue,
|
||||
// This value is reported by SMARTctl in GB [10^9].
|
||||
// It is possible that some drives mis-report the value, but
|
||||
// that is not the responsibility of the exporter or smartctl
|
||||
SCSIHealth.Get("write.gigabytes_processed").Float()*1e9,
|
||||
smart.device.device,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
func (smart *SMARTctl) mineSmartStatus() {
|
||||
smart.ch <- prometheus.MustNewConstMetric(
|
||||
metricDeviceSmartStatus,
|
||||
|
@ -504,6 +557,18 @@ func (smart *SMARTctl) mineSCSIErrorCounterLog() {
|
|||
SCSIHealth.Get("read.errors_corrected_by_rereads_rewrites").Float(),
|
||||
smart.device.device,
|
||||
)
|
||||
smart.ch <- prometheus.MustNewConstMetric(
|
||||
metricReadErrorsCorrectedByEccFast,
|
||||
prometheus.GaugeValue,
|
||||
SCSIHealth.Get("read.errors_corrected_by_eccfast").Float(),
|
||||
smart.device.device,
|
||||
)
|
||||
smart.ch <- prometheus.MustNewConstMetric(
|
||||
metricReadErrorsCorrectedByEccDelayed,
|
||||
prometheus.GaugeValue,
|
||||
SCSIHealth.Get("read.errors_corrected_by_eccdelayed").Float(),
|
||||
smart.device.device,
|
||||
)
|
||||
smart.ch <- prometheus.MustNewConstMetric(
|
||||
metricReadTotalUncorrectedErrors,
|
||||
prometheus.GaugeValue,
|
||||
|
@ -516,11 +581,24 @@ func (smart *SMARTctl) mineSCSIErrorCounterLog() {
|
|||
SCSIHealth.Get("write.errors_corrected_by_rereads_rewrites").Float(),
|
||||
smart.device.device,
|
||||
)
|
||||
smart.ch <- prometheus.MustNewConstMetric(
|
||||
metricWriteErrorsCorrectedByEccFast,
|
||||
prometheus.GaugeValue,
|
||||
SCSIHealth.Get("write.errors_corrected_by_eccfast").Float(),
|
||||
smart.device.device,
|
||||
)
|
||||
smart.ch <- prometheus.MustNewConstMetric(
|
||||
metricWriteErrorsCorrectedByEccDelayed,
|
||||
prometheus.GaugeValue,
|
||||
SCSIHealth.Get("write.errors_corrected_by_eccdelayed").Float(),
|
||||
smart.device.device,
|
||||
)
|
||||
smart.ch <- prometheus.MustNewConstMetric(
|
||||
metricWriteTotalUncorrectedErrors,
|
||||
prometheus.GaugeValue,
|
||||
SCSIHealth.Get("write.total_uncorrected_errors").Float(),
|
||||
smart.device.device,
|
||||
)
|
||||
// TODO: Should we also export the verify category?
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue