Merge pull request #131 from jthiltges/pr/21upd

Critical metrics for SCSI disks added, rebased
This commit is contained in:
David Randall 2023-08-09 13:50:34 -04:00 committed by GitHub
commit 6448d79458
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 115 additions and 2 deletions

View file

@ -271,4 +271,59 @@ var (
}, },
nil, nil,
) )
// metricSCSIGrownDefectList describes the smartctl_scsi_grown_defect_list
// metric. Variable labels: device, model_family, model_name, serial_number;
// no constant labels are attached.
metricSCSIGrownDefectList = prometheus.NewDesc(
	"smartctl_scsi_grown_defect_list",
	"Device SCSI grown defect list counter",
	[]string{"device", "model_family", "model_name", "serial_number"},
	nil,
)
// metricReadErrorsCorrectedByRereadsRewrites describes the
// smartctl_read_errors_corrected_by_rereads_rewrites metric. Variable labels:
// device, model_family, model_name, serial_number; no constant labels.
metricReadErrorsCorrectedByRereadsRewrites = prometheus.NewDesc(
	"smartctl_read_errors_corrected_by_rereads_rewrites",
	"Read Errors Corrected by ReReads/ReWrites",
	[]string{"device", "model_family", "model_name", "serial_number"},
	nil,
)
// metricReadTotalUncorrectedErrors describes the
// smartctl_read_total_uncorrected_errors metric. Variable labels: device,
// model_family, model_name, serial_number; no constant labels.
metricReadTotalUncorrectedErrors = prometheus.NewDesc(
	"smartctl_read_total_uncorrected_errors",
	"Read Total Uncorrected Errors",
	[]string{"device", "model_family", "model_name", "serial_number"},
	nil,
)
// metricWriteErrorsCorrectedByRereadsRewrites describes the
// smartctl_write_errors_corrected_by_rereads_rewrites metric. Variable labels:
// device, model_family, model_name, serial_number; no constant labels.
metricWriteErrorsCorrectedByRereadsRewrites = prometheus.NewDesc(
	"smartctl_write_errors_corrected_by_rereads_rewrites",
	"Write Errors Corrected by ReReads/ReWrites",
	[]string{"device", "model_family", "model_name", "serial_number"},
	nil,
)
// metricWriteTotalUncorrectedErrors describes the
// smartctl_write_total_uncorrected_errors metric. Variable labels: device,
// model_family, model_name, serial_number; no constant labels.
metricWriteTotalUncorrectedErrors = prometheus.NewDesc(
	"smartctl_write_total_uncorrected_errors",
	"Write Total Uncorrected Errors",
	[]string{"device", "model_family", "model_name", "serial_number"},
	nil,
)
) )

View file

@ -64,7 +64,7 @@ func readFakeSMARTctl(logger log.Logger, device string) gjson.Result {
// Get json from smartctl and parse it // Get json from smartctl and parse it
func readSMARTctl(logger log.Logger, device string) (gjson.Result, bool) { func readSMARTctl(logger log.Logger, device string) (gjson.Result, bool) {
level.Debug(logger).Log("msg", "Collecting S.M.A.R.T. counters", "device", device) level.Debug(logger).Log("msg", "Collecting S.M.A.R.T. counters", "device", device)
out, err := exec.Command(*smartctlPath, "--json", "--info", "--health", "--attributes", "--tolerance=verypermissive", "--nocheck=standby", "--format=brief", device).Output() out, err := exec.Command(*smartctlPath, "--json", "--info", "--health", "--attributes", "--tolerance=verypermissive", "--nocheck=standby", "--format=brief", "--log=error", device).Output()
if err != nil { if err != nil {
level.Warn(logger).Log("msg", "S.M.A.R.T. output reading", "err", err) level.Warn(logger).Log("msg", "S.M.A.R.T. output reading", "err", err)
} }

View file

@ -81,7 +81,8 @@ func (smart *SMARTctl) Collect() {
smart.mineBytesRead() smart.mineBytesRead()
smart.mineBytesWritten() smart.mineBytesWritten()
smart.mineSmartStatus() smart.mineSmartStatus()
smart.mineSCSIGrownDefectList()
smart.mineSCSIErrorCounterLog()
} }
func (smart *SMARTctl) mineExitStatus() { func (smart *SMARTctl) mineExitStatus() {
@ -435,3 +436,60 @@ func (smart *SMARTctl) mineDeviceERC() {
) )
} }
} }
// mineSCSIGrownDefectList reads the top-level "scsi_grown_defect_list" key
// from smart.json and, when the key exists, sends its numeric value on
// smart.ch as a gauge labelled with the device name, family, model and serial.
// Nothing is sent when the key is absent.
func (smart *SMARTctl) mineSCSIGrownDefectList() {
	grownDefects := smart.json.Get("scsi_grown_defect_list")
	if !grownDefects.Exists() {
		return
	}

	dev := smart.device
	smart.ch <- prometheus.MustNewConstMetric(
		metricSCSIGrownDefectList,
		prometheus.GaugeValue,
		grownDefects.Float(),
		dev.device,
		dev.family,
		dev.model,
		dev.serial,
	)
}
// mineSCSIErrorCounterLog reads the "scsi_error_counter_log" object from
// smart.json and, when it exists, sends four gauges on smart.ch: the
// read/write "errors corrected by rereads/rewrites" and the read/write
// "total uncorrected errors" values. Each gauge is labelled with the device
// name, family, model and serial. Nothing is sent when the object is absent.
//
// NOTE(review): a sub-key missing from an otherwise present log is not
// checked with Exists(); presumably Float() then yields 0 — confirm against
// the gjson documentation.
func (smart *SMARTctl) mineSCSIErrorCounterLog() {
	errorLog := smart.json.Get("scsi_error_counter_log")
	if !errorLog.Exists() {
		return
	}

	// Ordered table of (descriptor, JSON path inside the error counter log).
	// The order determines the order in which metrics are sent.
	counters := []struct {
		desc *prometheus.Desc
		path string
	}{
		{metricReadErrorsCorrectedByRereadsRewrites, "read.errors_corrected_by_rereads_rewrites"},
		{metricReadTotalUncorrectedErrors, "read.total_uncorrected_errors"},
		{metricWriteErrorsCorrectedByRereadsRewrites, "write.errors_corrected_by_rereads_rewrites"},
		{metricWriteTotalUncorrectedErrors, "write.total_uncorrected_errors"},
	}

	for _, counter := range counters {
		smart.ch <- prometheus.MustNewConstMetric(
			counter.desc,
			prometheus.GaugeValue,
			errorLog.Get(counter.path).Float(),
			smart.device.device,
			smart.device.family,
			smart.device.model,
			smart.device.serial,
		)
	}
}