mirror of
https://github.com/prometheus-community/smartctl_exporter.git
synced 2024-11-16 01:33:07 +01:00
fix: Remove confused metrics
The exporter presently has metrics that are nonsense for a given type of drive, and remain at zero due to their defaults. Change the behavior to NOT emit a metric if the underlying JSON field is not present. Future related work may include parsing the corresponding metrics for SATA/SAS SSDs (e.g. `smartctl_device_percentage_used` could be derived from `SSD_Life_Left` on some drives). Metrics no longer exported for the wrong type of drive: - `smartctl_device_nvme_capacity_bytes` (NVME-specific) - `smartctl_device_available_spare` (NVME-specific, ATA possible) - `smartctl_device_available_spare_threshold` (NVME-specific, ATA possible) - `smartctl_device_critical_warning` (NVME-specific, ATA possible) - `smartctl_device_interface_speed` (ATA-specific) - `smartctl_device_media_errors` (NVME-specific, ATA possible) - `smartctl_device_num_err_log_entries` (NVME-specific, SCSI uses distinct metrics, ATA possible) - `smartctl_device_percentage_used` (NVME-specific, ATA possible) Signed-off-by: Robin H. Johnson <rjohnson@coreweave.com>
This commit is contained in:
parent
558a760c14
commit
d90594ac23
1 changed files with 93 additions and 49 deletions
142
smartctl.go
142
smartctl.go
|
@ -29,6 +29,9 @@ type SMARTDevice struct {
|
||||||
serial string
|
serial string
|
||||||
family string
|
family string
|
||||||
model string
|
model string
|
||||||
|
// These are used to select types of metrics.
|
||||||
|
interface_ string
|
||||||
|
protocol string
|
||||||
}
|
}
|
||||||
|
|
||||||
// SMARTctl object
|
// SMARTctl object
|
||||||
|
@ -41,15 +44,26 @@ type SMARTctl struct {
|
||||||
|
|
||||||
// NewSMARTctl is smartctl constructor
|
// NewSMARTctl is smartctl constructor
|
||||||
func NewSMARTctl(logger log.Logger, json gjson.Result, ch chan<- prometheus.Metric) SMARTctl {
|
func NewSMARTctl(logger log.Logger, json gjson.Result, ch chan<- prometheus.Metric) SMARTctl {
|
||||||
|
var model_name string
|
||||||
|
if obj := json.Get("model_name"); obj.Exists() {
|
||||||
|
model_name = obj.String()
|
||||||
|
}
|
||||||
|
// If the drive returns an empty model name, replace that with unknown.
|
||||||
|
if model_name == "" {
|
||||||
|
model_name = "unknown"
|
||||||
|
}
|
||||||
|
|
||||||
return SMARTctl{
|
return SMARTctl{
|
||||||
ch: ch,
|
ch: ch,
|
||||||
json: json,
|
json: json,
|
||||||
logger: logger,
|
logger: logger,
|
||||||
device: SMARTDevice{
|
device: SMARTDevice{
|
||||||
device: strings.TrimPrefix(strings.TrimSpace(json.Get("device.name").String()), "/dev/"),
|
device: strings.TrimPrefix(strings.TrimSpace(json.Get("device.name").String()), "/dev/"),
|
||||||
serial: strings.TrimSpace(json.Get("serial_number").String()),
|
serial: strings.TrimSpace(json.Get("serial_number").String()),
|
||||||
family: strings.TrimSpace(GetStringIfExists(json, "model_family", "unknown")),
|
family: strings.TrimSpace(GetStringIfExists(json, "model_family", "unknown")),
|
||||||
model: strings.TrimSpace(json.Get("model_name").String()),
|
model: strings.TrimSpace(model_name),
|
||||||
|
interface_: strings.TrimSpace(json.Get("device.type").String()),
|
||||||
|
protocol: strings.TrimSpace(json.Get("device.protocol").String()),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -66,23 +80,29 @@ func (smart *SMARTctl) Collect() {
|
||||||
smart.minePowerOnSeconds()
|
smart.minePowerOnSeconds()
|
||||||
smart.mineRotationRate()
|
smart.mineRotationRate()
|
||||||
smart.mineTemperatures()
|
smart.mineTemperatures()
|
||||||
smart.minePowerCycleCount()
|
smart.minePowerCycleCount() // ATA/SATA, NVME, SCSI, SAS
|
||||||
smart.mineDeviceSCTStatus()
|
smart.mineDeviceSCTStatus()
|
||||||
smart.mineDeviceStatistics()
|
smart.mineDeviceStatistics()
|
||||||
smart.mineDeviceErrorLog()
|
smart.mineDeviceErrorLog()
|
||||||
smart.mineDeviceSelfTestLog()
|
smart.mineDeviceSelfTestLog()
|
||||||
smart.mineDeviceERC()
|
smart.mineDeviceERC()
|
||||||
smart.mineNvmePercentageUsed()
|
|
||||||
smart.mineNvmeAvailableSpare()
|
|
||||||
smart.mineNvmeAvailableSpareThreshold()
|
|
||||||
smart.mineNvmeCriticalWarning()
|
|
||||||
smart.mineNvmeMediaErrors()
|
|
||||||
smart.mineNvmeNumErrLogEntries()
|
|
||||||
smart.mineNvmeBytesRead()
|
|
||||||
smart.mineNvmeBytesWritten()
|
|
||||||
smart.mineSmartStatus()
|
smart.mineSmartStatus()
|
||||||
smart.mineSCSIGrownDefectList()
|
|
||||||
smart.mineSCSIErrorCounterLog()
|
if smart.device.interface_ == "nvme" {
|
||||||
|
smart.mineNvmePercentageUsed()
|
||||||
|
smart.mineNvmeAvailableSpare()
|
||||||
|
smart.mineNvmeAvailableSpareThreshold()
|
||||||
|
smart.mineNvmeCriticalWarning()
|
||||||
|
smart.mineNvmeMediaErrors()
|
||||||
|
smart.mineNvmeNumErrLogEntries()
|
||||||
|
smart.mineNvmeBytesRead()
|
||||||
|
smart.mineNvmeBytesWritten()
|
||||||
|
}
|
||||||
|
// SCSI, SAS
|
||||||
|
if smart.device.interface_ == "scsi" {
|
||||||
|
smart.mineSCSIGrownDefectList()
|
||||||
|
smart.mineSCSIErrorCounterLog()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (smart *SMARTctl) mineExitStatus() {
|
func (smart *SMARTctl) mineExitStatus() {
|
||||||
|
@ -95,14 +115,13 @@ func (smart *SMARTctl) mineExitStatus() {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (smart *SMARTctl) mineDevice() {
|
func (smart *SMARTctl) mineDevice() {
|
||||||
device := smart.json.Get("device")
|
|
||||||
smart.ch <- prometheus.MustNewConstMetric(
|
smart.ch <- prometheus.MustNewConstMetric(
|
||||||
metricDeviceModel,
|
metricDeviceModel,
|
||||||
prometheus.GaugeValue,
|
prometheus.GaugeValue,
|
||||||
1,
|
1,
|
||||||
smart.device.device,
|
smart.device.device,
|
||||||
device.Get("type").String(),
|
smart.device.interface_,
|
||||||
device.Get("protocol").String(),
|
smart.device.protocol,
|
||||||
smart.device.family,
|
smart.device.family,
|
||||||
smart.device.model,
|
smart.device.model,
|
||||||
smart.device.serial,
|
smart.device.serial,
|
||||||
|
@ -130,12 +149,15 @@ func (smart *SMARTctl) mineCapacity() {
|
||||||
smart.json.Get("user_capacity.bytes").Float(),
|
smart.json.Get("user_capacity.bytes").Float(),
|
||||||
smart.device.device,
|
smart.device.device,
|
||||||
)
|
)
|
||||||
smart.ch <- prometheus.MustNewConstMetric(
|
nvme_total_capacity := smart.json.Get("nvme_total_capacity")
|
||||||
metricDeviceTotalCapacityBytes,
|
if nvme_total_capacity.Exists() {
|
||||||
prometheus.GaugeValue,
|
smart.ch <- prometheus.MustNewConstMetric(
|
||||||
smart.json.Get("nvme_total_capacity").Float(),
|
metricDeviceTotalCapacityBytes,
|
||||||
smart.device.device,
|
prometheus.GaugeValue,
|
||||||
)
|
nvme_total_capacity.Float(),
|
||||||
|
smart.device.device,
|
||||||
|
)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (smart *SMARTctl) mineBlockSize() {
|
func (smart *SMARTctl) mineBlockSize() {
|
||||||
|
@ -152,15 +174,19 @@ func (smart *SMARTctl) mineBlockSize() {
|
||||||
|
|
||||||
func (smart *SMARTctl) mineInterfaceSpeed() {
|
func (smart *SMARTctl) mineInterfaceSpeed() {
|
||||||
iSpeed := smart.json.Get("interface_speed")
|
iSpeed := smart.json.Get("interface_speed")
|
||||||
for _, speedType := range []string{"max", "current"} {
|
if iSpeed.Exists() {
|
||||||
tSpeed := iSpeed.Get(speedType)
|
for _, speedType := range []string{"max", "current"} {
|
||||||
smart.ch <- prometheus.MustNewConstMetric(
|
tSpeed := iSpeed.Get(speedType)
|
||||||
metricDeviceInterfaceSpeed,
|
if tSpeed.Exists() {
|
||||||
prometheus.GaugeValue,
|
smart.ch <- prometheus.MustNewConstMetric(
|
||||||
tSpeed.Get("units_per_second").Float()*tSpeed.Get("bits_per_unit").Float(),
|
metricDeviceInterfaceSpeed,
|
||||||
smart.device.device,
|
prometheus.GaugeValue,
|
||||||
speedType,
|
tSpeed.Get("units_per_second").Float()*tSpeed.Get("bits_per_unit").Float(),
|
||||||
)
|
smart.device.device,
|
||||||
|
speedType,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -200,16 +226,21 @@ func (smart *SMARTctl) mineDeviceAttribute() {
|
||||||
|
|
||||||
func (smart *SMARTctl) minePowerOnSeconds() {
|
func (smart *SMARTctl) minePowerOnSeconds() {
|
||||||
pot := smart.json.Get("power_on_time")
|
pot := smart.json.Get("power_on_time")
|
||||||
smart.ch <- prometheus.MustNewConstMetric(
|
// If the power_on_time is NOT present, do not report as 0.
|
||||||
metricDevicePowerOnSeconds,
|
if pot.Exists() {
|
||||||
prometheus.CounterValue,
|
smart.ch <- prometheus.MustNewConstMetric(
|
||||||
GetFloatIfExists(pot, "hours", 0)*60*60+GetFloatIfExists(pot, "minutes", 0)*60,
|
metricDevicePowerOnSeconds,
|
||||||
smart.device.device,
|
prometheus.CounterValue,
|
||||||
)
|
GetFloatIfExists(pot, "hours", 0)*60*60+GetFloatIfExists(pot, "minutes", 0)*60,
|
||||||
|
smart.device.device,
|
||||||
|
)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (smart *SMARTctl) mineRotationRate() {
|
func (smart *SMARTctl) mineRotationRate() {
|
||||||
rRate := GetFloatIfExists(smart.json, "rotation_rate", 0)
|
rRate := GetFloatIfExists(smart.json, "rotation_rate", 0)
|
||||||
|
// TODO: what should be done if this is absent vs really zero (for
|
||||||
|
// solid-state drives)?
|
||||||
if rRate > 0 {
|
if rRate > 0 {
|
||||||
smart.ch <- prometheus.MustNewConstMetric(
|
smart.ch <- prometheus.MustNewConstMetric(
|
||||||
metricDeviceRotationRate,
|
metricDeviceRotationRate,
|
||||||
|
@ -237,12 +268,17 @@ func (smart *SMARTctl) mineTemperatures() {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (smart *SMARTctl) minePowerCycleCount() {
|
func (smart *SMARTctl) minePowerCycleCount() {
|
||||||
smart.ch <- prometheus.MustNewConstMetric(
|
// ATA & NVME
|
||||||
metricDevicePowerCycleCount,
|
powerCycleCount := smart.json.Get("power_cycle_count")
|
||||||
prometheus.CounterValue,
|
if powerCycleCount.Exists() {
|
||||||
smart.json.Get("power_cycle_count").Float(),
|
smart.ch <- prometheus.MustNewConstMetric(
|
||||||
smart.device.device,
|
metricDevicePowerCycleCount,
|
||||||
)
|
prometheus.CounterValue,
|
||||||
|
powerCycleCount.Float(),
|
||||||
|
smart.device.device,
|
||||||
|
)
|
||||||
|
return
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (smart *SMARTctl) mineDeviceSCTStatus() {
|
func (smart *SMARTctl) mineDeviceSCTStatus() {
|
||||||
|
@ -312,25 +348,33 @@ func (smart *SMARTctl) mineNvmeNumErrLogEntries() {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (smart *SMARTctl) mineNvmeBytesRead() {
|
func (smart *SMARTctl) mineNvmeBytesRead() {
|
||||||
blockSize := smart.json.Get("logical_block_size").Float()
|
blockSize := smart.json.Get("logical_block_size")
|
||||||
|
data_units_read := smart.json.Get("nvme_smart_health_information_log.data_units_read")
|
||||||
|
if !blockSize.Exists() || !data_units_read.Exists() {
|
||||||
|
return
|
||||||
|
}
|
||||||
smart.ch <- prometheus.MustNewConstMetric(
|
smart.ch <- prometheus.MustNewConstMetric(
|
||||||
metricDeviceBytesRead,
|
metricDeviceBytesRead,
|
||||||
prometheus.CounterValue,
|
prometheus.CounterValue,
|
||||||
// This value is reported in thousands (i.e., a value of 1 corresponds to 1000 units of 512 bytes written) and is rounded up.
|
// This value is reported in thousands (i.e., a value of 1 corresponds to 1000 units of 512 bytes written) and is rounded up.
|
||||||
// When the LBA size is a value other than 512 bytes, the controller shall convert the amount of data written to 512 byte units.
|
// When the LBA size is a value other than 512 bytes, the controller shall convert the amount of data written to 512 byte units.
|
||||||
smart.json.Get("nvme_smart_health_information_log.data_units_read").Float()*1000.0*blockSize,
|
data_units_read.Float()*1000.0*blockSize.Float(),
|
||||||
smart.device.device,
|
smart.device.device,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (smart *SMARTctl) mineNvmeBytesWritten() {
|
func (smart *SMARTctl) mineNvmeBytesWritten() {
|
||||||
blockSize := smart.json.Get("logical_block_size").Float()
|
blockSize := smart.json.Get("logical_block_size")
|
||||||
|
data_units_written := smart.json.Get("nvme_smart_health_information_log.data_units_written")
|
||||||
|
if !blockSize.Exists() || !data_units_written.Exists() {
|
||||||
|
return
|
||||||
|
}
|
||||||
smart.ch <- prometheus.MustNewConstMetric(
|
smart.ch <- prometheus.MustNewConstMetric(
|
||||||
metricDeviceBytesWritten,
|
metricDeviceBytesWritten,
|
||||||
prometheus.CounterValue,
|
prometheus.CounterValue,
|
||||||
// This value is reported in thousands (i.e., a value of 1 corresponds to 1000 units of 512 bytes written) and is rounded up.
|
// This value is reported in thousands (i.e., a value of 1 corresponds to 1000 units of 512 bytes written) and is rounded up.
|
||||||
// When the LBA size is a value other than 512 bytes, the controller shall convert the amount of data written to 512 byte units.
|
// When the LBA size is a value other than 512 bytes, the controller shall convert the amount of data written to 512 byte units.
|
||||||
smart.json.Get("nvme_smart_health_information_log.data_units_written").Float()*1000.0*blockSize,
|
data_units_written.Float()*1000.0*blockSize.Float(),
|
||||||
smart.device.device,
|
smart.device.device,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue