2022-08-05 03:37:13 +02:00
|
|
|
// Copyright 2022 The Prometheus Authors
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
2019-08-14 22:34:49 +02:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
2024-03-08 15:39:33 +01:00
|
|
|
"regexp"
|
2019-08-14 22:34:49 +02:00
|
|
|
"strings"
|
|
|
|
|
2022-10-03 11:16:00 +02:00
|
|
|
"github.com/go-kit/log"
|
|
|
|
"github.com/go-kit/log/level"
|
2019-08-14 22:34:49 +02:00
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
|
|
"github.com/tidwall/gjson"
|
|
|
|
)
|
|
|
|
|
|
|
|
// SMARTDevice carries the short identifying information about one device.
// All fields are plain strings filled in by NewSMARTctl from the smartctl
// JSON output.
type SMARTDevice struct {
	// Identity of the device: its short name, serial number, model family
	// and model name.
	device, serial, family, model string

	// These are used to select types of metrics.
	interface_, protocol string
}
|
|
|
|
|
|
|
|
// SMARTctl object
|
|
|
|
type SMARTctl struct {
|
|
|
|
ch chan<- prometheus.Metric
|
|
|
|
json gjson.Result
|
2022-10-03 11:16:00 +02:00
|
|
|
logger log.Logger
|
2019-08-14 22:34:49 +02:00
|
|
|
device SMARTDevice
|
|
|
|
}
|
|
|
|
|
2024-03-08 15:39:33 +01:00
|
|
|
// diskNameRegexp recognizes the device name formats handled by
// extractDiskName. It is compiled once at package initialization instead of
// on every call.
var diskNameRegexp = regexp.MustCompile(`^(?:/dev/(?P<bus_name>\S+)/(?P<bus_num>\S+)\s\[|/dev/|\[)(?:\s\[|)(?P<disk>[a-z0-9_]+)(?:\].*|)$`)

// extractDiskName derives a short device label from a smartctl device name.
//
// The optional bus name, the optional bus number and the disk name captured
// by diskNameRegexp are joined with "_", for example:
//
//	/dev/sda                      -> sda
//	/dev/bus/0 [megaraid_disk_01] -> bus_0_megaraid_disk_01
//
// It returns "" when input does not match the expected pattern.
func extractDiskName(input string) string {
	match := diskNameRegexp.FindStringSubmatch(input)
	if match == nil {
		return ""
	}

	// Groups that did not take part in the match are empty and are skipped,
	// so the result never contains stray separators.
	parts := make([]string, 0, 3)
	for _, group := range []string{"bus_name", "bus_num", "disk"} {
		if idx := diskNameRegexp.SubexpIndex(group); idx != -1 && match[idx] != "" {
			parts = append(parts, match[idx])
		}
	}
	return strings.Join(parts, "_")
}
|
|
|
|
|
2019-08-14 23:04:32 +02:00
|
|
|
// NewSMARTctl is smartctl constructor
|
2022-10-03 11:16:00 +02:00
|
|
|
func NewSMARTctl(logger log.Logger, json gjson.Result, ch chan<- prometheus.Metric) SMARTctl {
|
2023-10-16 07:59:57 +02:00
|
|
|
var model_name string
|
|
|
|
if obj := json.Get("model_name"); obj.Exists() {
|
|
|
|
model_name = obj.String()
|
2023-10-16 07:59:57 +02:00
|
|
|
} else if obj := json.Get("scsi_model_name"); obj.Exists() {
|
|
|
|
model_name = obj.String()
|
2023-10-16 07:59:57 +02:00
|
|
|
}
|
|
|
|
// If the drive returns an empty model name, replace that with unknown.
|
|
|
|
if model_name == "" {
|
|
|
|
model_name = "unknown"
|
|
|
|
}
|
|
|
|
|
2022-10-03 11:16:00 +02:00
|
|
|
return SMARTctl{
|
|
|
|
ch: ch,
|
|
|
|
json: json,
|
|
|
|
logger: logger,
|
|
|
|
device: SMARTDevice{
|
2024-03-08 15:39:33 +01:00
|
|
|
device: extractDiskName(strings.TrimSpace(json.Get("device.info_name").String())),
|
2023-10-16 07:59:57 +02:00
|
|
|
serial: strings.TrimSpace(json.Get("serial_number").String()),
|
|
|
|
family: strings.TrimSpace(GetStringIfExists(json, "model_family", "unknown")),
|
|
|
|
model: strings.TrimSpace(model_name),
|
|
|
|
interface_: strings.TrimSpace(json.Get("device.type").String()),
|
|
|
|
protocol: strings.TrimSpace(json.Get("device.protocol").String()),
|
2022-10-03 11:16:00 +02:00
|
|
|
},
|
2019-08-14 22:34:49 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Collect metrics
|
2019-08-15 23:01:16 +02:00
|
|
|
func (smart *SMARTctl) Collect() {
|
2022-10-03 11:16:00 +02:00
|
|
|
level.Debug(smart.logger).Log("msg", "Collecting metrics from", "device", smart.device.device, "family", smart.device.family, "model", smart.device.model)
|
2019-08-15 23:01:16 +02:00
|
|
|
smart.mineExitStatus()
|
2019-08-14 22:34:49 +02:00
|
|
|
smart.mineDevice()
|
|
|
|
smart.mineCapacity()
|
2023-08-24 13:50:04 +02:00
|
|
|
smart.mineBlockSize()
|
2019-08-14 22:34:49 +02:00
|
|
|
smart.mineInterfaceSpeed()
|
|
|
|
smart.mineDeviceAttribute()
|
|
|
|
smart.minePowerOnSeconds()
|
|
|
|
smart.mineRotationRate()
|
|
|
|
smart.mineTemperatures()
|
2023-10-16 07:59:57 +02:00
|
|
|
smart.minePowerCycleCount() // ATA/SATA, NVME, SCSI, SAS
|
2020-07-26 21:54:54 +02:00
|
|
|
smart.mineDeviceSCTStatus()
|
2019-08-17 12:18:48 +02:00
|
|
|
smart.mineDeviceStatistics()
|
2020-07-26 22:48:49 +02:00
|
|
|
smart.mineDeviceErrorLog()
|
2020-07-26 23:16:08 +02:00
|
|
|
smart.mineDeviceSelfTestLog()
|
2020-07-26 23:37:43 +02:00
|
|
|
smart.mineDeviceERC()
|
2020-10-02 13:30:09 +02:00
|
|
|
smart.mineSmartStatus()
|
2023-10-16 07:59:57 +02:00
|
|
|
|
|
|
|
if smart.device.interface_ == "nvme" {
|
|
|
|
smart.mineNvmePercentageUsed()
|
|
|
|
smart.mineNvmeAvailableSpare()
|
|
|
|
smart.mineNvmeAvailableSpareThreshold()
|
|
|
|
smart.mineNvmeCriticalWarning()
|
|
|
|
smart.mineNvmeMediaErrors()
|
|
|
|
smart.mineNvmeNumErrLogEntries()
|
|
|
|
smart.mineNvmeBytesRead()
|
|
|
|
smart.mineNvmeBytesWritten()
|
|
|
|
}
|
|
|
|
// SCSI, SAS
|
|
|
|
if smart.device.interface_ == "scsi" {
|
|
|
|
smart.mineSCSIGrownDefectList()
|
|
|
|
smart.mineSCSIErrorCounterLog()
|
2023-10-16 07:59:57 +02:00
|
|
|
smart.mineSCSIBytesRead()
|
|
|
|
smart.mineSCSIBytesWritten()
|
2023-10-16 07:59:57 +02:00
|
|
|
}
|
2019-08-14 22:34:49 +02:00
|
|
|
}
|
|
|
|
|
2019-08-15 23:01:16 +02:00
|
|
|
func (smart *SMARTctl) mineExitStatus() {
|
2019-08-14 22:34:49 +02:00
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
2019-08-15 23:01:16 +02:00
|
|
|
metricDeviceExitStatus,
|
2019-08-14 22:34:49 +02:00
|
|
|
prometheus.GaugeValue,
|
2019-08-15 23:01:16 +02:00
|
|
|
smart.json.Get("smartctl.exit_status").Float(),
|
|
|
|
smart.device.device,
|
2019-08-14 22:34:49 +02:00
|
|
|
)
|
|
|
|
}
|
|
|
|
|
2019-08-15 23:01:16 +02:00
|
|
|
func (smart *SMARTctl) mineDevice() {
|
2019-08-14 22:34:49 +02:00
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricDeviceModel,
|
|
|
|
prometheus.GaugeValue,
|
|
|
|
1,
|
|
|
|
smart.device.device,
|
2023-10-16 07:59:57 +02:00
|
|
|
smart.device.interface_,
|
|
|
|
smart.device.protocol,
|
2019-08-14 22:34:49 +02:00
|
|
|
smart.device.family,
|
|
|
|
smart.device.model,
|
|
|
|
smart.device.serial,
|
|
|
|
GetStringIfExists(smart.json, "ata_additional_product_id", "unknown"),
|
|
|
|
smart.json.Get("firmware_version").String(),
|
|
|
|
smart.json.Get("ata_version.string").String(),
|
|
|
|
smart.json.Get("sata_version.string").String(),
|
2022-10-14 12:48:26 +02:00
|
|
|
smart.json.Get("form_factor.name").String(),
|
2023-10-16 07:59:57 +02:00
|
|
|
// scsi_model_name is mapped into model_name
|
|
|
|
smart.json.Get("scsi_vendor").String(),
|
|
|
|
smart.json.Get("scsi_product").String(),
|
|
|
|
smart.json.Get("scsi_revision").String(),
|
|
|
|
smart.json.Get("scsi_version").String(),
|
2019-08-14 22:34:49 +02:00
|
|
|
)
|
|
|
|
}
|
|
|
|
|
2019-08-15 23:01:16 +02:00
|
|
|
func (smart *SMARTctl) mineCapacity() {
|
2023-08-24 13:50:04 +02:00
|
|
|
// The user_capacity exists only when NVMe have single namespace. Otherwise,
|
|
|
|
// for NVMe devices with multiple namespaces, when device name used without
|
|
|
|
// namespace number (exporter case) user_capacity will be absent
|
2019-08-14 22:34:49 +02:00
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricDeviceCapacityBlocks,
|
|
|
|
prometheus.GaugeValue,
|
2023-08-24 13:50:04 +02:00
|
|
|
smart.json.Get("user_capacity.blocks").Float(),
|
2019-08-14 22:34:49 +02:00
|
|
|
smart.device.device,
|
|
|
|
)
|
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricDeviceCapacityBytes,
|
|
|
|
prometheus.GaugeValue,
|
2023-08-24 13:50:04 +02:00
|
|
|
smart.json.Get("user_capacity.bytes").Float(),
|
2019-08-14 22:34:49 +02:00
|
|
|
smart.device.device,
|
|
|
|
)
|
2023-10-16 07:59:57 +02:00
|
|
|
nvme_total_capacity := smart.json.Get("nvme_total_capacity")
|
|
|
|
if nvme_total_capacity.Exists() {
|
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricDeviceTotalCapacityBytes,
|
|
|
|
prometheus.GaugeValue,
|
|
|
|
nvme_total_capacity.Float(),
|
|
|
|
smart.device.device,
|
|
|
|
)
|
|
|
|
}
|
2023-08-24 13:50:04 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
func (smart *SMARTctl) mineBlockSize() {
|
2019-08-14 22:34:49 +02:00
|
|
|
for _, blockType := range []string{"logical", "physical"} {
|
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricDeviceBlockSize,
|
|
|
|
prometheus.GaugeValue,
|
|
|
|
smart.json.Get(fmt.Sprintf("%s_block_size", blockType)).Float(),
|
|
|
|
smart.device.device,
|
|
|
|
blockType,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-08-15 23:01:16 +02:00
|
|
|
func (smart *SMARTctl) mineInterfaceSpeed() {
|
2023-10-16 07:59:57 +02:00
|
|
|
// TODO: Support scsi_sas_port_[01].phy_N.negotiated_logical_link_rate
|
2019-08-14 22:34:49 +02:00
|
|
|
iSpeed := smart.json.Get("interface_speed")
|
2023-10-16 07:59:57 +02:00
|
|
|
if iSpeed.Exists() {
|
|
|
|
for _, speedType := range []string{"max", "current"} {
|
|
|
|
tSpeed := iSpeed.Get(speedType)
|
|
|
|
if tSpeed.Exists() {
|
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricDeviceInterfaceSpeed,
|
|
|
|
prometheus.GaugeValue,
|
|
|
|
tSpeed.Get("units_per_second").Float()*tSpeed.Get("bits_per_unit").Float(),
|
|
|
|
smart.device.device,
|
|
|
|
speedType,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
2019-08-14 22:34:49 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-08-15 23:01:16 +02:00
|
|
|
func (smart *SMARTctl) mineDeviceAttribute() {
|
2019-08-14 22:34:49 +02:00
|
|
|
for _, attribute := range smart.json.Get("ata_smart_attributes.table").Array() {
|
|
|
|
name := strings.TrimSpace(attribute.Get("name").String())
|
2019-08-17 12:18:48 +02:00
|
|
|
flagsShort := strings.TrimSpace(attribute.Get("flags.string").String())
|
|
|
|
flagsLong := smart.mineLongFlags(attribute.Get("flags"), []string{
|
|
|
|
"prefailure",
|
|
|
|
"updated_online",
|
|
|
|
"performance",
|
|
|
|
"error_rate",
|
|
|
|
"event_count",
|
|
|
|
"auto_keep",
|
|
|
|
})
|
2019-08-14 22:34:49 +02:00
|
|
|
id := attribute.Get("id").String()
|
|
|
|
for key, path := range map[string]string{
|
|
|
|
"value": "value",
|
|
|
|
"worst": "worst",
|
|
|
|
"thresh": "thresh",
|
|
|
|
"raw": "raw.value",
|
|
|
|
} {
|
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricDeviceAttribute,
|
|
|
|
prometheus.GaugeValue,
|
|
|
|
attribute.Get(path).Float(),
|
|
|
|
smart.device.device,
|
|
|
|
name,
|
2019-08-17 12:18:48 +02:00
|
|
|
flagsShort,
|
|
|
|
flagsLong,
|
2019-08-14 22:34:49 +02:00
|
|
|
key,
|
|
|
|
id,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-08-15 23:01:16 +02:00
|
|
|
func (smart *SMARTctl) minePowerOnSeconds() {
|
2019-08-14 22:34:49 +02:00
|
|
|
pot := smart.json.Get("power_on_time")
|
2023-10-16 07:59:57 +02:00
|
|
|
// If the power_on_time is NOT present, do not report as 0.
|
|
|
|
if pot.Exists() {
|
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricDevicePowerOnSeconds,
|
|
|
|
prometheus.CounterValue,
|
|
|
|
GetFloatIfExists(pot, "hours", 0)*60*60+GetFloatIfExists(pot, "minutes", 0)*60,
|
|
|
|
smart.device.device,
|
|
|
|
)
|
|
|
|
}
|
2019-08-14 22:34:49 +02:00
|
|
|
}
|
|
|
|
|
2019-08-15 23:01:16 +02:00
|
|
|
func (smart *SMARTctl) mineRotationRate() {
|
2019-08-14 22:34:49 +02:00
|
|
|
rRate := GetFloatIfExists(smart.json, "rotation_rate", 0)
|
2023-10-16 07:59:57 +02:00
|
|
|
// TODO: what should be done if this is absent vs really zero (for
|
|
|
|
// solid-state drives)?
|
2019-08-14 22:34:49 +02:00
|
|
|
if rRate > 0 {
|
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricDeviceRotationRate,
|
|
|
|
prometheus.GaugeValue,
|
|
|
|
rRate,
|
|
|
|
smart.device.device,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-08-15 23:01:16 +02:00
|
|
|
func (smart *SMARTctl) mineTemperatures() {
|
2019-08-14 22:34:49 +02:00
|
|
|
temperatures := smart.json.Get("temperature")
|
2023-10-16 07:59:57 +02:00
|
|
|
// TODO: Implement scsi_environmental_reports
|
2019-08-14 22:34:49 +02:00
|
|
|
if temperatures.Exists() {
|
|
|
|
temperatures.ForEach(func(key, value gjson.Result) bool {
|
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricDeviceTemperature,
|
|
|
|
prometheus.GaugeValue,
|
|
|
|
value.Float(),
|
|
|
|
smart.device.device,
|
|
|
|
key.String(),
|
|
|
|
)
|
|
|
|
return true
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-08-15 23:01:16 +02:00
|
|
|
func (smart *SMARTctl) minePowerCycleCount() {
|
2023-10-16 07:59:57 +02:00
|
|
|
// ATA & NVME
|
|
|
|
powerCycleCount := smart.json.Get("power_cycle_count")
|
|
|
|
if powerCycleCount.Exists() {
|
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricDevicePowerCycleCount,
|
|
|
|
prometheus.CounterValue,
|
|
|
|
powerCycleCount.Float(),
|
|
|
|
smart.device.device,
|
|
|
|
)
|
|
|
|
return
|
|
|
|
}
|
2023-10-16 07:59:57 +02:00
|
|
|
|
|
|
|
// SCSI
|
|
|
|
powerCycleCount = smart.json.Get("scsi_start_stop_cycle_counter.accumulated_start_stop_cycles")
|
|
|
|
if powerCycleCount.Exists() {
|
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricDevicePowerCycleCount,
|
|
|
|
prometheus.CounterValue,
|
|
|
|
powerCycleCount.Float(),
|
|
|
|
smart.device.device,
|
|
|
|
)
|
|
|
|
return
|
|
|
|
}
|
2019-08-14 22:34:49 +02:00
|
|
|
}
|
2019-08-17 12:18:48 +02:00
|
|
|
|
2020-07-26 21:54:54 +02:00
|
|
|
func (smart *SMARTctl) mineDeviceSCTStatus() {
|
|
|
|
status := smart.json.Get("ata_sct_status")
|
|
|
|
if status.Exists() {
|
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricDeviceState,
|
|
|
|
prometheus.GaugeValue,
|
|
|
|
status.Get("device_state").Float(),
|
|
|
|
smart.device.device,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-08-24 13:50:04 +02:00
|
|
|
func (smart *SMARTctl) mineNvmePercentageUsed() {
|
2020-10-02 13:30:09 +02:00
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricDevicePercentageUsed,
|
|
|
|
prometheus.CounterValue,
|
|
|
|
smart.json.Get("nvme_smart_health_information_log.percentage_used").Float(),
|
|
|
|
smart.device.device,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
2023-08-24 13:50:04 +02:00
|
|
|
func (smart *SMARTctl) mineNvmeAvailableSpare() {
|
2020-10-02 13:30:09 +02:00
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricDeviceAvailableSpare,
|
|
|
|
prometheus.CounterValue,
|
|
|
|
smart.json.Get("nvme_smart_health_information_log.available_spare").Float(),
|
|
|
|
smart.device.device,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
2023-08-24 13:50:04 +02:00
|
|
|
func (smart *SMARTctl) mineNvmeAvailableSpareThreshold() {
|
2020-10-02 13:30:09 +02:00
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricDeviceAvailableSpareThreshold,
|
|
|
|
prometheus.CounterValue,
|
|
|
|
smart.json.Get("nvme_smart_health_information_log.available_spare_threshold").Float(),
|
|
|
|
smart.device.device,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
2023-08-24 13:50:04 +02:00
|
|
|
func (smart *SMARTctl) mineNvmeCriticalWarning() {
|
2020-10-02 13:30:09 +02:00
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricDeviceCriticalWarning,
|
|
|
|
prometheus.CounterValue,
|
|
|
|
smart.json.Get("nvme_smart_health_information_log.critical_warning").Float(),
|
|
|
|
smart.device.device,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
2023-08-24 13:50:04 +02:00
|
|
|
func (smart *SMARTctl) mineNvmeMediaErrors() {
|
2020-10-02 13:30:09 +02:00
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricDeviceMediaErrors,
|
|
|
|
prometheus.CounterValue,
|
|
|
|
smart.json.Get("nvme_smart_health_information_log.media_errors").Float(),
|
|
|
|
smart.device.device,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
2023-08-24 13:50:04 +02:00
|
|
|
func (smart *SMARTctl) mineNvmeNumErrLogEntries() {
|
2020-10-02 13:30:09 +02:00
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricDeviceNumErrLogEntries,
|
|
|
|
prometheus.CounterValue,
|
|
|
|
smart.json.Get("nvme_smart_health_information_log.num_err_log_entries").Float(),
|
|
|
|
smart.device.device,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
2024-04-09 09:40:44 +02:00
|
|
|
// https://nvmexpress.org/wp-content/uploads/NVM-Express-NVM-Command-Set-Specification-1.0d-2023.12.28-Ratified.pdf
|
|
|
|
// 4.1.4.2 SMART / Health Information (02h)
|
|
|
|
// The SMART / Health Information log page is as defined in the NVM Express Base Specification. For the
|
|
|
|
// Data Units Read and Data Units Written fields, when the logical block size is a value other than 512 bytes,
|
|
|
|
// the controller shall convert the amount of data read to 512 byte units.
|
|
|
|
|
|
|
|
// https://nvmexpress.org/wp-content/uploads/NVM-Express-Base-Specification-2.0d-2024.01.11-Ratified.pdf
|
|
|
|
// Figure 208: SMART / Health Information Log Page
|
|
|
|
// Bytes 47:32
|
|
|
|
// Data Units Read: Contains the number of 512 byte data units the host has read from the
|
|
|
|
// controller as part of processing a SMART Data Units Read Command; this value does not
|
|
|
|
// include metadata. This value is reported in thousands (i.e., a value of 1 corresponds to 1,000
|
|
|
|
// units of 512 bytes read) and is rounded up (e.g., one indicates that the number of 512 byte
|
|
|
|
// data units read is from 1 to 1,000, three indicates that the number of 512 byte data units read
|
|
|
|
// is from 2,001 to 3,000).
|
|
|
|
//
|
|
|
|
// A value of 0h in this field indicates that the number of SMART Data Units Read is not reported.
|
|
|
|
//
|
|
|
|
// Bytes 63:48
|
|
|
|
//
|
|
|
|
// Data Units Written: Contains the number of 512 byte data units the host has written to the ...
|
|
|
|
// (the same as Data Units Read)
|
|
|
|
|
2023-08-24 13:50:04 +02:00
|
|
|
func (smart *SMARTctl) mineNvmeBytesRead() {
|
2023-10-16 07:59:57 +02:00
|
|
|
data_units_read := smart.json.Get("nvme_smart_health_information_log.data_units_read")
|
2024-04-09 09:40:44 +02:00
|
|
|
// 0 => not reported by underlying hardware
|
|
|
|
if !data_units_read.Exists() || data_units_read.Int() == 0 {
|
2023-10-16 07:59:57 +02:00
|
|
|
return
|
|
|
|
}
|
2020-10-02 13:30:09 +02:00
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
2020-10-02 15:14:40 +02:00
|
|
|
metricDeviceBytesRead,
|
2020-10-02 13:30:09 +02:00
|
|
|
prometheus.CounterValue,
|
2024-04-09 09:40:44 +02:00
|
|
|
// WARNING: Float64 will lose precision when drives reach ~32EiB read/write
|
|
|
|
// The underlying data_units_written,data_units_read are 128-bit integers
|
|
|
|
data_units_read.Float()*1000.0*512.0,
|
2020-10-02 13:30:09 +02:00
|
|
|
smart.device.device,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
2023-08-24 13:50:04 +02:00
|
|
|
func (smart *SMARTctl) mineNvmeBytesWritten() {
|
2023-10-16 07:59:57 +02:00
|
|
|
data_units_written := smart.json.Get("nvme_smart_health_information_log.data_units_written")
|
2024-04-09 09:40:44 +02:00
|
|
|
// 0 => not reported by underlying hardware
|
|
|
|
if !data_units_written.Exists() || data_units_written.Int() == 0 {
|
2023-10-16 07:59:57 +02:00
|
|
|
return
|
|
|
|
}
|
2020-10-02 13:30:09 +02:00
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
2020-10-02 15:14:40 +02:00
|
|
|
metricDeviceBytesWritten,
|
2020-10-02 13:30:09 +02:00
|
|
|
prometheus.CounterValue,
|
2024-04-09 09:40:44 +02:00
|
|
|
// WARNING: Float64 will lose precision when drives reach ~32EiB read/write
|
|
|
|
// The underlying data_units_written,data_units_read are 128-bit integers
|
|
|
|
data_units_written.Float()*1000.0*512.0,
|
2020-10-02 13:30:09 +02:00
|
|
|
smart.device.device,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
2023-10-16 07:59:57 +02:00
|
|
|
func (smart *SMARTctl) mineSCSIBytesRead() {
|
|
|
|
SCSIHealth := smart.json.Get("scsi_error_counter_log")
|
|
|
|
if SCSIHealth.Exists() {
|
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricDeviceBytesRead,
|
|
|
|
prometheus.CounterValue,
|
|
|
|
// This value is reported by SMARTctl in GB [10^9].
|
|
|
|
// It is possible that some drives mis-report the value, but
|
|
|
|
// that is not the responsibility of the exporter or smartctl
|
|
|
|
SCSIHealth.Get("read.gigabytes_processed").Float()*1e9,
|
|
|
|
smart.device.device,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (smart *SMARTctl) mineSCSIBytesWritten() {
|
|
|
|
SCSIHealth := smart.json.Get("scsi_error_counter_log")
|
|
|
|
if SCSIHealth.Exists() {
|
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricDeviceBytesWritten,
|
|
|
|
prometheus.CounterValue,
|
|
|
|
// This value is reported by SMARTctl in GB [10^9].
|
|
|
|
// It is possible that some drives mis-report the value, but
|
|
|
|
// that is not the responsibility of the exporter or smartctl
|
|
|
|
SCSIHealth.Get("write.gigabytes_processed").Float()*1e9,
|
|
|
|
smart.device.device,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-10-02 13:30:09 +02:00
|
|
|
func (smart *SMARTctl) mineSmartStatus() {
|
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricDeviceSmartStatus,
|
|
|
|
prometheus.GaugeValue,
|
|
|
|
smart.json.Get("smart_status.passed").Float(),
|
|
|
|
smart.device.device,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
2019-08-17 12:18:48 +02:00
|
|
|
func (smart *SMARTctl) mineDeviceStatistics() {
|
|
|
|
for _, page := range smart.json.Get("ata_device_statistics.pages").Array() {
|
|
|
|
table := strings.TrimSpace(page.Get("name").String())
|
2021-10-03 13:53:20 +02:00
|
|
|
// skip vendor-specific statistics (they lead to duplicate metric labels on Seagate Exos drives,
|
|
|
|
// see https://github.com/Sheridan/smartctl_exporter/issues/3 for details)
|
|
|
|
if table == "Vendor Specific Statistics" {
|
2022-10-19 08:03:40 +02:00
|
|
|
continue
|
2021-10-03 13:53:20 +02:00
|
|
|
}
|
2019-08-17 12:18:48 +02:00
|
|
|
for _, statistic := range page.Get("table").Array() {
|
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricDeviceStatistics,
|
|
|
|
prometheus.GaugeValue,
|
|
|
|
statistic.Get("value").Float(),
|
|
|
|
smart.device.device,
|
|
|
|
table,
|
|
|
|
strings.TrimSpace(statistic.Get("name").String()),
|
|
|
|
strings.TrimSpace(statistic.Get("flags.string").String()),
|
|
|
|
smart.mineLongFlags(statistic.Get("flags"), []string{
|
|
|
|
"valid",
|
|
|
|
"normalized",
|
|
|
|
"supports_dsn",
|
|
|
|
"monitored_condition_met",
|
|
|
|
}),
|
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
2020-07-26 22:35:22 +02:00
|
|
|
|
|
|
|
for _, statistic := range smart.json.Get("sata_phy_event_counters.table").Array() {
|
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricDeviceStatistics,
|
|
|
|
prometheus.GaugeValue,
|
|
|
|
statistic.Get("value").Float(),
|
|
|
|
smart.device.device,
|
|
|
|
"SATA PHY Event Counters",
|
|
|
|
strings.TrimSpace(statistic.Get("name").String()),
|
2020-07-26 23:40:18 +02:00
|
|
|
"V---",
|
|
|
|
"valid",
|
2020-07-26 22:35:22 +02:00
|
|
|
)
|
|
|
|
}
|
2019-08-17 12:18:48 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
func (smart *SMARTctl) mineLongFlags(json gjson.Result, flags []string) string {
|
|
|
|
var result []string
|
|
|
|
for _, flag := range flags {
|
|
|
|
jFlag := json.Get(flag)
|
|
|
|
if jFlag.Exists() && jFlag.Bool() {
|
|
|
|
result = append(result, flag)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return strings.Join(result, ",")
|
|
|
|
}
|
2019-12-19 11:17:35 +01:00
|
|
|
|
2020-07-26 22:48:49 +02:00
|
|
|
func (smart *SMARTctl) mineDeviceErrorLog() {
|
|
|
|
for logType, status := range smart.json.Get("ata_smart_error_log").Map() {
|
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricDeviceErrorLogCount,
|
|
|
|
prometheus.GaugeValue,
|
|
|
|
status.Get("count").Float(),
|
|
|
|
smart.device.device,
|
|
|
|
logType,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
2020-07-26 23:16:08 +02:00
|
|
|
|
|
|
|
func (smart *SMARTctl) mineDeviceSelfTestLog() {
|
|
|
|
for logType, status := range smart.json.Get("ata_smart_self_test_log").Map() {
|
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricDeviceSelfTestLogCount,
|
|
|
|
prometheus.GaugeValue,
|
|
|
|
status.Get("count").Float(),
|
|
|
|
smart.device.device,
|
|
|
|
logType,
|
|
|
|
)
|
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricDeviceSelfTestLogErrorCount,
|
|
|
|
prometheus.GaugeValue,
|
|
|
|
status.Get("error_count_total").Float(),
|
|
|
|
smart.device.device,
|
|
|
|
logType,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
2020-07-26 23:37:43 +02:00
|
|
|
|
|
|
|
func (smart *SMARTctl) mineDeviceERC() {
|
|
|
|
for ercType, status := range smart.json.Get("ata_sct_erc").Map() {
|
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricDeviceERCSeconds,
|
|
|
|
prometheus.GaugeValue,
|
|
|
|
status.Get("deciseconds").Float()/10.0,
|
|
|
|
smart.device.device,
|
|
|
|
ercType,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
2021-04-19 00:07:05 +02:00
|
|
|
|
|
|
|
func (smart *SMARTctl) mineSCSIGrownDefectList() {
|
|
|
|
scsi_grown_defect_list := smart.json.Get("scsi_grown_defect_list")
|
|
|
|
if scsi_grown_defect_list.Exists() {
|
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricSCSIGrownDefectList,
|
2023-06-29 20:48:38 +02:00
|
|
|
prometheus.GaugeValue,
|
2021-04-19 00:07:05 +02:00
|
|
|
scsi_grown_defect_list.Float(),
|
|
|
|
smart.device.device,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (smart *SMARTctl) mineSCSIErrorCounterLog() {
|
|
|
|
SCSIHealth := smart.json.Get("scsi_error_counter_log")
|
|
|
|
if SCSIHealth.Exists() {
|
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricReadErrorsCorrectedByRereadsRewrites,
|
2023-06-29 20:48:38 +02:00
|
|
|
prometheus.GaugeValue,
|
2021-04-19 00:07:05 +02:00
|
|
|
SCSIHealth.Get("read.errors_corrected_by_rereads_rewrites").Float(),
|
|
|
|
smart.device.device,
|
|
|
|
)
|
2023-10-16 07:59:57 +02:00
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricReadErrorsCorrectedByEccFast,
|
|
|
|
prometheus.GaugeValue,
|
|
|
|
SCSIHealth.Get("read.errors_corrected_by_eccfast").Float(),
|
|
|
|
smart.device.device,
|
|
|
|
)
|
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricReadErrorsCorrectedByEccDelayed,
|
|
|
|
prometheus.GaugeValue,
|
|
|
|
SCSIHealth.Get("read.errors_corrected_by_eccdelayed").Float(),
|
|
|
|
smart.device.device,
|
|
|
|
)
|
2021-04-19 00:07:05 +02:00
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricReadTotalUncorrectedErrors,
|
2023-06-29 20:48:38 +02:00
|
|
|
prometheus.GaugeValue,
|
2021-04-19 00:07:05 +02:00
|
|
|
SCSIHealth.Get("read.total_uncorrected_errors").Float(),
|
|
|
|
smart.device.device,
|
|
|
|
)
|
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricWriteErrorsCorrectedByRereadsRewrites,
|
2023-06-29 20:48:38 +02:00
|
|
|
prometheus.GaugeValue,
|
2021-04-19 00:07:05 +02:00
|
|
|
SCSIHealth.Get("write.errors_corrected_by_rereads_rewrites").Float(),
|
|
|
|
smart.device.device,
|
|
|
|
)
|
2023-10-16 07:59:57 +02:00
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricWriteErrorsCorrectedByEccFast,
|
|
|
|
prometheus.GaugeValue,
|
|
|
|
SCSIHealth.Get("write.errors_corrected_by_eccfast").Float(),
|
|
|
|
smart.device.device,
|
|
|
|
)
|
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricWriteErrorsCorrectedByEccDelayed,
|
|
|
|
prometheus.GaugeValue,
|
|
|
|
SCSIHealth.Get("write.errors_corrected_by_eccdelayed").Float(),
|
|
|
|
smart.device.device,
|
|
|
|
)
|
2021-04-19 00:07:05 +02:00
|
|
|
smart.ch <- prometheus.MustNewConstMetric(
|
|
|
|
metricWriteTotalUncorrectedErrors,
|
2023-06-29 20:48:38 +02:00
|
|
|
prometheus.GaugeValue,
|
2021-04-19 00:07:05 +02:00
|
|
|
SCSIHealth.Get("write.total_uncorrected_errors").Float(),
|
|
|
|
smart.device.device,
|
|
|
|
)
|
2023-10-16 07:59:57 +02:00
|
|
|
// TODO: Should we also export the verify category?
|
2021-04-19 00:07:05 +02:00
|
|
|
}
|
|
|
|
}
|