Implemented a new feature: extracting the RAID member disk name.

Modified the smartctl.device parameter: it can now be set to a short disk name such as sda or megaraid_disk_01.

Signed-off-by: Denys <zxzharmlesszxz@gmail.com>
This commit is contained in:
mort 2024-03-08 15:39:33 +01:00 committed by Denys
parent 84d8cc3d4d
commit 3a012b5bb1
4 changed files with 69 additions and 30 deletions

1
.gitignore vendored
View file

@ -3,6 +3,7 @@
/.release /.release
/.tarballs /.tarballs
debug/ debug/
.idea/
Manifest Manifest
smartctl_exporter smartctl_exporter

51
main.go
View file

@ -16,6 +16,7 @@ package main
import ( import (
"net/http" "net/http"
"os" "os"
"strings"
"sync" "sync"
"time" "time"
@ -32,11 +33,18 @@ import (
webflag "github.com/prometheus/exporter-toolkit/web/kingpinflag" webflag "github.com/prometheus/exporter-toolkit/web/kingpinflag"
) )
// Device describes one entry of the smartctl device scan as used by the exporter.
type Device struct {
// Name is the scan entry's "name" value; it is the device argument handed to smartctl.
Name string `json:"name"`
// Info_Name is the short disk name that extractDiskName derives from the scan
// entry's "info_name" value; it is what device filters and log messages use.
Info_Name string `json:"info_name"`
// Type is the scan entry's "type" value; it is handed to smartctl through -d.
Type string `json:"type"`
}
// SMARTctlManagerCollector implements the Collector interface. // SMARTctlManagerCollector implements the Collector interface.
type SMARTctlManagerCollector struct { type SMARTctlManagerCollector struct {
CollectPeriod string CollectPeriod string
CollectPeriodDuration time.Duration CollectPeriodDuration time.Duration
Devices []string Devices []Device
logger log.Logger logger log.Logger
mutex sync.Mutex mutex sync.Mutex
@ -106,24 +114,43 @@ var (
) )
// scanDevices uses smartctl to gather the list of available devices. // scanDevices uses smartctl to gather the list of available devices.
func scanDevices(logger log.Logger) []string { func scanDevices(logger log.Logger) []Device {
filter := newDeviceFilter(*smartctlDeviceExclude, *smartctlDeviceInclude) filter := newDeviceFilter(*smartctlDeviceExclude, *smartctlDeviceInclude)
json := readSMARTctlDevices(logger) json := readSMARTctlDevices(logger)
scanDevices := json.Get("devices").Array() scanDevices := json.Get("devices").Array()
var scanDeviceResult []string var scanDeviceResult []Device
for _, d := range scanDevices { for _, d := range scanDevices {
deviceName := d.Get("name").String() deviceName := extractDiskName(strings.TrimSpace(d.Get("info_name").String()))
if filter.ignored(deviceName) { if filter.ignored(deviceName) {
level.Info(logger).Log("msg", "Ignoring device", "name", deviceName) level.Info(logger).Log("msg", "Ignoring device", "name", deviceName)
} else { } else {
level.Info(logger).Log("msg", "Found device", "name", deviceName) level.Info(logger).Log("msg", "Found device", "name", deviceName)
scanDeviceResult = append(scanDeviceResult, deviceName) device := Device{
Name: d.Get("name").String(),
Info_Name: deviceName,
Type: d.Get("type").String(),
}
scanDeviceResult = append(scanDeviceResult, device)
} }
} }
return scanDeviceResult return scanDeviceResult
} }
// filterDevices returns, in input order, the devices whose Info_Name contains
// at least one of the given filter strings. A device is added at most once:
// the remaining filters are not tried after the first match. Every comparison
// that is attempted is reported at debug level. The result is nil when nothing
// matches.
func filterDevices(logger log.Logger, devices []Device, filters []string) []Device {
	var kept []Device
	for i := range devices {
		candidate := devices[i]
		for j := 0; j < len(filters); j++ {
			pattern := filters[j]
			level.Debug(logger).Log("msg", "filterDevices", "device", candidate.Info_Name, "filter", pattern)
			if !strings.Contains(candidate.Info_Name, pattern) {
				continue
			}
			// First matching filter wins; move on to the next device.
			kept = append(kept, candidate)
			break
		}
	}
	return kept
}
func main() { func main() {
metricsPath := kingpin.Flag( metricsPath := kingpin.Flag(
"web.telemetry-path", "Path under which to expose metrics", "web.telemetry-path", "Path under which to expose metrics",
@ -140,13 +167,13 @@ func main() {
level.Info(logger).Log("msg", "Starting smartctl_exporter", "version", version.Info()) level.Info(logger).Log("msg", "Starting smartctl_exporter", "version", version.Info())
level.Info(logger).Log("msg", "Build context", "build_context", version.BuildContext()) level.Info(logger).Log("msg", "Build context", "build_context", version.BuildContext())
var devices []string var devices []Device
devices = scanDevices(logger)
level.Info(logger).Log("msg", "Number of devices found", "count", len(devices))
if len(*smartctlDevices) > 0 { if len(*smartctlDevices) > 0 {
devices = *smartctlDevices level.Info(logger).Log("msg", "Devices specified", "devices", strings.Join(*smartctlDevices, ", "))
} else { devices = filterDevices(logger, devices, *smartctlDevices)
level.Info(logger).Log("msg", "No devices specified, trying to load them automatically") level.Info(logger).Log("msg", "Devices filtered", "count", len(devices))
devices = scanDevices(logger)
level.Info(logger).Log("msg", "Number of devices found", "count", len(devices))
} }
collector := SMARTctlManagerCollector{ collector := SMARTctlManagerCollector{
@ -154,7 +181,7 @@ func main() {
logger: logger, logger: logger,
} }
if *smartctlRescanInterval >= 1*time.Second && len(*smartctlDevices) == 0 { if *smartctlRescanInterval >= 1*time.Second {
level.Info(logger).Log("msg", "Start background scan process") level.Info(logger).Log("msg", "Start background scan process")
level.Info(logger).Log("msg", "Rescanning for devices every", "rescanInterval", *smartctlRescanInterval) level.Info(logger).Log("msg", "Rescanning for devices every", "rescanInterval", *smartctlRescanInterval)
go collector.RescanForDevices() go collector.RescanForDevices()

View file

@ -49,8 +49,8 @@ func parseJSON(data string) gjson.Result {
} }
// Reading fake smartctl json // Reading fake smartctl json
func readFakeSMARTctl(logger log.Logger, device string) gjson.Result { func readFakeSMARTctl(logger log.Logger, device Device) gjson.Result {
s := strings.Split(device, "/") s := strings.Split(device.Name, "/")
filename := fmt.Sprintf("debug/%s.json", s[len(s)-1]) filename := fmt.Sprintf("debug/%s.json", s[len(s)-1])
level.Debug(logger).Log("msg", "Read fake S.M.A.R.T. data from json", "filename", filename) level.Debug(logger).Log("msg", "Read fake S.M.A.R.T. data from json", "filename", filename)
jsonFile, err := os.ReadFile(filename) jsonFile, err := os.ReadFile(filename)
@ -62,16 +62,16 @@ func readFakeSMARTctl(logger log.Logger, device string) gjson.Result {
} }
// Get json from smartctl and parse it // Get json from smartctl and parse it
func readSMARTctl(logger log.Logger, device string) (gjson.Result, bool) { func readSMARTctl(logger log.Logger, device Device) (gjson.Result, bool) {
start := time.Now() start := time.Now()
out, err := exec.Command(*smartctlPath, "--json", "--info", "--health", "--attributes", "--tolerance=verypermissive", "--nocheck=standby", "--format=brief", "--log=error", device).Output() out, err := exec.Command(*smartctlPath, "--json", "--info", "--health", "--attributes", "--tolerance=verypermissive", "--nocheck=standby", "--format=brief", "--log=error", device.Name, "-d", device.Type).Output()
if err != nil { if err != nil {
level.Warn(logger).Log("msg", "S.M.A.R.T. output reading", "err", err, "device", device) level.Warn(logger).Log("msg", "S.M.A.R.T. output reading", "err", err, "device", device.Info_Name)
} }
json := parseJSON(string(out)) json := parseJSON(string(out))
rcOk := resultCodeIsOk(logger, device, json.Get("smartctl.exit_status").Int()) rcOk := resultCodeIsOk(logger, device, json.Get("smartctl.exit_status").Int())
jsonOk := jsonIsOk(logger, json) jsonOk := jsonIsOk(logger, json)
level.Debug(logger).Log("msg", "Collected S.M.A.R.T. json data", "device", device, "duration", time.Since(start)) level.Debug(logger).Log("msg", "Collected S.M.A.R.T. json data", "device", device.Info_Name, "duration", time.Since(start))
return json, rcOk && jsonOk return json, rcOk && jsonOk
} }
@ -90,7 +90,7 @@ func readSMARTctlDevices(logger log.Logger) gjson.Result {
} }
// Select json source and parse // Select json source and parse
func readData(logger log.Logger, device string) gjson.Result { func readData(logger log.Logger, device Device) gjson.Result {
if *smartctlFakeData { if *smartctlFakeData {
return readFakeSMARTctl(logger, device) return readFakeSMARTctl(logger, device)
} }
@ -102,7 +102,7 @@ func readData(logger log.Logger, device string) gjson.Result {
jsonCache.Store(device, JSONCache{JSON: json, LastCollect: time.Now()}) jsonCache.Store(device, JSONCache{JSON: json, LastCollect: time.Now()})
j, found := jsonCache.Load(device) j, found := jsonCache.Load(device)
if !found { if !found {
level.Warn(logger).Log("msg", "device not found", "device", device) level.Warn(logger).Log("msg", "device not found", "device", device.Info_Name)
} }
return j.(JSONCache).JSON return j.(JSONCache).JSON
} }
@ -112,35 +112,35 @@ func readData(logger log.Logger, device string) gjson.Result {
} }
// Parse smartctl return code // Parse smartctl return code
func resultCodeIsOk(logger log.Logger, device string, SMARTCtlResult int64) bool { func resultCodeIsOk(logger log.Logger, device Device, SMARTCtlResult int64) bool {
result := true result := true
if SMARTCtlResult > 0 { if SMARTCtlResult > 0 {
b := SMARTCtlResult b := SMARTCtlResult
if (b & 1) != 0 { if (b & 1) != 0 {
level.Error(logger).Log("msg", "Command line did not parse", "device", device) level.Error(logger).Log("msg", "Command line did not parse", "device", device.Info_Name)
result = false result = false
} }
if (b & (1 << 1)) != 0 { if (b & (1 << 1)) != 0 {
level.Error(logger).Log("msg", "Device open failed, device did not return an IDENTIFY DEVICE structure, or device is in a low-power mode", "device", device) level.Error(logger).Log("msg", "Device open failed, device did not return an IDENTIFY DEVICE structure, or device is in a low-power mode", "device", device.Info_Name)
result = false result = false
} }
if (b & (1 << 2)) != 0 { if (b & (1 << 2)) != 0 {
level.Warn(logger).Log("msg", "Some SMART or other ATA command to the disk failed, or there was a checksum error in a SMART data structure", "device", device) level.Warn(logger).Log("msg", "Some SMART or other ATA command to the disk failed, or there was a checksum error in a SMART data structure", "device", device.Info_Name)
} }
if (b & (1 << 3)) != 0 { if (b & (1 << 3)) != 0 {
level.Warn(logger).Log("msg", "SMART status check returned 'DISK FAILING'", "device", device) level.Warn(logger).Log("msg", "SMART status check returned 'DISK FAILING'", "device", device.Info_Name)
} }
if (b & (1 << 4)) != 0 { if (b & (1 << 4)) != 0 {
level.Warn(logger).Log("msg", "We found prefail Attributes <= threshold", "device", device) level.Warn(logger).Log("msg", "We found prefail Attributes <= threshold", "device", device.Info_Name)
} }
if (b & (1 << 5)) != 0 { if (b & (1 << 5)) != 0 {
level.Warn(logger).Log("msg", "SMART status check returned 'DISK OK' but we found that some (usage or prefail) Attributes have been <= threshold at some time in the past", "device", device) level.Warn(logger).Log("msg", "SMART status check returned 'DISK OK' but we found that some (usage or prefail) Attributes have been <= threshold at some time in the past", "device", device.Info_Name)
} }
if (b & (1 << 6)) != 0 { if (b & (1 << 6)) != 0 {
level.Warn(logger).Log("msg", "The device error log contains records of errors", "device", device) level.Warn(logger).Log("msg", "The device error log contains records of errors", "device", device.Info_Name)
} }
if (b & (1 << 7)) != 0 { if (b & (1 << 7)) != 0 {
level.Warn(logger).Log("msg", "The device self-test log contains records of errors. [ATA only] Failed self-tests outdated by a newer successful extended self-test are ignored", "device", device) level.Warn(logger).Log("msg", "The device self-test log contains records of errors. [ATA only] Failed self-tests outdated by a newer successful extended self-test are ignored", "device", device.Info_Name)
} }
} }
return result return result

View file

@ -15,6 +15,7 @@ package main
import ( import (
"fmt" "fmt"
"regexp"
"strings" "strings"
"github.com/go-kit/log" "github.com/go-kit/log"
@ -42,6 +43,16 @@ type SMARTctl struct {
device SMARTDevice device SMARTDevice
} }
// diskNameRe recognises the device descriptions handled by extractDiskName and
// captures the short disk name in the named group "disk". It is compiled once
// at package initialisation instead of on every call.
var diskNameRe = regexp.MustCompile(`^(?:/dev/\S+/\S+\s\[|/dev/|\[)(?:\s\[|)(?P<disk>[a-z0-9_]+)(?:\].*|)$`)

// extractDiskName returns the short disk name contained in input.
//
// Accepted forms include a plain device path ("/dev/sda" -> "sda"), a bracketed
// name ("[megaraid_disk_01]" -> "megaraid_disk_01") and a bus path followed by a
// bracketed name ("/dev/bus/0 [megaraid_disk_01]" -> "megaraid_disk_01"). The
// name itself may only consist of lowercase letters, digits and underscores.
//
// The empty string is returned when input does not match any accepted form.
func extractDiskName(input string) string {
	match := diskNameRe.FindStringSubmatch(input)
	if match == nil {
		return ""
	}
	return match[diskNameRe.SubexpIndex("disk")]
}
// NewSMARTctl is smartctl constructor // NewSMARTctl is smartctl constructor
func NewSMARTctl(logger log.Logger, json gjson.Result, ch chan<- prometheus.Metric) SMARTctl { func NewSMARTctl(logger log.Logger, json gjson.Result, ch chan<- prometheus.Metric) SMARTctl {
var model_name string var model_name string
@ -60,7 +71,7 @@ func NewSMARTctl(logger log.Logger, json gjson.Result, ch chan<- prometheus.Metr
json: json, json: json,
logger: logger, logger: logger,
device: SMARTDevice{ device: SMARTDevice{
device: strings.TrimPrefix(strings.TrimSpace(json.Get("device.name").String()), "/dev/"), device: extractDiskName(strings.TrimSpace(json.Get("device.info_name").String())),
serial: strings.TrimSpace(json.Get("serial_number").String()), serial: strings.TrimSpace(json.Get("serial_number").String()),
family: strings.TrimSpace(GetStringIfExists(json, "model_family", "unknown")), family: strings.TrimSpace(GetStringIfExists(json, "model_family", "unknown")),
model: strings.TrimSpace(model_name), model: strings.TrimSpace(model_name),