Implement a new feature: extract the disk name of a RAID member.

Modify the smartctl.device parameter: it can now be set to a disk name such as sda or megaraid_disk_01.

Signed-off-by: Denys <zxzharmlesszxz@gmail.com>
This commit is contained in:
mort 2024-03-08 15:39:33 +01:00 committed by Denys
parent 84d8cc3d4d
commit 3a012b5bb1
4 changed files with 69 additions and 30 deletions

1
.gitignore vendored
View File

@ -3,6 +3,7 @@
/.release
/.tarballs
debug/
.idea/
Manifest
smartctl_exporter

49
main.go
View File

@ -16,6 +16,7 @@ package main
import (
"net/http"
"os"
"strings"
"sync"
"time"
@ -32,11 +33,18 @@ import (
webflag "github.com/prometheus/exporter-toolkit/web/kingpinflag"
)
// Device describes one entry of the smartctl device scan as used by the
// exporter.
type Device struct {
// Name is the scan entry's "name" value; readSMARTctl passes it to smartctl
// as the device argument.
Name string `json:"name"`
// Info_Name is the disk name that scanDevices derives from the scan entry's
// "info_name" value via extractDiskName; it is what device filtering and
// log messages use.
Info_Name string `json:"info_name"`
// Type is the scan entry's "type" value; readSMARTctl passes it to smartctl
// via the "-d" option.
Type string `json:"type"`
}
// SMARTctlManagerCollector implements the Collector interface.
type SMARTctlManagerCollector struct {
CollectPeriod string
CollectPeriodDuration time.Duration
Devices []string
Devices []Device
logger log.Logger
mutex sync.Mutex
@ -106,24 +114,43 @@ var (
)
// scanDevices uses smartctl to gather the list of available devices.
// It walks the "devices" array of the JSON returned by readSMARTctlDevices,
// asks the include/exclude filter (built from the smartctlDeviceExclude and
// smartctlDeviceInclude flags) whether each device name is ignored, logs the
// decision at info level, and returns the devices that were not ignored.
func scanDevices(logger log.Logger) []string {
func scanDevices(logger log.Logger) []Device {
filter := newDeviceFilter(*smartctlDeviceExclude, *smartctlDeviceInclude)
json := readSMARTctlDevices(logger)
scanDevices := json.Get("devices").Array()
var scanDeviceResult []string
var scanDeviceResult []Device
for _, d := range scanDevices {
deviceName := d.Get("name").String()
deviceName := extractDiskName(strings.TrimSpace(d.Get("info_name").String()))
if filter.ignored(deviceName) {
level.Info(logger).Log("msg", "Ignoring device", "name", deviceName)
} else {
level.Info(logger).Log("msg", "Found device", "name", deviceName)
scanDeviceResult = append(scanDeviceResult, deviceName)
device := Device{
Name: d.Get("name").String(),
Info_Name: deviceName,
Type: d.Get("type").String(),
}
scanDeviceResult = append(scanDeviceResult, device)
}
}
return scanDeviceResult
}
// filterDevices returns the devices whose Info_Name contains at least one of
// the given filter strings, preserving the input order. Every comparison that
// is attempted is logged at debug level; a device is added at most once, on
// its first matching filter. The result is nil when nothing matches.
func filterDevices(logger log.Logger, devices []Device, filters []string) []Device {
	var kept []Device
nextDevice:
	for i := range devices {
		name := devices[i].Info_Name
		for _, pattern := range filters {
			level.Debug(logger).Log("msg", "filterDevices", "device", name, "filter", pattern)
			if !strings.Contains(name, pattern) {
				continue
			}
			// First hit wins: keep the device and move on to the next one.
			kept = append(kept, devices[i])
			continue nextDevice
		}
	}
	return kept
}
func main() {
metricsPath := kingpin.Flag(
"web.telemetry-path", "Path under which to expose metrics",
@ -140,13 +167,13 @@ func main() {
level.Info(logger).Log("msg", "Starting smartctl_exporter", "version", version.Info())
level.Info(logger).Log("msg", "Build context", "build_context", version.BuildContext())
var devices []string
if len(*smartctlDevices) > 0 {
devices = *smartctlDevices
} else {
level.Info(logger).Log("msg", "No devices specified, trying to load them automatically")
var devices []Device
devices = scanDevices(logger)
level.Info(logger).Log("msg", "Number of devices found", "count", len(devices))
if len(*smartctlDevices) > 0 {
level.Info(logger).Log("msg", "Devices specified", "devices", strings.Join(*smartctlDevices, ", "))
devices = filterDevices(logger, devices, *smartctlDevices)
level.Info(logger).Log("msg", "Devices filtered", "count", len(devices))
}
collector := SMARTctlManagerCollector{
@ -154,7 +181,7 @@ func main() {
logger: logger,
}
if *smartctlRescanInterval >= 1*time.Second && len(*smartctlDevices) == 0 {
if *smartctlRescanInterval >= 1*time.Second {
level.Info(logger).Log("msg", "Start background scan process")
level.Info(logger).Log("msg", "Rescanning for devices every", "rescanInterval", *smartctlRescanInterval)
go collector.RescanForDevices()

View File

@ -49,8 +49,8 @@ func parseJSON(data string) gjson.Result {
}
// Reading fake smartctl json
func readFakeSMARTctl(logger log.Logger, device string) gjson.Result {
s := strings.Split(device, "/")
func readFakeSMARTctl(logger log.Logger, device Device) gjson.Result {
s := strings.Split(device.Name, "/")
filename := fmt.Sprintf("debug/%s.json", s[len(s)-1])
level.Debug(logger).Log("msg", "Read fake S.M.A.R.T. data from json", "filename", filename)
jsonFile, err := os.ReadFile(filename)
@ -62,16 +62,16 @@ func readFakeSMARTctl(logger log.Logger, device string) gjson.Result {
}
// readSMARTctl runs the smartctl binary at *smartctlPath for the given device
// with JSON output enabled and parses whatever it printed.
// A failure reported by the command is only logged as a warning; the output
// is parsed regardless. The returned bool is true only when both
// resultCodeIsOk (fed the "smartctl.exit_status" value from the JSON) and
// jsonIsOk report success.
func readSMARTctl(logger log.Logger, device string) (gjson.Result, bool) {
func readSMARTctl(logger log.Logger, device Device) (gjson.Result, bool) {
start := time.Now()
out, err := exec.Command(*smartctlPath, "--json", "--info", "--health", "--attributes", "--tolerance=verypermissive", "--nocheck=standby", "--format=brief", "--log=error", device).Output()
out, err := exec.Command(*smartctlPath, "--json", "--info", "--health", "--attributes", "--tolerance=verypermissive", "--nocheck=standby", "--format=brief", "--log=error", device.Name, "-d", device.Type).Output()
if err != nil {
level.Warn(logger).Log("msg", "S.M.A.R.T. output reading", "err", err, "device", device)
level.Warn(logger).Log("msg", "S.M.A.R.T. output reading", "err", err, "device", device.Info_Name)
}
json := parseJSON(string(out))
rcOk := resultCodeIsOk(logger, device, json.Get("smartctl.exit_status").Int())
jsonOk := jsonIsOk(logger, json)
// The duration logged below covers both the smartctl run and the parsing.
level.Debug(logger).Log("msg", "Collected S.M.A.R.T. json data", "device", device, "duration", time.Since(start))
level.Debug(logger).Log("msg", "Collected S.M.A.R.T. json data", "device", device.Info_Name, "duration", time.Since(start))
return json, rcOk && jsonOk
}
@ -90,7 +90,7 @@ func readSMARTctlDevices(logger log.Logger) gjson.Result {
}
// Select json source and parse
func readData(logger log.Logger, device string) gjson.Result {
func readData(logger log.Logger, device Device) gjson.Result {
if *smartctlFakeData {
return readFakeSMARTctl(logger, device)
}
@ -102,7 +102,7 @@ func readData(logger log.Logger, device string) gjson.Result {
jsonCache.Store(device, JSONCache{JSON: json, LastCollect: time.Now()})
j, found := jsonCache.Load(device)
if !found {
level.Warn(logger).Log("msg", "device not found", "device", device)
level.Warn(logger).Log("msg", "device not found", "device", device.Info_Name)
}
return j.(JSONCache).JSON
}
@ -112,35 +112,35 @@ func readData(logger log.Logger, device string) gjson.Result {
}
// Parse smartctl return code
func resultCodeIsOk(logger log.Logger, device string, SMARTCtlResult int64) bool {
func resultCodeIsOk(logger log.Logger, device Device, SMARTCtlResult int64) bool {
result := true
if SMARTCtlResult > 0 {
b := SMARTCtlResult
if (b & 1) != 0 {
level.Error(logger).Log("msg", "Command line did not parse", "device", device)
level.Error(logger).Log("msg", "Command line did not parse", "device", device.Info_Name)
result = false
}
if (b & (1 << 1)) != 0 {
level.Error(logger).Log("msg", "Device open failed, device did not return an IDENTIFY DEVICE structure, or device is in a low-power mode", "device", device)
level.Error(logger).Log("msg", "Device open failed, device did not return an IDENTIFY DEVICE structure, or device is in a low-power mode", "device", device.Info_Name)
result = false
}
if (b & (1 << 2)) != 0 {
level.Warn(logger).Log("msg", "Some SMART or other ATA command to the disk failed, or there was a checksum error in a SMART data structure", "device", device)
level.Warn(logger).Log("msg", "Some SMART or other ATA command to the disk failed, or there was a checksum error in a SMART data structure", "device", device.Info_Name)
}
if (b & (1 << 3)) != 0 {
level.Warn(logger).Log("msg", "SMART status check returned 'DISK FAILING'", "device", device)
level.Warn(logger).Log("msg", "SMART status check returned 'DISK FAILING'", "device", device.Info_Name)
}
if (b & (1 << 4)) != 0 {
level.Warn(logger).Log("msg", "We found prefail Attributes <= threshold", "device", device)
level.Warn(logger).Log("msg", "We found prefail Attributes <= threshold", "device", device.Info_Name)
}
if (b & (1 << 5)) != 0 {
level.Warn(logger).Log("msg", "SMART status check returned 'DISK OK' but we found that some (usage or prefail) Attributes have been <= threshold at some time in the past", "device", device)
level.Warn(logger).Log("msg", "SMART status check returned 'DISK OK' but we found that some (usage or prefail) Attributes have been <= threshold at some time in the past", "device", device.Info_Name)
}
if (b & (1 << 6)) != 0 {
level.Warn(logger).Log("msg", "The device error log contains records of errors", "device", device)
level.Warn(logger).Log("msg", "The device error log contains records of errors", "device", device.Info_Name)
}
if (b & (1 << 7)) != 0 {
level.Warn(logger).Log("msg", "The device self-test log contains records of errors. [ATA only] Failed self-tests outdated by a newer successful extended self-test are ignored", "device", device)
level.Warn(logger).Log("msg", "The device self-test log contains records of errors. [ATA only] Failed self-tests outdated by a newer successful extended self-test are ignored", "device", device.Info_Name)
}
}
return result

View File

@ -15,6 +15,7 @@ package main
import (
"fmt"
"regexp"
"strings"
"github.com/go-kit/log"
@ -42,6 +43,16 @@ type SMARTctl struct {
device SMARTDevice
}
// diskNameRe recognises the "info_name" shapes handled by extractDiskName and
// captures the disk name in the named group "disk". It is compiled once at
// package scope because extractDiskName runs for every scanned device and for
// every SMARTctl object that is built.
//
// Accepted shapes (disk names are limited to [a-z0-9_]):
//
//	/dev/<disk>                      e.g. /dev/sda
//	/dev/<disk> [<anything>          e.g. /dev/sda [SAT]
//	/dev/<bus>/<n> [<disk>]<rest>    e.g. /dev/bus/0 [megaraid_disk_01] [SAT]
//	[<disk>]<rest>                   e.g. [megaraid_disk_01]
var diskNameRe = regexp.MustCompile(`^(?:/dev/\S+/\S+\s\[|/dev/|\[)(?:\s\[|)(?P<disk>[a-z0-9_]+)(?:\].*|\s\[.*|)$`)

// extractDiskName returns the short disk name contained in a smartctl
// "info_name" string, or "" when the input has none of the recognised shapes.
//
// A plain device path followed by a bracketed annotation, such as
// "/dev/sda [SAT]", yields the device's own name ("sda") instead of an empty
// string, so such disks keep a usable name for filtering and labels.
func extractDiskName(input string) string {
	match := diskNameRe.FindStringSubmatch(input)
	if match == nil {
		return ""
	}
	return match[diskNameRe.SubexpIndex("disk")]
}
// NewSMARTctl is smartctl constructor
func NewSMARTctl(logger log.Logger, json gjson.Result, ch chan<- prometheus.Metric) SMARTctl {
var model_name string
@ -60,7 +71,7 @@ func NewSMARTctl(logger log.Logger, json gjson.Result, ch chan<- prometheus.Metr
json: json,
logger: logger,
device: SMARTDevice{
device: strings.TrimPrefix(strings.TrimSpace(json.Get("device.name").String()), "/dev/"),
device: extractDiskName(strings.TrimSpace(json.Get("device.info_name").String())),
serial: strings.TrimSpace(json.Get("serial_number").String()),
family: strings.TrimSpace(GetStringIfExists(json, "model_family", "unknown")),
model: strings.TrimSpace(model_name),