mirror of
https://github.com/prometheus-community/smartctl_exporter.git
synced 2024-11-23 01:43:07 +01:00
Implemented new features - extract raid member disk name.
Modified smartctl.device param - now you can set it as sda, megaraid_disk_01, etc. Signed-off-by: Denys <zxzharmlesszxz@gmail.com>
This commit is contained in:
parent
84d8cc3d4d
commit
3a012b5bb1
4 changed files with 69 additions and 30 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -3,6 +3,7 @@
|
||||||
/.release
|
/.release
|
||||||
/.tarballs
|
/.tarballs
|
||||||
debug/
|
debug/
|
||||||
|
.idea/
|
||||||
|
|
||||||
Manifest
|
Manifest
|
||||||
smartctl_exporter
|
smartctl_exporter
|
||||||
|
|
51
main.go
51
main.go
|
@ -16,6 +16,7 @@ package main
|
||||||
import (
|
import (
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
@ -32,11 +33,18 @@ import (
|
||||||
webflag "github.com/prometheus/exporter-toolkit/web/kingpinflag"
|
webflag "github.com/prometheus/exporter-toolkit/web/kingpinflag"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
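// Device identifies one scanned device: its smartctl name, the disk name extracted from info_name, and its type.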
|
||||||
|
type Device struct {
|
||||||
|
Name string `json:"name"`
|
||||||
|
Info_Name string `json:"info_name"`
|
||||||
|
Type string `json:"type"`
|
||||||
|
}
|
||||||
|
|
||||||
// SMARTctlManagerCollector implements the Collector interface.
|
// SMARTctlManagerCollector implements the Collector interface.
|
||||||
type SMARTctlManagerCollector struct {
|
type SMARTctlManagerCollector struct {
|
||||||
CollectPeriod string
|
CollectPeriod string
|
||||||
CollectPeriodDuration time.Duration
|
CollectPeriodDuration time.Duration
|
||||||
Devices []string
|
Devices []Device
|
||||||
|
|
||||||
logger log.Logger
|
logger log.Logger
|
||||||
mutex sync.Mutex
|
mutex sync.Mutex
|
||||||
|
@ -106,24 +114,43 @@ var (
|
||||||
)
|
)
|
||||||
|
|
||||||
// scanDevices uses smartctl to gather the list of available devices.
|
// scanDevices uses smartctl to gather the list of available devices.
|
||||||
func scanDevices(logger log.Logger) []string {
|
func scanDevices(logger log.Logger) []Device {
|
||||||
filter := newDeviceFilter(*smartctlDeviceExclude, *smartctlDeviceInclude)
|
filter := newDeviceFilter(*smartctlDeviceExclude, *smartctlDeviceInclude)
|
||||||
|
|
||||||
json := readSMARTctlDevices(logger)
|
json := readSMARTctlDevices(logger)
|
||||||
scanDevices := json.Get("devices").Array()
|
scanDevices := json.Get("devices").Array()
|
||||||
var scanDeviceResult []string
|
var scanDeviceResult []Device
|
||||||
for _, d := range scanDevices {
|
for _, d := range scanDevices {
|
||||||
deviceName := d.Get("name").String()
|
deviceName := extractDiskName(strings.TrimSpace(d.Get("info_name").String()))
|
||||||
if filter.ignored(deviceName) {
|
if filter.ignored(deviceName) {
|
||||||
level.Info(logger).Log("msg", "Ignoring device", "name", deviceName)
|
level.Info(logger).Log("msg", "Ignoring device", "name", deviceName)
|
||||||
} else {
|
} else {
|
||||||
level.Info(logger).Log("msg", "Found device", "name", deviceName)
|
level.Info(logger).Log("msg", "Found device", "name", deviceName)
|
||||||
scanDeviceResult = append(scanDeviceResult, deviceName)
|
device := Device{
|
||||||
|
Name: d.Get("name").String(),
|
||||||
|
Info_Name: deviceName,
|
||||||
|
Type: d.Get("type").String(),
|
||||||
|
}
|
||||||
|
scanDeviceResult = append(scanDeviceResult, device)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return scanDeviceResult
|
return scanDeviceResult
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func filterDevices(logger log.Logger, devices []Device, filters []string) []Device {
|
||||||
|
var filtered []Device
|
||||||
|
for _, d := range devices {
|
||||||
|
for _, filter := range filters {
|
||||||
|
level.Debug(logger).Log("msg", "filterDevices", "device", d.Info_Name, "filter", filter)
|
||||||
|
if strings.Contains(d.Info_Name, filter) {
|
||||||
|
filtered = append(filtered, d)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return filtered
|
||||||
|
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
metricsPath := kingpin.Flag(
|
metricsPath := kingpin.Flag(
|
||||||
"web.telemetry-path", "Path under which to expose metrics",
|
"web.telemetry-path", "Path under which to expose metrics",
|
||||||
|
@ -140,13 +167,13 @@ func main() {
|
||||||
level.Info(logger).Log("msg", "Starting smartctl_exporter", "version", version.Info())
|
level.Info(logger).Log("msg", "Starting smartctl_exporter", "version", version.Info())
|
||||||
level.Info(logger).Log("msg", "Build context", "build_context", version.BuildContext())
|
level.Info(logger).Log("msg", "Build context", "build_context", version.BuildContext())
|
||||||
|
|
||||||
var devices []string
|
var devices []Device
|
||||||
|
devices = scanDevices(logger)
|
||||||
|
level.Info(logger).Log("msg", "Number of devices found", "count", len(devices))
|
||||||
if len(*smartctlDevices) > 0 {
|
if len(*smartctlDevices) > 0 {
|
||||||
devices = *smartctlDevices
|
level.Info(logger).Log("msg", "Devices specified", "devices", strings.Join(*smartctlDevices, ", "))
|
||||||
} else {
|
devices = filterDevices(logger, devices, *smartctlDevices)
|
||||||
level.Info(logger).Log("msg", "No devices specified, trying to load them automatically")
|
level.Info(logger).Log("msg", "Devices filtered", "count", len(devices))
|
||||||
devices = scanDevices(logger)
|
|
||||||
level.Info(logger).Log("msg", "Number of devices found", "count", len(devices))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
collector := SMARTctlManagerCollector{
|
collector := SMARTctlManagerCollector{
|
||||||
|
@ -154,7 +181,7 @@ func main() {
|
||||||
logger: logger,
|
logger: logger,
|
||||||
}
|
}
|
||||||
|
|
||||||
if *smartctlRescanInterval >= 1*time.Second && len(*smartctlDevices) == 0 {
|
if *smartctlRescanInterval >= 1*time.Second {
|
||||||
level.Info(logger).Log("msg", "Start background scan process")
|
level.Info(logger).Log("msg", "Start background scan process")
|
||||||
level.Info(logger).Log("msg", "Rescanning for devices every", "rescanInterval", *smartctlRescanInterval)
|
level.Info(logger).Log("msg", "Rescanning for devices every", "rescanInterval", *smartctlRescanInterval)
|
||||||
go collector.RescanForDevices()
|
go collector.RescanForDevices()
|
||||||
|
|
34
readjson.go
34
readjson.go
|
@ -49,8 +49,8 @@ func parseJSON(data string) gjson.Result {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reading fake smartctl json
|
// Reading fake smartctl json
|
||||||
func readFakeSMARTctl(logger log.Logger, device string) gjson.Result {
|
func readFakeSMARTctl(logger log.Logger, device Device) gjson.Result {
|
||||||
s := strings.Split(device, "/")
|
s := strings.Split(device.Name, "/")
|
||||||
filename := fmt.Sprintf("debug/%s.json", s[len(s)-1])
|
filename := fmt.Sprintf("debug/%s.json", s[len(s)-1])
|
||||||
level.Debug(logger).Log("msg", "Read fake S.M.A.R.T. data from json", "filename", filename)
|
level.Debug(logger).Log("msg", "Read fake S.M.A.R.T. data from json", "filename", filename)
|
||||||
jsonFile, err := os.ReadFile(filename)
|
jsonFile, err := os.ReadFile(filename)
|
||||||
|
@ -62,16 +62,16 @@ func readFakeSMARTctl(logger log.Logger, device string) gjson.Result {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get json from smartctl and parse it
|
// Get json from smartctl and parse it
|
||||||
func readSMARTctl(logger log.Logger, device string) (gjson.Result, bool) {
|
func readSMARTctl(logger log.Logger, device Device) (gjson.Result, bool) {
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
out, err := exec.Command(*smartctlPath, "--json", "--info", "--health", "--attributes", "--tolerance=verypermissive", "--nocheck=standby", "--format=brief", "--log=error", device).Output()
|
out, err := exec.Command(*smartctlPath, "--json", "--info", "--health", "--attributes", "--tolerance=verypermissive", "--nocheck=standby", "--format=brief", "--log=error", device.Name, "-d", device.Type).Output()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
level.Warn(logger).Log("msg", "S.M.A.R.T. output reading", "err", err, "device", device)
|
level.Warn(logger).Log("msg", "S.M.A.R.T. output reading", "err", err, "device", device.Info_Name)
|
||||||
}
|
}
|
||||||
json := parseJSON(string(out))
|
json := parseJSON(string(out))
|
||||||
rcOk := resultCodeIsOk(logger, device, json.Get("smartctl.exit_status").Int())
|
rcOk := resultCodeIsOk(logger, device, json.Get("smartctl.exit_status").Int())
|
||||||
jsonOk := jsonIsOk(logger, json)
|
jsonOk := jsonIsOk(logger, json)
|
||||||
level.Debug(logger).Log("msg", "Collected S.M.A.R.T. json data", "device", device, "duration", time.Since(start))
|
level.Debug(logger).Log("msg", "Collected S.M.A.R.T. json data", "device", device.Info_Name, "duration", time.Since(start))
|
||||||
return json, rcOk && jsonOk
|
return json, rcOk && jsonOk
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -90,7 +90,7 @@ func readSMARTctlDevices(logger log.Logger) gjson.Result {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Select json source and parse
|
// Select json source and parse
|
||||||
func readData(logger log.Logger, device string) gjson.Result {
|
func readData(logger log.Logger, device Device) gjson.Result {
|
||||||
if *smartctlFakeData {
|
if *smartctlFakeData {
|
||||||
return readFakeSMARTctl(logger, device)
|
return readFakeSMARTctl(logger, device)
|
||||||
}
|
}
|
||||||
|
@ -102,7 +102,7 @@ func readData(logger log.Logger, device string) gjson.Result {
|
||||||
jsonCache.Store(device, JSONCache{JSON: json, LastCollect: time.Now()})
|
jsonCache.Store(device, JSONCache{JSON: json, LastCollect: time.Now()})
|
||||||
j, found := jsonCache.Load(device)
|
j, found := jsonCache.Load(device)
|
||||||
if !found {
|
if !found {
|
||||||
level.Warn(logger).Log("msg", "device not found", "device", device)
|
level.Warn(logger).Log("msg", "device not found", "device", device.Info_Name)
|
||||||
}
|
}
|
||||||
return j.(JSONCache).JSON
|
return j.(JSONCache).JSON
|
||||||
}
|
}
|
||||||
|
@ -112,35 +112,35 @@ func readData(logger log.Logger, device string) gjson.Result {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse smartctl return code
|
// Parse smartctl return code
|
||||||
func resultCodeIsOk(logger log.Logger, device string, SMARTCtlResult int64) bool {
|
func resultCodeIsOk(logger log.Logger, device Device, SMARTCtlResult int64) bool {
|
||||||
result := true
|
result := true
|
||||||
if SMARTCtlResult > 0 {
|
if SMARTCtlResult > 0 {
|
||||||
b := SMARTCtlResult
|
b := SMARTCtlResult
|
||||||
if (b & 1) != 0 {
|
if (b & 1) != 0 {
|
||||||
level.Error(logger).Log("msg", "Command line did not parse", "device", device)
|
level.Error(logger).Log("msg", "Command line did not parse", "device", device.Info_Name)
|
||||||
result = false
|
result = false
|
||||||
}
|
}
|
||||||
if (b & (1 << 1)) != 0 {
|
if (b & (1 << 1)) != 0 {
|
||||||
level.Error(logger).Log("msg", "Device open failed, device did not return an IDENTIFY DEVICE structure, or device is in a low-power mode", "device", device)
|
level.Error(logger).Log("msg", "Device open failed, device did not return an IDENTIFY DEVICE structure, or device is in a low-power mode", "device", device.Info_Name)
|
||||||
result = false
|
result = false
|
||||||
}
|
}
|
||||||
if (b & (1 << 2)) != 0 {
|
if (b & (1 << 2)) != 0 {
|
||||||
level.Warn(logger).Log("msg", "Some SMART or other ATA command to the disk failed, or there was a checksum error in a SMART data structure", "device", device)
|
level.Warn(logger).Log("msg", "Some SMART or other ATA command to the disk failed, or there was a checksum error in a SMART data structure", "device", device.Info_Name)
|
||||||
}
|
}
|
||||||
if (b & (1 << 3)) != 0 {
|
if (b & (1 << 3)) != 0 {
|
||||||
level.Warn(logger).Log("msg", "SMART status check returned 'DISK FAILING'", "device", device)
|
level.Warn(logger).Log("msg", "SMART status check returned 'DISK FAILING'", "device", device.Info_Name)
|
||||||
}
|
}
|
||||||
if (b & (1 << 4)) != 0 {
|
if (b & (1 << 4)) != 0 {
|
||||||
level.Warn(logger).Log("msg", "We found prefail Attributes <= threshold", "device", device)
|
level.Warn(logger).Log("msg", "We found prefail Attributes <= threshold", "device", device.Info_Name)
|
||||||
}
|
}
|
||||||
if (b & (1 << 5)) != 0 {
|
if (b & (1 << 5)) != 0 {
|
||||||
level.Warn(logger).Log("msg", "SMART status check returned 'DISK OK' but we found that some (usage or prefail) Attributes have been <= threshold at some time in the past", "device", device)
|
level.Warn(logger).Log("msg", "SMART status check returned 'DISK OK' but we found that some (usage or prefail) Attributes have been <= threshold at some time in the past", "device", device.Info_Name)
|
||||||
}
|
}
|
||||||
if (b & (1 << 6)) != 0 {
|
if (b & (1 << 6)) != 0 {
|
||||||
level.Warn(logger).Log("msg", "The device error log contains records of errors", "device", device)
|
level.Warn(logger).Log("msg", "The device error log contains records of errors", "device", device.Info_Name)
|
||||||
}
|
}
|
||||||
if (b & (1 << 7)) != 0 {
|
if (b & (1 << 7)) != 0 {
|
||||||
level.Warn(logger).Log("msg", "The device self-test log contains records of errors. [ATA only] Failed self-tests outdated by a newer successful extended self-test are ignored", "device", device)
|
level.Warn(logger).Log("msg", "The device self-test log contains records of errors. [ATA only] Failed self-tests outdated by a newer successful extended self-test are ignored", "device", device.Info_Name)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return result
|
return result
|
||||||
|
|
13
smartctl.go
13
smartctl.go
|
@ -15,6 +15,7 @@ package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/go-kit/log"
|
"github.com/go-kit/log"
|
||||||
|
@ -42,6 +43,16 @@ type SMARTctl struct {
|
||||||
device SMARTDevice
|
device SMARTDevice
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// diskNameRegexp recognises the device descriptions handled by
// extractDiskName and captures the bare disk name in the named group "disk".
// It is compiled once at package scope so that callers do not pay for a
// regexp compilation on every lookup.
var diskNameRegexp = regexp.MustCompile(`^(?:/dev/\S+/\S+\s\[|/dev/|\[)(?:\s\[|)(?P<disk>[a-z0-9_]+)(?:\].*|)$`)

// extractDiskName returns the bare disk name contained in input.
//
// Accepted forms include:
//
//	/dev/sda                             -> sda
//	/dev/bus/0 [megaraid_disk_01] [SAT]  -> megaraid_disk_01
//	[megaraid_disk_01]                   -> megaraid_disk_01
//
// The disk name may only consist of lowercase letters, digits and
// underscores. An empty string is returned when input does not match any of
// the accepted forms.
func extractDiskName(input string) string {
	match := diskNameRegexp.FindStringSubmatch(input)
	if match == nil {
		return ""
	}
	return match[diskNameRegexp.SubexpIndex("disk")]
}
|
||||||
|
|
||||||
// NewSMARTctl is smartctl constructor
|
// NewSMARTctl is smartctl constructor
|
||||||
func NewSMARTctl(logger log.Logger, json gjson.Result, ch chan<- prometheus.Metric) SMARTctl {
|
func NewSMARTctl(logger log.Logger, json gjson.Result, ch chan<- prometheus.Metric) SMARTctl {
|
||||||
var model_name string
|
var model_name string
|
||||||
|
@ -60,7 +71,7 @@ func NewSMARTctl(logger log.Logger, json gjson.Result, ch chan<- prometheus.Metr
|
||||||
json: json,
|
json: json,
|
||||||
logger: logger,
|
logger: logger,
|
||||||
device: SMARTDevice{
|
device: SMARTDevice{
|
||||||
device: strings.TrimPrefix(strings.TrimSpace(json.Get("device.name").String()), "/dev/"),
|
device: extractDiskName(strings.TrimSpace(json.Get("device.info_name").String())),
|
||||||
serial: strings.TrimSpace(json.Get("serial_number").String()),
|
serial: strings.TrimSpace(json.Get("serial_number").String()),
|
||||||
family: strings.TrimSpace(GetStringIfExists(json, "model_family", "unknown")),
|
family: strings.TrimSpace(GetStringIfExists(json, "model_family", "unknown")),
|
||||||
model: strings.TrimSpace(model_name),
|
model: strings.TrimSpace(model_name),
|
||||||
|
|
Loading…
Reference in a new issue