Parsing smartctl error code; parsing resulting json for smartctl errors; docker moved to subfolder

This commit is contained in:
Горлов Максим 2020-10-30 00:35:49 +03:00
parent 09cfdec25a
commit cbc437fea9
10 changed files with 100 additions and 21 deletions

1
.gitignore vendored
View File

@ -1,5 +1,6 @@
vendor vendor
bin bin
debug
*.json *.json
Manifest Manifest

View File

@ -31,3 +31,8 @@ example:
@echo '```' >> EXAMPLE.md @echo '```' >> EXAMPLE.md
@curl -s localhost:9633/metrics | grep smartctl >> EXAMPLE.md @curl -s localhost:9633/metrics | grep smartctl >> EXAMPLE.md
@echo '```' >> EXAMPLE.md @echo '```' >> EXAMPLE.md
collect_fake_json:
-mkdir debug
-rm -f debug/*json
sudo ./collect_fake_json.sh

View File

@ -2,5 +2,5 @@
for device in $(smartctl --scan | awk '{ print $1}') for device in $(smartctl --scan | awk '{ print $1}')
do do
smartctl --json --xall $device | jq > $(basename $device).json smartctl --json --xall $device | jq > debug/$(basename $device).json
done done

11
main.go
View File

@ -26,10 +26,13 @@ func (i SMARTctlManagerCollector) Describe(ch chan<- *prometheus.Desc) {
func (i SMARTctlManagerCollector) Collect(ch chan<- prometheus.Metric) { func (i SMARTctlManagerCollector) Collect(ch chan<- prometheus.Metric) {
info := NewSMARTctlInfo(ch) info := NewSMARTctlInfo(ch)
for _, device := range options.SMARTctl.Devices { for _, device := range options.SMARTctl.Devices {
json := readData(device) if json, err := readData(device); err == nil {
info.SetJSON(json) info.SetJSON(json)
smart := NewSMARTctl(json, ch) smart := NewSMARTctl(json, ch)
smart.Collect() smart.Collect()
} else {
logger.Error(err.Error())
}
} }
info.Collect() info.Collect()
} }

View File

@ -3,6 +3,7 @@ package main
import ( import (
"fmt" "fmt"
"io/ioutil" "io/ioutil"
"os"
"os/exec" "os/exec"
"strings" "strings"
"time" "time"
@ -35,7 +36,7 @@ func parseJSON(data string) gjson.Result {
// Reading fake smartctl json // Reading fake smartctl json
func readFakeSMARTctl(device string) gjson.Result { func readFakeSMARTctl(device string) gjson.Result {
splitted := strings.Split(device, "/") splitted := strings.Split(device, "/")
filename := fmt.Sprintf("%s.json", splitted[len(splitted)-1]) filename := fmt.Sprintf("debug/%s.json", splitted[len(splitted)-1])
logger.Verbose("Read fake S.M.A.R.T. data from json: %s", filename) logger.Verbose("Read fake S.M.A.R.T. data from json: %s", filename)
jsonFile, err := ioutil.ReadFile(filename) jsonFile, err := ioutil.ReadFile(filename)
if err != nil { if err != nil {
@ -46,30 +47,93 @@ func readFakeSMARTctl(device string) gjson.Result {
} }
// Get json from smartctl and parse it // Get json from smartctl and parse it
func readSMARTctl(device string) gjson.Result { func readSMARTctl(device string) (gjson.Result, bool) {
logger.Debug("Collecting S.M.A.R.T. counters, device: %s", device) logger.Debug("Collecting S.M.A.R.T. counters, device: %s", device)
out, err := exec.Command(options.SMARTctl.SMARTctlLocation, "--json", "--xall", device).Output() out, err := exec.Command(options.SMARTctl.SMARTctlLocation, "--json", "--xall", device).Output()
if err != nil { if err != nil {
logger.Warning("S.M.A.R.T. output reading error: %s", err) logger.Warning("S.M.A.R.T. output reading error: %s", err)
} }
return parseJSON(string(out)) json := parseJSON(string(out))
rcOk := resultCodeIsOk(json.Get("smartctl.exit_status").Int())
jsonOk := jsonIsOk(json)
return json, rcOk && jsonOk
} }
// Select json source and parse // Select json source and parse
func readData(device string) gjson.Result { func readData(device string) (gjson.Result, error) {
if options.SMARTctl.FakeJSON { if options.SMARTctl.FakeJSON {
return readFakeSMARTctl(device) return readFakeSMARTctl(device), nil
} }
if value, ok := jsonCache[device]; ok { if _, err := os.Stat(device); err == nil {
// logger.Debug("Cache exists") cacheValue, cacheOk := jsonCache[device]
if time.Now().After(value.LastCollect.Add(options.SMARTctl.CollectPeriodDuration)) { timeToScan := false
// logger.Debug("Cache update") if cacheOk {
jsonCache[device] = JSONCache{JSON: readSMARTctl(device), LastCollect: time.Now()} timeToScan = time.Now().After(cacheValue.LastCollect.Add(options.SMARTctl.CollectPeriodDuration))
} else {
timeToScan = true
} }
} else {
// logger.Debug("Cache not exists") if timeToScan {
jsonCache[device] = JSONCache{JSON: readSMARTctl(device), LastCollect: time.Now()} json, ok := readSMARTctl(device)
if ok {
jsonCache[device] = JSONCache{JSON: json, LastCollect: time.Now()}
return jsonCache[device].JSON, nil
}
return gjson.Parse("{}"), fmt.Errorf("smartctl returned bad data for device %s", device)
}
return gjson.Parse("{}"), fmt.Errorf("Too early collect called for device %s", device)
} }
return jsonCache[device].JSON return gjson.Parse("{}"), fmt.Errorf("Device %s unavialable", device)
}
// Parse smartctl return code
func resultCodeIsOk(SMARTCtlResult int64) bool {
result := true
if SMARTCtlResult > 0 {
bits := fmt.Sprintf("%08b", SMARTCtlResult)
// logger.Debug("Return code: %d: %s", SMARTCtlResult, bits)
if bits[0] == '1' {
logger.Error("Command line did not parse.")
result = false
}
if bits[1] == '1' {
logger.Error("Device open failed, device did not return an IDENTIFY DEVICE structure, or device is in a low-power mode")
result = false
}
if bits[2] == '1' {
logger.Warning("Some SMART or other ATA command to the disk failed, or there was a checksum error in a SMART data structure")
}
if bits[3] == '1' {
logger.Warning("SMART status check returned 'DISK FAILING'.")
}
if bits[4] == '1' {
logger.Warning("We found prefail Attributes <= threshold.")
}
if bits[5] == '1' {
logger.Warning("SMART status check returned 'DISK OK' but we found that some (usage or prefail) Attributes have been <= threshold at some time in the past.")
}
if bits[6] == '1' {
logger.Warning("The device error log contains records of errors.")
}
if bits[7] == '1' {
logger.Warning("The device self-test log contains records of errors. [ATA only] Failed self-tests outdated by a newer successful extended self-test are ignored.")
}
}
return result
}
// jsonIsOk reports whether the parsed smartctl output is free of
// error-severity messages.
//
// It reads the "smartctl.messages" array from json. For the first entry whose
// "severity" field equals "error", the entry's "string" field is logged through
// logger.Error and false is returned. When the array is absent, or no entry has
// severity "error", the function returns true.
func jsonIsOk(json gjson.Result) bool {
messages := json.Get("smartctl.messages")
// logger.Debug(messages.String())
if messages.Exists() {
for _, message := range messages.Array() {
// Stop at the first error: one is enough to reject this output.
if message.Get("severity").String() == "error" {
logger.Error(message.Get("string").String())
return false
}
}
}
return true
} }

View File

@ -34,12 +34,12 @@ func NewSMARTctl(json gjson.Result, ch chan<- prometheus.Metric) SMARTctl {
family: strings.TrimSpace(smart.json.Get("model_family").String()), family: strings.TrimSpace(smart.json.Get("model_family").String()),
model: strings.TrimSpace(smart.json.Get("model_name").String()), model: strings.TrimSpace(smart.json.Get("model_name").String()),
} }
logger.Verbose("Collecting metrics from %s: %s, %s", smart.device.device, smart.device.family, smart.device.model)
return smart return smart
} }
// Collect metrics // Collect metrics
func (smart *SMARTctl) Collect() { func (smart *SMARTctl) Collect() {
logger.Verbose("Collecting metrics from %s: %s, %s", smart.device.device, smart.device.family, smart.device.model)
smart.mineExitStatus() smart.mineExitStatus()
smart.mineDevice() smart.mineDevice()
smart.mineCapacity() smart.mineCapacity()

View File

@ -1,7 +1,7 @@
smartctl_exporter: smartctl_exporter:
bind_to: "[::1]:9633" bind_to: "[::1]:9633"
url_path: "/metrics" url_path: "/metrics"
fake_json: yes fake_json: no
smartctl_location: /usr/sbin/smartctl smartctl_location: /usr/sbin/smartctl
collect_not_more_than_period: 20s collect_not_more_than_period: 20s
devices: devices:
@ -11,3 +11,9 @@ smartctl_exporter:
- /dev/sdd - /dev/sdd
- /dev/sde - /dev/sde
- /dev/sdf - /dev/sdf
- /dev/sdg
- /dev/sdh
- /dev/sdi
- /dev/sdj
- /dev/sdk
- /dev/sdl