diff --git a/.gitignore b/.gitignore index 4e0aca0..36784d7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ vendor bin +debug *.json Manifest diff --git a/Makefile b/Makefile index fb57953..1b88cc8 100644 --- a/Makefile +++ b/Makefile @@ -31,3 +31,8 @@ example: @echo '```' >> EXAMPLE.md @curl -s localhost:9633/metrics | grep smartctl >> EXAMPLE.md @echo '```' >> EXAMPLE.md + +collect_fake_json: + -mkdir debug + -rm -f debug/*json + sudo ./collect_fake_json.sh diff --git a/collect_fake_json.sh b/collect_fake_json.sh index 87ed1c7..1b8abb4 100755 --- a/collect_fake_json.sh +++ b/collect_fake_json.sh @@ -2,5 +2,5 @@ for device in $(smartctl --scan | awk '{ print $1}') do - smartctl --json --xall $device | jq > $(basename $device).json + smartctl --json --xall $device | jq > debug/$(basename $device).json done diff --git a/Dockerfile b/docker/Dockerfile similarity index 100% rename from Dockerfile rename to docker/Dockerfile diff --git a/config.yaml b/docker/config.yaml similarity index 100% rename from config.yaml rename to docker/config.yaml diff --git a/docker-entrypoint.sh b/docker/docker-entrypoint.sh similarity index 100% rename from docker-entrypoint.sh rename to docker/docker-entrypoint.sh diff --git a/main.go b/main.go index 4c769b4..6e36426 100644 --- a/main.go +++ b/main.go @@ -26,10 +26,13 @@ func (i SMARTctlManagerCollector) Describe(ch chan<- *prometheus.Desc) { func (i SMARTctlManagerCollector) Collect(ch chan<- prometheus.Metric) { info := NewSMARTctlInfo(ch) for _, device := range options.SMARTctl.Devices { - json := readData(device) - info.SetJSON(json) - smart := NewSMARTctl(json, ch) - smart.Collect() + if json, err := readData(device); err == nil { + info.SetJSON(json) + smart := NewSMARTctl(json, ch) + smart.Collect() + } else { + logger.Error(err.Error()) + } } info.Collect() } diff --git a/readjson.go b/readjson.go index 39141e1..fb8935e 100644 --- a/readjson.go +++ b/readjson.go @@ -3,6 +3,7 @@ package main import ( "fmt" "io/ioutil" + "os" "os/exec" "strings" "time" @@ -35,7 +36,7 @@ func parseJSON(data string) gjson.Result { // Reading fake smartctl json func readFakeSMARTctl(device string) gjson.Result { splitted := strings.Split(device, "/") - filename := fmt.Sprintf("%s.json", splitted[len(splitted)-1]) + filename := fmt.Sprintf("debug/%s.json", splitted[len(splitted)-1]) logger.Verbose("Read fake S.M.A.R.T. data from json: %s", filename) jsonFile, err := ioutil.ReadFile(filename) if err != nil { @@ -46,30 +47,93 @@ func readFakeSMARTctl(device string) gjson.Result { } // Get json from smartctl and parse it -func readSMARTctl(device string) gjson.Result { +func readSMARTctl(device string) (gjson.Result, bool) { logger.Debug("Collecting S.M.A.R.T. counters, device: %s", device) out, err := exec.Command(options.SMARTctl.SMARTctlLocation, "--json", "--xall", device).Output() if err != nil { logger.Warning("S.M.A.R.T. output reading error: %s", err) } - return parseJSON(string(out)) + json := parseJSON(string(out)) + rcOk := resultCodeIsOk(json.Get("smartctl.exit_status").Int()) + jsonOk := jsonIsOk(json) + return json, rcOk && jsonOk } // Select json source and parse -func readData(device string) gjson.Result { +func readData(device string) (gjson.Result, error) { if options.SMARTctl.FakeJSON { - return readFakeSMARTctl(device) + return readFakeSMARTctl(device), nil } - if value, ok := jsonCache[device]; ok { - // logger.Debug("Cache exists") - if time.Now().After(value.LastCollect.Add(options.SMARTctl.CollectPeriodDuration)) { - // logger.Debug("Cache update") - jsonCache[device] = JSONCache{JSON: readSMARTctl(device), LastCollect: time.Now()} + if _, err := os.Stat(device); err == nil { + cacheValue, cacheOk := jsonCache[device] + timeToScan := false + if cacheOk { + timeToScan = time.Now().After(cacheValue.LastCollect.Add(options.SMARTctl.CollectPeriodDuration)) + } else { + timeToScan = true } - } else { - // logger.Debug("Cache not exists") - jsonCache[device] = JSONCache{JSON: readSMARTctl(device), LastCollect: time.Now()} + + if timeToScan { + json, ok := readSMARTctl(device) + if ok { + jsonCache[device] = JSONCache{JSON: json, LastCollect: time.Now()} + return jsonCache[device].JSON, nil + } + return gjson.Parse("{}"), fmt.Errorf("smartctl returned bad data for device %s", device) + } + return gjson.Parse("{}"), fmt.Errorf("Too early collect called for device %s", device) } - return jsonCache[device].JSON + return gjson.Parse("{}"), fmt.Errorf("Device %s unavialable", device) +} + +// Parse smartctl return code +func resultCodeIsOk(SMARTCtlResult int64) bool { + result := true + if SMARTCtlResult > 0 { + bits := fmt.Sprintf("%08b", SMARTCtlResult) + // logger.Debug("Return code: %d: %s", SMARTCtlResult, bits) + if bits[0] == '1' { + logger.Error("Command line did not parse.") + result = false + } + if bits[1] == '1' { + logger.Error("Device open failed, device did not return an IDENTIFY DEVICE structure, or device is in a low-power mode") + result = false + } + if bits[2] == '1' { + logger.Warning("Some SMART or other ATA command to the disk failed, or there was a checksum error in a SMART data structure") + } + if bits[3] == '1' { + logger.Warning("SMART status check returned 'DISK FAILING'.") + } + if bits[4] == '1' { + logger.Warning("We found prefail Attributes <= threshold.") + } + if bits[5] == '1' { + logger.Warning("SMART status check returned 'DISK OK' but we found that some (usage or prefail) Attributes have been <= threshold at some time in the past.") + } + if bits[6] == '1' { + logger.Warning("The device error log contains records of errors.") + } + if bits[7] == '1' { + logger.Warning("The device self-test log contains records of errors. [ATA only] Failed self-tests outdated by a newer successful extended self-test are ignored.") + } + } + return result +} + +// Check json +func jsonIsOk(json gjson.Result) bool { + messages := json.Get("smartctl.messages") + // logger.Debug(messages.String()) + if messages.Exists() { + for _, message := range messages.Array() { + if message.Get("severity").String() == "error" { + logger.Error(message.Get("string").String()) + return false + } + } + } + return true } diff --git a/smartctl.go b/smartctl.go index 24930ae..6e3f8e6 100644 --- a/smartctl.go +++ b/smartctl.go @@ -34,12 +34,12 @@ func NewSMARTctl(json gjson.Result, ch chan<- prometheus.Metric) SMARTctl { family: strings.TrimSpace(smart.json.Get("model_family").String()), model: strings.TrimSpace(smart.json.Get("model_name").String()), } - logger.Verbose("Collecting metrics from %s: %s, %s", smart.device.device, smart.device.family, smart.device.model) return smart } // Collect metrics func (smart *SMARTctl) Collect() { + logger.Verbose("Collecting metrics from %s: %s, %s", smart.device.device, smart.device.family, smart.device.model) smart.mineExitStatus() smart.mineDevice() smart.mineCapacity() diff --git a/smartctl_exporter.yaml b/smartctl_exporter.yaml index 798037b..3e33048 100644 --- a/smartctl_exporter.yaml +++ b/smartctl_exporter.yaml @@ -1,7 +1,7 @@ smartctl_exporter: bind_to: "[::1]:9633" url_path: "/metrics" - fake_json: yes + fake_json: no smartctl_location: /usr/sbin/smartctl collect_not_more_than_period: 20s devices: @@ -11,3 +11,9 @@ smartctl_exporter: - /dev/sdd - /dev/sde - /dev/sdf + - /dev/sdg + - /dev/sdh + - /dev/sdi + - /dev/sdj + - /dev/sdk + - /dev/sdl