smartctl_exporter/main.go
Konstantin Shalygin 7489f4f7aa
Add support for device types and predictable device paths (rebased) (#257)
* Add better error logging on smartctl exec failure

We will now log a warning if smartctl path passed via command line is invalid.

Signed-off-by: Piotr Dobrowolski <admin@tastycode.pl>
(cherry picked from commit 1c9c6943e8)

* Add support for autoscan device types and predictable device paths

This adds a new command line option allowing for customization of
autodetected device types and enables use of special "by-id" device type
that forces use of predictable device paths (/dev/disk/by-id/...)

Relevant change to device name parsing regular expression is included
now, so predictable device paths are now also usable when directly
specified.

Signed-off-by: Piotr Dobrowolski <admin@tastycode.pl>
(cherry picked from commit 4c5f721e11)

Conflicts:
  - file: 'readjson.go'
    comment: 'manually resolve new logger issues'

* Rework device label, fix SATA discovery, per-device type specification

Signed-off-by: Piotr Dobrowolski <admin@tastycode.pl>
(cherry picked from commit 319184ce66)

Conflicts:
  - file: 'main.go'
    comment: 'manually resolve new logger issues'
  - file: 'readjson.go'
    comment: 'manually resolve new logger issues'

---------

Co-authored-by: Piotr Dobrowolski <admin@tastycode.pl>
2024-12-19 11:02:09 +01:00

253 lines
7.6 KiB
Go

// Copyright 2022 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"log/slog"
"net/http"
"os"
"strings"
"sync"
"time"
kingpin "github.com/alecthomas/kingpin/v2"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/collectors"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/prometheus/common/promslog"
"github.com/prometheus/common/promslog/flag"
"github.com/prometheus/common/version"
"github.com/prometheus/exporter-toolkit/web"
webflag "github.com/prometheus/exporter-toolkit/web/kingpinflag"
)
// Device
type Device struct {
Name string
Type string
Label string
}
func (d Device) String() string {
return d.Name + ";" + d.Type + " (" + d.Label + ")"
}
// SMARTctlManagerCollector implements the Collector interface.
type SMARTctlManagerCollector struct {
CollectPeriod string
CollectPeriodDuration time.Duration
Devices []Device
logger *slog.Logger
mutex sync.Mutex
}
// Describe sends the super-set of all possible descriptors of metrics
func (i *SMARTctlManagerCollector) Describe(ch chan<- *prometheus.Desc) {
prometheus.DescribeByCollect(i, ch)
}
// Collect is called by the Prometheus registry when collecting metrics.
func (i *SMARTctlManagerCollector) Collect(ch chan<- prometheus.Metric) {
info := NewSMARTctlInfo(ch)
i.mutex.Lock()
for _, device := range i.Devices {
json := readData(i.logger, device)
if json.Exists() {
info.SetJSON(json)
smart := NewSMARTctl(i.logger, json, ch)
smart.Collect()
}
}
ch <- prometheus.MustNewConstMetric(
metricDeviceCount,
prometheus.GaugeValue,
float64(len(i.Devices)),
)
info.Collect()
i.mutex.Unlock()
}
func (i *SMARTctlManagerCollector) RescanForDevices() {
for {
time.Sleep(*smartctlRescanInterval)
i.logger.Info("Rescanning for devices")
devices := scanDevices(i.logger)
devices = buildDevicesFromFlag(devices)
i.mutex.Lock()
i.Devices = devices
i.mutex.Unlock()
}
}
var (
smartctlPath = kingpin.Flag("smartctl.path",
"The path to the smartctl binary",
).Default("/usr/sbin/smartctl").String()
smartctlInterval = kingpin.Flag("smartctl.interval",
"The interval between smartctl polls",
).Default("60s").Duration()
smartctlRescanInterval = kingpin.Flag("smartctl.rescan",
"The interval between rescanning for new/disappeared devices. If the interval is smaller than 1s no rescanning takes place. If any devices are configured with smartctl.device also no rescanning takes place.",
).Default("10m").Duration()
smartctlScan = kingpin.Flag("smartctl.scan", "Enable scanning. This is a default if no devices are specified").Default("false").Bool()
smartctlDevices = kingpin.Flag("smartctl.device",
"The device to monitor. Device type can be specified after a semicolon, eg. '/dev/bus/0;megaraid,1' (repeatable)",
).Strings()
smartctlDeviceExclude = kingpin.Flag(
"smartctl.device-exclude",
"Regexp of devices to exclude from automatic scanning. (mutually exclusive to device-include)",
).Default("").String()
smartctlDeviceInclude = kingpin.Flag(
"smartctl.device-include",
"Regexp of devices to exclude from automatic scanning. (mutually exclusive to device-exclude)",
).Default("").String()
smartctlScanDeviceTypes = kingpin.Flag(
"smartctl.scan-device-type",
"Device type to use during automatic scan. Special by-id value forces predictable device names. (repeatable)",
).Strings()
smartctlFakeData = kingpin.Flag("smartctl.fake-data",
"The device to monitor (repeatable)",
).Default("false").Hidden().Bool()
)
// scanDevices uses smartctl to gather the list of available devices.
func scanDevices(logger *slog.Logger) []Device {
filter := newDeviceFilter(*smartctlDeviceExclude, *smartctlDeviceInclude)
json := readSMARTctlDevices(logger)
scanDevices := json.Get("devices").Array()
var scanDeviceResult []Device
for _, d := range scanDevices {
deviceName := d.Get("name").String()
deviceType := d.Get("type").String()
// SATA devices are reported as SCSI during scan - fallback to auto scraping
if deviceType == "scsi" {
deviceType = "auto"
}
deviceLabel := buildDeviceLabel(deviceName, deviceType)
if filter.ignored(deviceLabel) {
logger.Info("Ignoring device", "name", deviceLabel)
} else {
logger.Info("Found device", "name", deviceLabel)
device := Device{
Name: deviceName,
Type: deviceType,
Label: deviceLabel,
}
scanDeviceResult = append(scanDeviceResult, device)
}
}
return scanDeviceResult
}
func buildDevicesFromFlag(devices []Device) []Device {
// TODO: deduplication?
for _, device := range *smartctlDevices {
deviceName, deviceType, _ := strings.Cut(device, ";")
if deviceType == "" {
deviceType = "auto"
}
devices = append(devices, Device{
Name: deviceName,
Type: deviceType,
Label: buildDeviceLabel(deviceName, deviceType),
})
}
return devices
}
func main() {
metricsPath := kingpin.Flag(
"web.telemetry-path", "Path under which to expose metrics",
).Default("/metrics").String()
toolkitFlags := webflag.AddFlags(kingpin.CommandLine, ":9633")
promslogConfig := &promslog.Config{}
flag.AddFlags(kingpin.CommandLine, promslogConfig)
kingpin.Version(version.Print("smartctl_exporter"))
kingpin.HelpFlag.Short('h')
kingpin.Parse()
logger := promslog.New(promslogConfig)
logger.Info("Starting smartctl_exporter", "version", version.Info())
logger.Info("Build context", "build_context", version.BuildContext())
var devices []Device
if len(*smartctlDevices) == 0 {
*smartctlScan = true
}
if *smartctlScan {
devices = scanDevices(logger)
logger.Info("Number of devices found", "count", len(devices))
}
if len(*smartctlDevices) > 0 {
logger.Info("Devices specified", "devices", strings.Join(*smartctlDevices, ", "))
devices = buildDevicesFromFlag(devices)
logger.Info("Devices filtered", "count", len(devices))
}
collector := SMARTctlManagerCollector{
Devices: devices,
logger: logger,
}
if *smartctlScan && *smartctlRescanInterval >= 1*time.Second {
logger.Info("Start background scan process")
logger.Info("Rescanning for devices every", "rescanInterval", *smartctlRescanInterval)
go collector.RescanForDevices()
}
reg := prometheus.NewPedanticRegistry()
reg.MustRegister(
collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}),
collectors.NewGoCollector(),
)
prometheus.WrapRegistererWithPrefix("", reg).MustRegister(&collector)
http.Handle(*metricsPath, promhttp.HandlerFor(reg, promhttp.HandlerOpts{}))
if *metricsPath != "/" && *metricsPath != "" {
landingConfig := web.LandingConfig{
Name: "smartctl_exporter",
Description: "Prometheus Exporter for S.M.A.R.T. devices",
Version: version.Info(),
Links: []web.LandingLinks{
{
Address: *metricsPath,
Text: "Metrics",
},
},
}
landingPage, err := web.NewLandingPage(landingConfig)
if err != nil {
logger.Error("error creating landing page", "err", err)
os.Exit(1)
}
http.Handle("/", landingPage)
}
srv := &http.Server{}
if err := web.ListenAndServe(srv, toolkitFlags, logger); err != nil {
logger.Error("error running HTTP server", "err", err)
os.Exit(1)
}
}