Use a worker pool to run smartctl in parallel (for issue #197).

Signed-off-by: Póka Balázs <poka@idata.hu>
This commit is contained in:
Póka Balázs 2024-02-27 21:13:19 +01:00
parent 2cc2249821
commit f25870d8b6
2 changed files with 49 additions and 13 deletions

View File

@ -59,6 +59,7 @@ func (i *SMARTctlManagerCollector) Describe(ch chan<- *prometheus.Desc) {
func (i *SMARTctlManagerCollector) Collect(ch chan<- prometheus.Metric) {
info := NewSMARTctlInfo(ch)
i.mutex.Lock()
refreshAllDevices(i.logger, i.Devices)
for _, device := range i.Devices {
json := readData(i.logger, device)
if json.Exists() {

View File

@ -32,6 +32,17 @@ type JSONCache struct {
LastCollect time.Time
}
// SMARTresult is the outcome of one smartctl invocation, as sent over the
// results channel by readSMARTctl and consumed by refreshAllDevices.
// NOTE: readSMARTctl builds this with a positional composite literal, so the
// field order must not change.
type SMARTresult struct {
// device is the device that was queried; it is used as the jsonCache key.
device Device
// JSON is the parsed smartctl output for the device.
JSON gjson.Result
// ok reports whether both the smartctl exit status and the JSON payload
// passed validation; only results with ok == true are cached.
ok bool
}
// SMARTctlWorkerPool tracks the smartctl reads started during one refresh
// pass. refreshData starts one goroutine per device that needs refreshing and
// refreshAllDevices waits for all of them.
// NOTE: createPool builds this with a positional composite literal, so the
// field order must not change.
type SMARTctlWorkerPool struct {
// results is the channel on which each started goroutine sends its result.
results chan SMARTresult
// expected counts results still outstanding: incremented when a read is
// started, decremented when its result is received.
expected int
}
var (
jsonCache sync.Map
)
@ -40,6 +51,10 @@ func init() {
jsonCache.Store("", JSONCache{})
}
// createPool returns a fresh pool with an unbuffered results channel and no
// outstanding reads.
func createPool() SMARTctlWorkerPool {
	return SMARTctlWorkerPool{
		results:  make(chan SMARTresult),
		expected: 0,
	}
}
// Parse json to gjson object
func parseJSON(data string) gjson.Result {
if !gjson.Valid(data) {
@ -62,7 +77,7 @@ func readFakeSMARTctl(logger log.Logger, device Device) gjson.Result {
}
// Get json from smartctl and parse it
func readSMARTctl(logger log.Logger, device Device) (gjson.Result, bool) {
func readSMARTctl(logger log.Logger, device Device, results chan<- SMARTresult) {
start := time.Now()
out, err := exec.Command(*smartctlPath, "--json", "--info", "--health", "--attributes", "--tolerance=verypermissive", "--nocheck=standby", "--format=brief", "--log=error", "--device="+device.Type, device.Name).Output()
if err != nil {
@ -72,7 +87,7 @@ func readSMARTctl(logger log.Logger, device Device) (gjson.Result, bool) {
rcOk := resultCodeIsOk(logger, device, json.Get("smartctl.exit_status").Int())
jsonOk := jsonIsOk(logger, json)
level.Debug(logger).Log("msg", "Collected S.M.A.R.T. json data", "device", device.Info_Name, "duration", time.Since(start))
return json, rcOk && jsonOk
results <- SMARTresult{device, json, rcOk && jsonOk}
}
func readSMARTctlDevices(logger log.Logger) gjson.Result {
@ -89,23 +104,43 @@ func readSMARTctlDevices(logger log.Logger) gjson.Result {
return parseJSON(string(out))
}
// refreshAllDevices brings the JSON cache up to date for the given devices.
//
// For every device, refreshData decides whether a new smartctl read is needed
// and, if so, starts it in its own goroutine. This function then blocks until
// one result per started read has arrived, storing each successful result in
// jsonCache stamped with the time it was received. Failed reads leave the
// existing cache entry untouched.
//
// It does nothing when fake data is enabled.
func refreshAllDevices(logger log.Logger, devices []Device) {
	if *smartctlFakeData {
		return
	}

	pool := createPool()
	for idx := range devices {
		refreshData(logger, devices[idx], &pool)
	}

	// Receive exactly one result per started read; pool.expected reaches
	// zero only after every goroutine has delivered its result.
	for ; pool.expected > 0; pool.expected-- {
		res := <-pool.results
		if !res.ok {
			continue
		}
		entry := JSONCache{JSON: res.JSON, LastCollect: time.Now()}
		jsonCache.Store(res.device, entry)
	}

	// All senders have finished at this point, so closing is safe.
	close(pool.results)
}
// refreshData starts an asynchronous smartctl read for device when the device
// has no entry in jsonCache or its entry is older than *smartctlInterval.
// Each started read is recorded in pool.expected so the caller knows how many
// results to receive from pool.results. A device with a fresh cache entry is
// left alone.
func refreshData(logger log.Logger, device Device, pool *SMARTctlWorkerPool) {
	if entry, cached := jsonCache.Load(device); cached {
		expiry := entry.(JSONCache).LastCollect.Add(*smartctlInterval)
		if !time.Now().After(expiry) {
			// Cache entry is still within the refresh interval.
			return
		}
	}

	pool.expected++
	go readSMARTctl(logger, device, pool.results)
}
func readData(logger log.Logger, device Device) gjson.Result {
if *smartctlFakeData {
return readFakeSMARTctl(logger, device)
}
cacheValue, cacheOk := jsonCache.Load(device)
if !cacheOk || time.Now().After(cacheValue.(JSONCache).LastCollect.Add(*smartctlInterval)) {
json, ok := readSMARTctl(logger, device)
if ok {
jsonCache.Store(device, JSONCache{JSON: json, LastCollect: time.Now()})
j, found := jsonCache.Load(device)
cacheValue, found := jsonCache.Load(device)
if !found {
level.Warn(logger).Log("msg", "device not found", "device", device.Info_Name)
}
return j.(JSONCache).JSON
}
return gjson.Result{}
}
return cacheValue.(JSONCache).JSON