Merge branch 'chripede-master' into merging_commits

This commit is contained in:
Горлов Максим 2020-11-14 18:30:46 +03:00
commit e2df698099
6 changed files with 247 additions and 8 deletions

2
.gitignore vendored
View File

@ -3,3 +3,5 @@ bin
*.json
Manifest
.idea
smartctl_exporter

11
main.go
View File

@ -36,6 +36,17 @@ func (i SMARTctlManagerCollector) Collect(ch chan<- prometheus.Metric) {
// init loads the exporter options and, when the configuration lists no
// devices, fills options.SMARTctl.Devices with every device name reported by
// readSMARTctlDevices.
func init() {
	options = loadOptions()
	if len(options.SMARTctl.Devices) != 0 {
		// Explicitly configured devices are used as-is; no discovery needed.
		return
	}

	logger.Debug("No devices specified, trying to load them automatically")
	scan := readSMARTctlDevices()
	for _, entry := range scan.Get("devices").Array() {
		name := entry.Get("name").String()
		logger.Debug("Found device: %s", name)
		options.SMARTctl.Devices = append(options.SMARTctl.Devices, name)
	}
}
func main() {

View File

@ -140,6 +140,105 @@ var (
},
nil,
)
// metricDevicePercentageUsed describes smartctl_device_percentage_used,
// labelled by device, model family, model name and serial number.
metricDevicePercentageUsed = prometheus.NewDesc(
	"smartctl_device_percentage_used",
	"Device write percentage used",
	[]string{"device", "model_family", "model_name", "serial_number"},
	nil,
)
// metricDeviceAvailableSpare describes smartctl_device_available_spare,
// labelled by device, model family, model name and serial number.
metricDeviceAvailableSpare = prometheus.NewDesc(
	"smartctl_device_available_spare",
	"Normalized percentage (0 to 100%) of the remaining spare capacity available",
	[]string{"device", "model_family", "model_name", "serial_number"},
	nil,
)
// metricDeviceAvailableSpareThreshold describes
// smartctl_device_available_spare_threshold, labelled by device, model
// family, model name and serial number.
metricDeviceAvailableSpareThreshold = prometheus.NewDesc(
	"smartctl_device_available_spare_threshold",
	"When the Available Spare falls below the threshold indicated in this field, an asynchronous event completion may occur. The value is indicated as a normalized percentage (0 to 100%)",
	[]string{"device", "model_family", "model_name", "serial_number"},
	nil,
)
// metricDeviceCriticalWarning describes smartctl_device_critical_warning,
// labelled by device, model family, model name and serial number.
metricDeviceCriticalWarning = prometheus.NewDesc(
	"smartctl_device_critical_warning",
	"This field indicates critical warnings for the state of the controller",
	[]string{"device", "model_family", "model_name", "serial_number"},
	nil,
)
// metricDeviceMediaErrors describes smartctl_device_media_errors, labelled by
// device, model family, model name and serial number.
metricDeviceMediaErrors = prometheus.NewDesc(
	"smartctl_device_media_errors",
	"Contains the number of occurrences where the controller detected an unrecovered data integrity error. Errors such as uncorrectable ECC, CRC checksum failure, or LBA tag mismatch are included in this field",
	[]string{"device", "model_family", "model_name", "serial_number"},
	nil,
)
// metricDeviceNumErrLogEntries describes smartctl_device_num_err_log_entries,
// labelled by device, model family, model name and serial number.
metricDeviceNumErrLogEntries = prometheus.NewDesc(
	"smartctl_device_num_err_log_entries",
	"Contains the number of Error Information log entries over the life of the controller",
	[]string{"device", "model_family", "model_name", "serial_number"},
	nil,
)
// metricDeviceBytesRead describes smartctl_device_bytes_read, labelled by
// device, model family, model name and serial number. The help text was
// previously empty, which left the exposed metric undocumented.
metricDeviceBytesRead = prometheus.NewDesc(
	"smartctl_device_bytes_read",
	"Total bytes the host has read from the device, computed from the NVMe data units read counter",
	[]string{"device", "model_family", "model_name", "serial_number"},
	nil,
)
// metricDeviceBytesWritten describes smartctl_device_bytes_written, labelled
// by device, model family, model name and serial number. The help text was
// previously empty, which left the exposed metric undocumented.
metricDeviceBytesWritten = prometheus.NewDesc(
	"smartctl_device_bytes_written",
	"Total bytes the host has written to the device, computed from the NVMe data units written counter",
	[]string{"device", "model_family", "model_name", "serial_number"},
	nil,
)
// metricDeviceSmartStatus describes smartctl_device_smart_status, labelled by
// device, model family, model name and serial number.
metricDeviceSmartStatus = prometheus.NewDesc(
	"smartctl_device_smart_status",
	"General smart status",
	[]string{"device", "model_family", "model_name", "serial_number"},
	nil,
)
metricDeviceExitStatus = prometheus.NewDesc(
"smartctl_device_smartctl_exit_status",
"Exit status of smartctl on device",

View File

@ -55,6 +55,15 @@ func readSMARTctl(device string) gjson.Result {
return parseJSON(string(out))
}
// readSMARTctlDevices runs the configured smartctl binary with
// "--json --scan-open" and returns its standard output parsed as JSON.
// A command error is only logged as a warning; whatever output was captured
// is still handed to parseJSON.
func readSMARTctlDevices() gjson.Result {
	logger.Debug("Collecting devices")
	scan := exec.Command(options.SMARTctl.SMARTctlLocation, "--json", "--scan-open")
	raw, err := scan.Output()
	if err != nil {
		logger.Warning("S.M.A.R.T. output reading error: %s", err)
	}
	return parseJSON(string(raw))
}
// Select json source and parse
func readData(device string) gjson.Result {
if options.SMARTctl.FakeJSON {

View File

@ -57,6 +57,16 @@ func (smart *SMARTctl) Collect() {
smart.mineDeviceErrorLog()
smart.mineDeviceSelfTestLog()
smart.mineDeviceERC()
smart.minePercentageUsed()
smart.mineAvailableSpare()
smart.mineAvailableSpareThreshold()
smart.mineCriticalWarning()
smart.mineMediaErrors()
smart.mineNumErrLogEntries()
smart.mineBytesRead()
smart.mineBytesWritten()
smart.mineSmartStatus()
}
func (smart *SMARTctl) mineExitStatus() {
@ -252,6 +262,116 @@ func (smart *SMARTctl) mineDeviceSCTStatus() {
}
}
// minePercentageUsed sends the smartctl_device_percentage_used metric, read
// from nvme_smart_health_information_log.percentage_used in the device JSON.
func (smart *SMARTctl) minePercentageUsed() {
	used := smart.json.Get("nvme_smart_health_information_log.percentage_used").Float()
	metric := prometheus.MustNewConstMetric(
		metricDevicePercentageUsed,
		prometheus.CounterValue,
		used,
		smart.device.device,
		smart.device.family,
		smart.device.model,
		smart.device.serial,
	)
	smart.ch <- metric
}
// mineAvailableSpare sends the smartctl_device_available_spare metric, read
// from nvme_smart_health_information_log.available_spare in the device JSON.
func (smart *SMARTctl) mineAvailableSpare() {
	spare := smart.json.Get("nvme_smart_health_information_log.available_spare").Float()
	smart.ch <- prometheus.MustNewConstMetric(
		metricDeviceAvailableSpare,
		// Remaining spare capacity is a percentage that shrinks as the
		// device wears, so it is a gauge; counters must never decrease.
		prometheus.GaugeValue,
		spare,
		smart.device.device,
		smart.device.family,
		smart.device.model,
		smart.device.serial,
	)
}
// mineAvailableSpareThreshold sends the
// smartctl_device_available_spare_threshold metric, read from
// nvme_smart_health_information_log.available_spare_threshold in the device
// JSON.
func (smart *SMARTctl) mineAvailableSpareThreshold() {
	threshold := smart.json.Get("nvme_smart_health_information_log.available_spare_threshold").Float()
	smart.ch <- prometheus.MustNewConstMetric(
		metricDeviceAvailableSpareThreshold,
		// The threshold is a fixed percentage level, not an accumulating
		// count, so it is exposed as a gauge.
		prometheus.GaugeValue,
		threshold,
		smart.device.device,
		smart.device.family,
		smart.device.model,
		smart.device.serial,
	)
}
// mineCriticalWarning sends the smartctl_device_critical_warning metric, read
// from nvme_smart_health_information_log.critical_warning in the device JSON.
func (smart *SMARTctl) mineCriticalWarning() {
	warning := smart.json.Get("nvme_smart_health_information_log.critical_warning").Float()
	smart.ch <- prometheus.MustNewConstMetric(
		metricDeviceCriticalWarning,
		// The critical warning field reflects current controller state and
		// can return to zero, so it is a gauge rather than a counter.
		prometheus.GaugeValue,
		warning,
		smart.device.device,
		smart.device.family,
		smart.device.model,
		smart.device.serial,
	)
}
// mineMediaErrors sends the smartctl_device_media_errors counter, read from
// nvme_smart_health_information_log.media_errors in the device JSON.
func (smart *SMARTctl) mineMediaErrors() {
	errorCount := smart.json.Get("nvme_smart_health_information_log.media_errors").Float()
	metric := prometheus.MustNewConstMetric(
		metricDeviceMediaErrors,
		prometheus.CounterValue,
		errorCount,
		smart.device.device,
		smart.device.family,
		smart.device.model,
		smart.device.serial,
	)
	smart.ch <- metric
}
// mineNumErrLogEntries sends the smartctl_device_num_err_log_entries counter,
// read from nvme_smart_health_information_log.num_err_log_entries in the
// device JSON.
func (smart *SMARTctl) mineNumErrLogEntries() {
	entries := smart.json.Get("nvme_smart_health_information_log.num_err_log_entries").Float()
	metric := prometheus.MustNewConstMetric(
		metricDeviceNumErrLogEntries,
		prometheus.CounterValue,
		entries,
		smart.device.device,
		smart.device.family,
		smart.device.model,
		smart.device.serial,
	)
	smart.ch <- metric
}
// mineBytesRead sends the smartctl_device_bytes_read counter, converting
// nvme_smart_health_information_log.data_units_read from the device JSON
// into bytes.
func (smart *SMARTctl) mineBytesRead() {
	// The NVMe specification reports data units in thousands of 512-byte
	// units, independent of the namespace's LBA size. One data unit is
	// therefore always 512 * 1000 bytes; scaling by logical_block_size * 1024
	// over-reports, badly so on 4096-byte-sector devices.
	const bytesPerDataUnit = 512 * 1000
	dataUnits := smart.json.Get("nvme_smart_health_information_log.data_units_read").Float()
	smart.ch <- prometheus.MustNewConstMetric(
		metricDeviceBytesRead,
		prometheus.CounterValue,
		dataUnits*bytesPerDataUnit,
		smart.device.device,
		smart.device.family,
		smart.device.model,
		smart.device.serial,
	)
}
// mineBytesWritten sends the smartctl_device_bytes_written counter,
// converting nvme_smart_health_information_log.data_units_written from the
// device JSON into bytes.
func (smart *SMARTctl) mineBytesWritten() {
	// The NVMe specification reports data units in thousands of 512-byte
	// units, independent of the namespace's LBA size. One data unit is
	// therefore always 512 * 1000 bytes; scaling by logical_block_size * 1024
	// over-reports, badly so on 4096-byte-sector devices.
	const bytesPerDataUnit = 512 * 1000
	dataUnits := smart.json.Get("nvme_smart_health_information_log.data_units_written").Float()
	smart.ch <- prometheus.MustNewConstMetric(
		metricDeviceBytesWritten,
		prometheus.CounterValue,
		dataUnits*bytesPerDataUnit,
		smart.device.device,
		smart.device.family,
		smart.device.model,
		smart.device.serial,
	)
}
// mineSmartStatus sends the smartctl_device_smart_status gauge, taking its
// value from smart_status.passed in the device JSON converted with Float().
func (smart *SMARTctl) mineSmartStatus() {
	passed := smart.json.Get("smart_status.passed").Float()
	metric := prometheus.MustNewConstMetric(
		metricDeviceSmartStatus,
		prometheus.GaugeValue,
		passed,
		smart.device.device,
		smart.device.family,
		smart.device.model,
		smart.device.serial,
	)
	smart.ch <- metric
}
func (smart *SMARTctl) mineDeviceStatistics() {
for _, page := range smart.json.Get("ata_device_statistics.pages").Array() {
table := strings.TrimSpace(page.Get("name").String())

14
smartctl_exporter.yaml Normal file → Executable file
View File

@ -1,13 +1,11 @@
smartctl_exporter:
bind_to: "[::1]:9633"
bind_to: "0.0.0.0:9633"
url_path: "/metrics"
fake_json: no
smartctl_location: /usr/sbin/smartctl
collect_not_more_than_period: 20s
devices:
- /dev/sda
- /dev/sdb
- /dev/sdc
- /dev/sdd
- /dev/sde
- /dev/sdf
# devices:
# - /dev/nvme0
# - /dev/nvme1
# - /dev/nvme2
# - /dev/nvme3