Fix and simplify parsing of raid metrics
Fixes the incorrect reporting of the active and total disk metrics for inactive RAID arrays. Also simplifies the code and removes a couple of redundant comments.
pull/522/head
parent
faa7483215
commit
0400e437be
|
@ -798,7 +798,7 @@ node_md_disks{device="md10"} 2
|
|||
node_md_disks{device="md11"} 2
|
||||
node_md_disks{device="md12"} 2
|
||||
node_md_disks{device="md127"} 2
|
||||
node_md_disks{device="md219"} 2
|
||||
node_md_disks{device="md219"} 0
|
||||
node_md_disks{device="md3"} 8
|
||||
node_md_disks{device="md4"} 2
|
||||
node_md_disks{device="md6"} 2
|
||||
|
@ -813,7 +813,7 @@ node_md_disks_active{device="md10"} 2
|
|||
node_md_disks_active{device="md11"} 2
|
||||
node_md_disks_active{device="md12"} 2
|
||||
node_md_disks_active{device="md127"} 2
|
||||
node_md_disks_active{device="md219"} 2
|
||||
node_md_disks_active{device="md219"} 0
|
||||
node_md_disks_active{device="md3"} 8
|
||||
node_md_disks_active{device="md4"} 2
|
||||
node_md_disks_active{device="md6"} 1
|
||||
|
|
|
@ -36,8 +36,8 @@ var (
|
|||
)
|
||||
|
||||
type mdStatus struct {
|
||||
mdName string
|
||||
isActive bool
|
||||
name string
|
||||
active bool
|
||||
disksActive int64
|
||||
disksTotal int64
|
||||
blocksTotal int64
|
||||
|
@ -136,97 +136,78 @@ func parseMdstat(mdStatusFilePath string) ([]mdStatus, error) {
|
|||
return []mdStatus{}, fmt.Errorf("error parsing mdstat: %s", err)
|
||||
}
|
||||
|
||||
mdStatusFile := string(content)
|
||||
|
||||
lines := strings.Split(mdStatusFile, "\n")
|
||||
var (
|
||||
currentMD string
|
||||
personality string
|
||||
active, total, size int64
|
||||
)
|
||||
|
||||
lines := strings.Split(string(content), "\n")
|
||||
// Each md has at least the deviceline, statusline and one empty line afterwards
|
||||
// so we will have probably something of the order len(lines)/3 devices
|
||||
// so we use that for preallocation.
|
||||
estimateMDs := len(lines) / 3
|
||||
mdStates := make([]mdStatus, 0, estimateMDs)
|
||||
|
||||
for i, l := range lines {
|
||||
if l == "" {
|
||||
// Skip entirely empty lines.
|
||||
mdStates := make([]mdStatus, 0, len(lines)/3)
|
||||
for i, line := range lines {
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
if line[0] == ' ' || line[0] == '\t' {
|
||||
// Lines starting with white space are not the beginning of a md-section.
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(line, "Personalities") || strings.HasPrefix(line, "unused") {
|
||||
// These lines contain general information.
|
||||
continue
|
||||
}
|
||||
|
||||
if l[0] == ' ' || l[0] == '\t' {
|
||||
// Those lines are not the beginning of a md-section.
|
||||
continue
|
||||
}
|
||||
|
||||
if strings.HasPrefix(l, "Personalities") || strings.HasPrefix(l, "unused") {
|
||||
// We aren't interested in lines with general info.
|
||||
continue
|
||||
}
|
||||
|
||||
mainLine := strings.Split(l, " ")
|
||||
mainLine := strings.Split(line, " ")
|
||||
if len(mainLine) < 4 {
|
||||
return mdStates, fmt.Errorf("error parsing mdline: %s", l)
|
||||
return mdStates, fmt.Errorf("error parsing mdline: %s", line)
|
||||
}
|
||||
currentMD = mainLine[0] // The name of the md-device.
|
||||
isActive := (mainLine[2] == "active") // The activity status of the md-device.
|
||||
personality = ""
|
||||
md := mdStatus{
|
||||
name: mainLine[0],
|
||||
active: mainLine[2] == "active",
|
||||
}
|
||||
|
||||
if len(lines) <= i+3 {
|
||||
return mdStates, fmt.Errorf("error parsing mdstat: entry for %s has fewer lines than expected", md.name)
|
||||
}
|
||||
|
||||
personality := ""
|
||||
for _, possiblePersonality := range mainLine[3:] {
|
||||
if raidPersonalityRE.MatchString(possiblePersonality) {
|
||||
personality = possiblePersonality
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if len(lines) <= i+3 {
|
||||
return mdStates, fmt.Errorf("error parsing mdstat: entry for %s has fewer lines than expected", currentMD)
|
||||
}
|
||||
|
||||
switch {
|
||||
case personality == "raid0":
|
||||
active = int64(len(mainLine) - 4) // Get the number of devices from the main line.
|
||||
total = active // Raid0 active and total is always the same if active.
|
||||
size, err = evalRaid0line(lines[i+1]) // Parse statusline, always present.
|
||||
md.disksActive = int64(len(mainLine) - 4) // Get the number of devices from the main line.
|
||||
md.disksTotal = md.disksActive // Raid0 active and total is always the same if active.
|
||||
md.blocksTotal, err = evalRaid0line(lines[i+1])
|
||||
case raidPersonalityRE.MatchString(personality):
|
||||
active, total, size, err = evalStatusline(lines[i+1]) // Parse statusline, always present.
|
||||
md.disksActive, md.disksTotal, md.blocksTotal, err = evalStatusline(lines[i+1])
|
||||
default:
|
||||
log.Infof("Personality unknown: %s\n", mainLine)
|
||||
size, err = evalUnknownPersonalitylineRE(lines[i+1]) // Parse statusline, always present.
|
||||
md.blocksTotal, err = evalUnknownPersonalitylineRE(lines[i+1])
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return mdStates, fmt.Errorf("error parsing mdstat: %s", err)
|
||||
}
|
||||
|
||||
// Now get the number of synced blocks.
|
||||
var syncedBlocks int64
|
||||
|
||||
// Get the line number of the syncing-line.
|
||||
var j int
|
||||
if strings.Contains(lines[i+2], "bitmap") { // then skip the bitmap line
|
||||
j = i + 3
|
||||
} else {
|
||||
j = i + 2
|
||||
syncLine := lines[i+2]
|
||||
if strings.Contains(syncLine, "bitmap") {
|
||||
syncLine = lines[i+3]
|
||||
}
|
||||
|
||||
// If device is syncing at the moment, get the number of currently synced bytes,
|
||||
// otherwise that number equals the size of the device.
|
||||
if strings.Contains(lines[j], "recovery") ||
|
||||
strings.Contains(lines[j], "resync") &&
|
||||
!strings.Contains(lines[j], "\tresync=") {
|
||||
syncedBlocks, err = evalBuildline(lines[j])
|
||||
if strings.Contains(syncLine, "recovery") ||
|
||||
strings.Contains(syncLine, "resync") &&
|
||||
!strings.Contains(syncLine, "\tresync=") {
|
||||
md.blocksSynced, err = evalBuildline(syncLine)
|
||||
if err != nil {
|
||||
return mdStates, fmt.Errorf("error parsing mdstat: %s", err)
|
||||
}
|
||||
} else {
|
||||
syncedBlocks = size
|
||||
md.blocksSynced = md.blocksTotal
|
||||
}
|
||||
|
||||
mdStates = append(mdStates, mdStatus{currentMD, isActive, active, total, size, syncedBlocks})
|
||||
|
||||
mdStates = append(mdStates, md)
|
||||
}
|
||||
|
||||
return mdStates, nil
|
||||
|
@ -277,68 +258,55 @@ var (
|
|||
func (c *mdadmCollector) Update(ch chan<- prometheus.Metric) error {
|
||||
statusfile := procFilePath("mdstat")
|
||||
if _, err := os.Stat(statusfile); err != nil {
|
||||
// Take care we don't crash on non-existent statusfiles.
|
||||
if os.IsNotExist(err) {
|
||||
// no such file or directory, nothing to do, just return
|
||||
log.Debugf("Not collecting mdstat, file does not exist: %s", statusfile)
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// First parse mdstat-file...
|
||||
mdstate, err := parseMdstat(statusfile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error parsing mdstatus: %s", err)
|
||||
}
|
||||
|
||||
// ... and then plug the result into the metrics to be exported.
|
||||
var isActiveFloat float64
|
||||
for _, mds := range mdstate {
|
||||
log.Debugf("collecting metrics for device %s", mds.name)
|
||||
|
||||
log.Debugf("collecting metrics for device %s", mds.mdName)
|
||||
|
||||
if mds.isActive {
|
||||
isActiveFloat = 1
|
||||
} else {
|
||||
isActiveFloat = 0
|
||||
var active float64
|
||||
if mds.active {
|
||||
active = 1
|
||||
}
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
isActiveDesc,
|
||||
prometheus.GaugeValue,
|
||||
isActiveFloat,
|
||||
mds.mdName,
|
||||
active,
|
||||
mds.name,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
disksActiveDesc,
|
||||
prometheus.GaugeValue,
|
||||
float64(mds.disksActive),
|
||||
mds.mdName,
|
||||
mds.name,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
disksTotalDesc,
|
||||
prometheus.GaugeValue,
|
||||
float64(mds.disksTotal),
|
||||
mds.mdName,
|
||||
mds.name,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
blocksTotalDesc,
|
||||
prometheus.GaugeValue,
|
||||
float64(mds.blocksTotal),
|
||||
mds.mdName,
|
||||
mds.name,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
blocksSyncedDesc,
|
||||
prometheus.GaugeValue,
|
||||
float64(mds.blocksSynced),
|
||||
mds.mdName,
|
||||
mds.name,
|
||||
)
|
||||
|
||||
}
|
||||
|
||||
return nil
|
||||
|
|
|
@ -19,7 +19,6 @@ import (
|
|||
|
||||
func TestMdadm(t *testing.T) {
|
||||
mdStates, err := parseMdstat("fixtures/proc/mdstat")
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("parsing of reference-file failed entirely: %s", err)
|
||||
}
|
||||
|
@ -37,13 +36,13 @@ func TestMdadm(t *testing.T) {
|
|||
"md10": {"md10", true, 2, 2, 314159265, 314159265},
|
||||
"md11": {"md11", true, 2, 2, 4190208, 4190208},
|
||||
"md12": {"md12", true, 2, 2, 3886394368, 3886394368},
|
||||
"md219": {"md219", false, 2, 2, 7932, 7932},
|
||||
"md219": {"md219", false, 0, 0, 7932, 7932},
|
||||
"md00": {"md00", true, 1, 1, 4186624, 4186624},
|
||||
}
|
||||
|
||||
for _, md := range mdStates {
|
||||
if md != refs[md.mdName] {
|
||||
t.Errorf("failed parsing md-device %s correctly: want %v, got %v", md.mdName, refs[md.mdName], md)
|
||||
if md != refs[md.name] {
|
||||
t.Errorf("failed parsing md-device %s correctly: want %v, got %v", md.name, refs[md.name], md)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -54,7 +53,6 @@ func TestMdadm(t *testing.T) {
|
|||
|
||||
func TestInvalidMdstat(t *testing.T) {
|
||||
_, err := parseMdstat("fixtures/proc/mdstat_invalid")
|
||||
|
||||
if err == nil {
|
||||
t.Fatalf("parsing of invalid reference file did not find any errors")
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue