Add support for NRestarts counter introduced in systemd 235 (#992)
* Add support for NRestarts counter introduced in systemd 235. `.service` units increment this counter any time the Restart= condition is triggered. Signed-off-by: Matthew McGinn <mamcgi@gmail.com> pull/968/head
parent
ee1e1997bc
commit
8af84a215d
|
@ -3,7 +3,7 @@
|
||||||
**Breaking changes**
|
**Breaking changes**
|
||||||
|
|
||||||
* [CHANGE]
|
* [CHANGE]
|
||||||
* [FEATURE]
|
* [FEATURE] Collect NRestarts property for systemd service units
|
||||||
* [ENHANCEMENT]
|
* [ENHANCEMENT]
|
||||||
* [BUGFIX]
|
* [BUGFIX]
|
||||||
|
|
||||||
|
|
|
@ -36,6 +36,7 @@ type systemdCollector struct {
|
||||||
unitDesc *prometheus.Desc
|
unitDesc *prometheus.Desc
|
||||||
systemRunningDesc *prometheus.Desc
|
systemRunningDesc *prometheus.Desc
|
||||||
summaryDesc *prometheus.Desc
|
summaryDesc *prometheus.Desc
|
||||||
|
nRestartsDesc *prometheus.Desc
|
||||||
timerLastTriggerDesc *prometheus.Desc
|
timerLastTriggerDesc *prometheus.Desc
|
||||||
unitWhitelistPattern *regexp.Regexp
|
unitWhitelistPattern *regexp.Regexp
|
||||||
unitBlacklistPattern *regexp.Regexp
|
unitBlacklistPattern *regexp.Regexp
|
||||||
|
@ -63,6 +64,9 @@ func NewSystemdCollector() (Collector, error) {
|
||||||
summaryDesc := prometheus.NewDesc(
|
summaryDesc := prometheus.NewDesc(
|
||||||
prometheus.BuildFQName(namespace, subsystem, "units"),
|
prometheus.BuildFQName(namespace, subsystem, "units"),
|
||||||
"Summary of systemd unit states", []string{"state"}, nil)
|
"Summary of systemd unit states", []string{"state"}, nil)
|
||||||
|
nRestartsDesc := prometheus.NewDesc(
|
||||||
|
prometheus.BuildFQName(namespace, subsystem, "service_restart_total"),
|
||||||
|
"Service unit count of Restart triggers", []string{"state"}, nil)
|
||||||
timerLastTriggerDesc := prometheus.NewDesc(
|
timerLastTriggerDesc := prometheus.NewDesc(
|
||||||
prometheus.BuildFQName(namespace, subsystem, "timer_last_trigger_seconds"),
|
prometheus.BuildFQName(namespace, subsystem, "timer_last_trigger_seconds"),
|
||||||
"Seconds since epoch of last trigger.", []string{"name"}, nil)
|
"Seconds since epoch of last trigger.", []string{"name"}, nil)
|
||||||
|
@ -73,6 +77,7 @@ func NewSystemdCollector() (Collector, error) {
|
||||||
unitDesc: unitDesc,
|
unitDesc: unitDesc,
|
||||||
systemRunningDesc: systemRunningDesc,
|
systemRunningDesc: systemRunningDesc,
|
||||||
summaryDesc: summaryDesc,
|
summaryDesc: summaryDesc,
|
||||||
|
nRestartsDesc: nRestartsDesc,
|
||||||
timerLastTriggerDesc: timerLastTriggerDesc,
|
timerLastTriggerDesc: timerLastTriggerDesc,
|
||||||
unitWhitelistPattern: unitWhitelistPattern,
|
unitWhitelistPattern: unitWhitelistPattern,
|
||||||
unitBlacklistPattern: unitBlacklistPattern,
|
unitBlacklistPattern: unitBlacklistPattern,
|
||||||
|
@ -112,6 +117,11 @@ func (c *systemdCollector) collectUnitStatusMetrics(ch chan<- prometheus.Metric,
|
||||||
c.unitDesc, prometheus.GaugeValue, isActive,
|
c.unitDesc, prometheus.GaugeValue, isActive,
|
||||||
unit.Name, stateName)
|
unit.Name, stateName)
|
||||||
}
|
}
|
||||||
|
if strings.HasSuffix(unit.Name, ".service") {
|
||||||
|
ch <- prometheus.MustNewConstMetric(
|
||||||
|
c.nRestartsDesc, prometheus.CounterValue,
|
||||||
|
float64(unit.nRestarts), unit.Name)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -153,6 +163,7 @@ func (c *systemdCollector) newDbus() (*dbus.Conn, error) {
|
||||||
type unit struct {
|
type unit struct {
|
||||||
dbus.UnitStatus
|
dbus.UnitStatus
|
||||||
lastTriggerUsec uint64
|
lastTriggerUsec uint64
|
||||||
|
nRestarts uint32
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *systemdCollector) getAllUnits() ([]unit, error) {
|
func (c *systemdCollector) getAllUnits() ([]unit, error) {
|
||||||
|
@ -181,6 +192,14 @@ func (c *systemdCollector) getAllUnits() ([]unit, error) {
|
||||||
|
|
||||||
unit.lastTriggerUsec = lastTriggerValue.Value.Value().(uint64)
|
unit.lastTriggerUsec = lastTriggerValue.Value.Value().(uint64)
|
||||||
}
|
}
|
||||||
|
if strings.HasSuffix(unit.Name, ".service") {
|
||||||
|
nRestarts, err := conn.GetUnitTypeProperty(unit.Name, "Service", "NRestarts")
|
||||||
|
if err != nil {
|
||||||
|
log.Debugf("couldn't get unit '%s' NRestarts: %s\n", unit.Name, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
unit.nRestarts = nRestarts.Value.Value().(uint32)
|
||||||
|
}
|
||||||
|
|
||||||
result = append(result, unit)
|
result = append(result, unit)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue