Add Linux NUMA "numastat" metrics (#249)
* Add Linux NUMA "numastat" metrics

  Read the `numastat` metrics from /sys/devices/system/node/node* when reading NUMA meminfo metrics.
* Update end-to-end test output.
* Add `numastat` metrics as counters.
* Add tests for error conditions.
* Refactor meminfo numa metrics struct
* Refactor meminfoKey into a simple struct of metric data. This makes it easier to pass slices of metrics around.
* Refactor tests.
* Fixup: Add suggested fixes.
* Fixup: More fixes
* Add another scanner.Err() return
* Add "_total" to counter metrics.

pull/330/head
parent
081ecc5db0
commit
c6162312f2
|
@ -1011,6 +1011,30 @@ node_memory_numa_Writeback{node="1"} 0
|
|||
# TYPE node_memory_numa_WritebackTmp gauge
|
||||
node_memory_numa_WritebackTmp{node="0"} 0
|
||||
node_memory_numa_WritebackTmp{node="1"} 0
|
||||
# HELP node_memory_numa_interleave_hit_total Memory information field interleave_hit_total.
|
||||
# TYPE node_memory_numa_interleave_hit_total counter
|
||||
node_memory_numa_interleave_hit_total{node="0"} 57146
|
||||
node_memory_numa_interleave_hit_total{node="1"} 57286
|
||||
# HELP node_memory_numa_local_node_total Memory information field local_node_total.
|
||||
# TYPE node_memory_numa_local_node_total counter
|
||||
node_memory_numa_local_node_total{node="0"} 1.93454780853e+11
|
||||
node_memory_numa_local_node_total{node="1"} 3.2671904655e+11
|
||||
# HELP node_memory_numa_numa_foreign_total Memory information field numa_foreign_total.
|
||||
# TYPE node_memory_numa_numa_foreign_total counter
|
||||
node_memory_numa_numa_foreign_total{node="0"} 5.98586233e+10
|
||||
node_memory_numa_numa_foreign_total{node="1"} 1.2624528e+07
|
||||
# HELP node_memory_numa_numa_hit_total Memory information field numa_hit_total.
|
||||
# TYPE node_memory_numa_numa_hit_total counter
|
||||
node_memory_numa_numa_hit_total{node="0"} 1.93460335812e+11
|
||||
node_memory_numa_numa_hit_total{node="1"} 3.26720946761e+11
|
||||
# HELP node_memory_numa_numa_miss_total Memory information field numa_miss_total.
|
||||
# TYPE node_memory_numa_numa_miss_total counter
|
||||
node_memory_numa_numa_miss_total{node="0"} 1.2624528e+07
|
||||
node_memory_numa_numa_miss_total{node="1"} 5.9858626709e+10
|
||||
# HELP node_memory_numa_other_node_total Memory information field other_node_total.
|
||||
# TYPE node_memory_numa_other_node_total counter
|
||||
node_memory_numa_other_node_total{node="0"} 1.8179487e+07
|
||||
node_memory_numa_other_node_total{node="1"} 5.986052692e+10
|
||||
# HELP node_net_bonding_slaves Number of configured slaves per bonding interface.
|
||||
# TYPE node_net_bonding_slaves gauge
|
||||
node_net_bonding_slaves{master="bond0"} 0
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
numa_hit 193460335812
|
||||
numa_miss 12624528
|
||||
numa_foreign 59858623300
|
||||
interleave_hit 57146
|
||||
local_node 193454780853
|
||||
other_node 18179487
|
|
@ -0,0 +1,6 @@
|
|||
numa_hit 326720946761
|
||||
numa_miss 59858626709
|
||||
numa_foreign 12624528
|
||||
interleave_hit 57286
|
||||
local_node 326719046550
|
||||
other_node 59860526920
|
|
@ -33,8 +33,13 @@ const (
|
|||
memInfoNumaSubsystem = "memory_numa"
|
||||
)
|
||||
|
||||
type meminfoKey struct {
|
||||
metricName, numaNode string
|
||||
var meminfoNodeRE = regexp.MustCompile(`.*devices/system/node/node([0-9]*)`)
|
||||
|
||||
type meminfoMetric struct {
|
||||
metricName string
|
||||
metricType prometheus.ValueType
|
||||
numaNode string
|
||||
value float64
|
||||
}
|
||||
|
||||
type meminfoNumaCollector struct {
|
||||
|
@ -54,53 +59,70 @@ func NewMeminfoNumaCollector() (Collector, error) {
|
|||
}
|
||||
|
||||
func (c *meminfoNumaCollector) Update(ch chan<- prometheus.Metric) (err error) {
|
||||
memInfoNuma, err := getMemInfoNuma()
|
||||
metrics, err := getMemInfoNuma()
|
||||
if err != nil {
|
||||
return fmt.Errorf("couldn't get NUMA meminfo: %s", err)
|
||||
}
|
||||
for k, v := range memInfoNuma {
|
||||
desc, ok := c.metricDescs[k.metricName]
|
||||
for _, v := range metrics {
|
||||
desc, ok := c.metricDescs[v.metricName]
|
||||
if !ok {
|
||||
desc = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, memInfoNumaSubsystem, k.metricName),
|
||||
fmt.Sprintf("Memory information field %s.", k.metricName),
|
||||
prometheus.BuildFQName(Namespace, memInfoNumaSubsystem, v.metricName),
|
||||
fmt.Sprintf("Memory information field %s.", v.metricName),
|
||||
[]string{"node"}, nil)
|
||||
c.metricDescs[k.metricName] = desc
|
||||
c.metricDescs[v.metricName] = desc
|
||||
}
|
||||
ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, v, k.numaNode)
|
||||
ch <- prometheus.MustNewConstMetric(desc, v.metricType, v.value, v.numaNode)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getMemInfoNuma() (map[meminfoKey]float64, error) {
|
||||
info := make(map[meminfoKey]float64)
|
||||
func getMemInfoNuma() ([]meminfoMetric, error) {
|
||||
var (
|
||||
metrics []meminfoMetric
|
||||
)
|
||||
|
||||
nodes, err := filepath.Glob(sysFilePath("devices/system/node/node[0-9]*"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, node := range nodes {
|
||||
file, err := os.Open(path.Join(node, "meminfo"))
|
||||
meminfoFile, err := os.Open(path.Join(node, "meminfo"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer file.Close()
|
||||
defer meminfoFile.Close()
|
||||
|
||||
numaInfo, err := parseMemInfoNuma(file)
|
||||
numaInfo, err := parseMemInfoNuma(meminfoFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for k, v := range numaInfo {
|
||||
info[k] = v
|
||||
metrics = append(metrics, numaInfo...)
|
||||
|
||||
numastatFile, err := os.Open(path.Join(node, "numastat"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer numastatFile.Close()
|
||||
|
||||
nodeNumber := meminfoNodeRE.FindStringSubmatch(node)
|
||||
if nodeNumber == nil {
|
||||
return nil, fmt.Errorf("device node string didn't match regexp: %s", node)
|
||||
}
|
||||
|
||||
numaStat, err := parseMemInfoNumaStat(numastatFile, nodeNumber[1])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
metrics = append(metrics, numaStat...)
|
||||
}
|
||||
|
||||
return info, nil
|
||||
return metrics, nil
|
||||
}
|
||||
|
||||
func parseMemInfoNuma(r io.Reader) (map[meminfoKey]float64, error) {
|
||||
func parseMemInfoNuma(r io.Reader) ([]meminfoMetric, error) {
|
||||
var (
|
||||
memInfo = map[meminfoKey]float64{}
|
||||
memInfo []meminfoMetric
|
||||
scanner = bufio.NewScanner(r)
|
||||
re = regexp.MustCompile("\\((.*)\\)")
|
||||
)
|
||||
|
@ -127,8 +149,34 @@ func parseMemInfoNuma(r io.Reader) (map[meminfoKey]float64, error) {
|
|||
|
||||
// Active(anon) -> Active_anon
|
||||
metric = re.ReplaceAllString(metric, "_${1}")
|
||||
memInfo[meminfoKey{metric, parts[1]}] = fv
|
||||
memInfo = append(memInfo, meminfoMetric{metric, prometheus.GaugeValue, parts[1], fv})
|
||||
}
|
||||
|
||||
return memInfo, nil
|
||||
return memInfo, scanner.Err()
|
||||
}
|
||||
|
||||
func parseMemInfoNumaStat(r io.Reader, nodeNumber string) ([]meminfoMetric, error) {
|
||||
var (
|
||||
numaStat []meminfoMetric
|
||||
scanner = bufio.NewScanner(r)
|
||||
)
|
||||
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
parts := strings.Fields(string(line))
|
||||
if len(parts) != 2 {
|
||||
return nil, fmt.Errorf("line scan did not return 2 fields: %s", line)
|
||||
}
|
||||
|
||||
fv, err := strconv.ParseFloat(parts[1], 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid value in numastat: %s", err)
|
||||
}
|
||||
|
||||
numaStat = append(numaStat, meminfoMetric{parts[0] + "_total", prometheus.CounterValue, nodeNumber, fv})
|
||||
}
|
||||
return numaStat, scanner.Err()
|
||||
}
|
||||
|
|
|
@ -30,11 +30,15 @@ func TestMemInfoNuma(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if want, got := 707915776.0, memInfo[meminfoKey{"Active_anon", "0"}]; want != got {
|
||||
t.Errorf("want memory Active(anon) %f, got %f", want, got)
|
||||
if want, got := 707915776.0, memInfo[5].value; want != got {
|
||||
t.Errorf("want memory Active(anon) value %f, got %f", want, got)
|
||||
}
|
||||
|
||||
if want, got := 150994944.0, memInfo[meminfoKey{"AnonHugePages", "0"}]; want != got {
|
||||
if want, got := "Active_anon", memInfo[5].metricName; want != got {
|
||||
t.Errorf("want metric Active(anon) metricName %s, got %s", want, got)
|
||||
}
|
||||
|
||||
if want, got := 150994944.0, memInfo[25].value; want != got {
|
||||
t.Errorf("want memory AnonHugePages %f, got %f", want, got)
|
||||
}
|
||||
|
||||
|
@ -49,11 +53,55 @@ func TestMemInfoNuma(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if want, got := 291930112.0, memInfo[meminfoKey{"Inactive_anon", "1"}]; want != got {
|
||||
if want, got := 291930112.0, memInfo[6].value; want != got {
|
||||
t.Errorf("want memory Inactive(anon) %f, got %f", want, got)
|
||||
}
|
||||
|
||||
if want, got := 85585088512.0, memInfo[meminfoKey{"FilePages", "1"}]; want != got {
|
||||
if want, got := 85585088512.0, memInfo[13].value; want != got {
|
||||
t.Errorf("want memory FilePages %f, got %f", want, got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemInfoNumaStat(t *testing.T) {
|
||||
file, err := os.Open("fixtures/sys/devices/system/node/node0/numastat")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
numaStat, err := parseMemInfoNumaStat(file, "0")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if want, got := 193460335812.0, numaStat[0].value; want != got {
|
||||
t.Errorf("want numa stat numa_hit value %f, got %f", want, got)
|
||||
}
|
||||
|
||||
if want, got := "numa_hit_total", numaStat[0].metricName; want != got {
|
||||
t.Errorf("want numa stat numa_hit metricName %s, got %s", want, got)
|
||||
}
|
||||
|
||||
if want, got := 193454780853.0, numaStat[4].value; want != got {
|
||||
t.Errorf("want numa stat local_node %f, got %f", want, got)
|
||||
}
|
||||
|
||||
file, err = os.Open("fixtures/sys/devices/system/node/node1/numastat")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
numaStat, err = parseMemInfoNumaStat(file, "1")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if want, got := 59858626709.0, numaStat[1].value; want != got {
|
||||
t.Errorf("want numa stat numa_miss %f, got %f", want, got)
|
||||
}
|
||||
|
||||
if want, got := 59860526920.0, numaStat[5].value; want != got {
|
||||
t.Errorf("want numa stat other_node %f, got %f", want, got)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue