From 5d68d5b9ad4d4d953e08b8fdde1e4f8549058f8f Mon Sep 17 00:00:00 2001 From: david Date: Wed, 15 Dec 2021 14:35:19 +0100 Subject: [PATCH] move logic to procfs; create a new metric for isolation Signed-off-by: david --- collector/cpu_linux.go | 123 ++++++++++++++++-------------------- collector/cpu_linux_test.go | 28 -------- 2 files changed, 54 insertions(+), 97 deletions(-) diff --git a/collector/cpu_linux.go b/collector/cpu_linux.go index eec6c4eb..63c3768f 100644 --- a/collector/cpu_linux.go +++ b/collector/cpu_linux.go @@ -18,17 +18,16 @@ package collector import ( "fmt" - "io/ioutil" "path/filepath" "regexp" "strconv" - "strings" "sync" "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/procfs" + "github.com/prometheus/procfs/sysfs" "gopkg.in/alecthomas/kingpin.v2" ) @@ -41,6 +40,7 @@ type cpuCollector struct { cpuGuest *prometheus.Desc cpuCoreThrottle *prometheus.Desc cpuPackageThrottle *prometheus.Desc + cpuIsolated *prometheus.Desc logger log.Logger cpuStats []procfs.CPUStat cpuStatsMutex sync.Mutex @@ -65,50 +65,6 @@ func init() { registerCollector("cpu", defaultEnabled, NewCPUCollector) } -func parseIsolCpus(data []byte) ([]uint16, error) { - isolcpus_str := strings.TrimRight(string(data), "\n") - - var isolcpus_int = []uint16{} - - for _, cpu := range strings.Split(isolcpus_str, ",") { - if cpu == "" { - continue - } - if strings.Contains(cpu, "-") { - ranges := strings.Split(cpu, "-") - startRange, err := strconv.Atoi(ranges[0]) - if err != nil { - return nil, err - } - endRange, err := strconv.Atoi(ranges[1]) - if err != nil { - return nil, err - } - - for i := startRange; i <= endRange; i++ { - isolcpus_int = append(isolcpus_int, uint16(i)) - } - continue - } - - _cpu, err := strconv.Atoi(cpu) - if err != nil { - return nil, err - } - isolcpus_int = append(isolcpus_int, uint16(_cpu)) - } - return isolcpus_int, nil -} - -func readIsolCpus() ([]uint16, error) { - isolcpus, err := 
ioutil.ReadFile(sysFilePath("devices/system/cpu/isolated")) - if err != nil { - return nil, fmt.Errorf("failed to read isolcpus from sysfs: %w", err) - } - - return parseIsolCpus(isolcpus) -} - // NewCPUCollector returns a new Collector exposing kernel/system statistics. func NewCPUCollector(logger log.Logger) (Collector, error) { fs, err := procfs.NewFS(*procPath) @@ -116,18 +72,19 @@ func NewCPUCollector(logger log.Logger) (Collector, error) { return nil, fmt.Errorf("failed to open procfs: %w", err) } - isolcpus, err := readIsolCpus() + sysfs, err := sysfs.NewFS(*sysPath) if err != nil { - return nil, fmt.Errorf("failed to open procfs: %w", err) + return nil, fmt.Errorf("failed to open sysfs: %w", err) + } + + isolcpus, err := sysfs.IsolatedCPUs() + if err != nil { + return nil, fmt.Errorf("failed to read isolcpus from sysfs: %w", err) } c := &cpuCollector{ - fs: fs, - cpu: prometheus.NewDesc( - prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "seconds_total"), - "Seconds the CPUs spent in each mode.", - []string{"cpu", "mode", "isolated"}, nil, - ), + fs: fs, + cpu: nodeCPUSecondsDesc, cpuInfo: prometheus.NewDesc( prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "info"), "CPU information from /proc/cpuinfo.", @@ -146,7 +103,7 @@ func NewCPUCollector(logger log.Logger) (Collector, error) { cpuGuest: prometheus.NewDesc( prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "guest_seconds_total"), "Seconds the CPUs spent in guests (VMs) for each mode.", - []string{"cpu", "mode", "isolated"}, nil, + []string{"cpu", "mode"}, nil, ), cpuCoreThrottle: prometheus.NewDesc( prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "core_throttles_total"), @@ -158,6 +115,11 @@ func NewCPUCollector(logger log.Logger) (Collector, error) { "Number of times this CPU package has been throttled.", []string{"package"}, nil, ), + cpuIsolated: prometheus.NewDesc( + prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "isolated"), + "Whether each core is 
isolated, information from /sys/devices/system/cpu/isolated.", + []string{"cpu"}, nil, + ), logger: logger, isolatedCpus: isolcpus, } @@ -200,6 +162,9 @@ func (c *cpuCollector) Update(ch chan<- prometheus.Metric) error { if err := c.updateStat(ch); err != nil { return err } + if err := c.updateIsolated(ch); err != nil { + return err + } return c.updateThermalThrottle(ch) } @@ -343,6 +308,30 @@ func contains(s []uint16, e uint16) bool { return false } +// updateIsolated refreshes the CPU list from /proc/stat through procfs and exports, for each CPU, a gauge that is 1 if the CPU is in the isolated set and 0 otherwise. +func (c *cpuCollector) updateIsolated(ch chan<- prometheus.Metric) error { + stats, err := c.fs.Stat() + if err != nil { + return err + } + + c.updateCPUStats(stats.CPU) + + // Acquire a lock to read the stats. + c.cpuStatsMutex.Lock() + defer c.cpuStatsMutex.Unlock() + for cpuID, _ := range c.cpuStats { + cpuNum := strconv.Itoa(cpuID) + isIsolated := 0.0 + if contains(c.isolatedCpus, uint16(cpuID)) { + isIsolated = 1.0 + } + ch <- prometheus.MustNewConstMetric(c.cpuIsolated, prometheus.GaugeValue, isIsolated, cpuNum) + } + + return nil +} + // updateStat reads /proc/stat through procfs and exports CPU-related metrics.
func (c *cpuCollector) updateStat(ch chan<- prometheus.Metric) error { stats, err := c.fs.Stat() @@ -357,23 +346,19 @@ func (c *cpuCollector) updateStat(ch chan<- prometheus.Metric) error { defer c.cpuStatsMutex.Unlock() for cpuID, cpuStat := range c.cpuStats { cpuNum := strconv.Itoa(cpuID) - isIsolated := "0" - if contains(c.isolatedCpus, uint16(cpuID)) { - isIsolated = "1" - } - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.User, cpuNum, "user", isIsolated) - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Nice, cpuNum, "nice", isIsolated) - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.System, cpuNum, "system", isIsolated) - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Idle, cpuNum, "idle", isIsolated) - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Iowait, cpuNum, "iowait", isIsolated) - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.IRQ, cpuNum, "irq", isIsolated) - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.SoftIRQ, cpuNum, "softirq", isIsolated) - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Steal, cpuNum, "steal", isIsolated) + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.User, cpuNum, "user") + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Nice, cpuNum, "nice") + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.System, cpuNum, "system") + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Idle, cpuNum, "idle") + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Iowait, cpuNum, "iowait") + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.IRQ, cpuNum, "irq") + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.SoftIRQ, cpuNum, 
"softirq") + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Steal, cpuNum, "steal") if *enableCPUGuest { // Guest CPU is also accounted for in cpuStat.User and cpuStat.Nice, expose these as separate metrics. - ch <- prometheus.MustNewConstMetric(c.cpuGuest, prometheus.CounterValue, cpuStat.Guest, cpuNum, "user", isIsolated) - ch <- prometheus.MustNewConstMetric(c.cpuGuest, prometheus.CounterValue, cpuStat.GuestNice, cpuNum, "nice", isIsolated) + ch <- prometheus.MustNewConstMetric(c.cpuGuest, prometheus.CounterValue, cpuStat.Guest, cpuNum, "user") + ch <- prometheus.MustNewConstMetric(c.cpuGuest, prometheus.CounterValue, cpuStat.GuestNice, cpuNum, "nice") } } diff --git a/collector/cpu_linux_test.go b/collector/cpu_linux_test.go index 7bcd5e49..93b493b2 100644 --- a/collector/cpu_linux_test.go +++ b/collector/cpu_linux_test.go @@ -104,31 +104,3 @@ func TestCPU(t *testing.T) { t.Fatalf("should have %v CPU Stat: got %v", resetIdle, got) } } -func TestIsolatedParsingCPU(t *testing.T) { - var testParams = []struct { - in []byte - res []uint16 - err error - }{ - {[]byte(""), []uint16{}, nil}, - {[]byte("1\n"), []uint16{1}, nil}, - {[]byte("1"), []uint16{1}, nil}, - {[]byte("1,2"), []uint16{1, 2}, nil}, - {[]byte("1-2"), []uint16{1, 2}, nil}, - {[]byte("1-3"), []uint16{1, 2, 3}, nil}, - {[]byte("1,2-4"), []uint16{1, 2, 3, 4}, nil}, - {[]byte("1,3-4"), []uint16{1, 3, 4}, nil}, - {[]byte("1,3-4,7,20-21"), []uint16{1, 3, 4, 7, 20, 21}, nil}, - } - for _, params := range testParams { - t.Run("blabla", func(t *testing.T) { - res, err := parseIsolCpus(params.in) - if !reflect.DeepEqual(res, params.res) { - t.Fatalf("should have %v result: got %v", params.res, res) - } - if err != params.err { - t.Fatalf("should have %v error: got %v", params.err, err) - } - }) - } -}