collector: Unwrap glob textfile directories (#1985)

* collector: Unwrap glob textfile directories
* collector: Store full path in mtime's file label

The point is to avoid duplicated gauges from files with the same name in
different directories.

This introduces support for exporting from multiple directories matching
a given glob pattern (e.g. `/home/*/metrics/`).

Signed-off-by: Kiril Vladimirov <kiril@vladimiroff.org>
pull/2170/head
Kiril Vladimirov 2021-10-18 15:05:21 +03:00 committed by GitHub
parent 5a38949451
commit 1721de0c38
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 95 additions and 33 deletions

View File

@ -26,7 +26,7 @@ events_total{foo="bar"} 10
events_total{foo="baz"} 20
# HELP node_textfile_mtime_seconds Unixtime mtime of textfiles successfully read.
# TYPE node_textfile_mtime_seconds gauge
node_textfile_mtime_seconds{file="metrics.prom"} 1
node_textfile_mtime_seconds{file="fixtures/textfile/different_metric_types/metrics.prom"} 1
# HELP node_textfile_scrape_error 1 if there was an error opening or reading a file, 0 otherwise
# TYPE node_textfile_scrape_error gauge
node_textfile_scrape_error 0

View File

@ -0,0 +1,49 @@
# HELP node_textfile_mtime_seconds Unixtime mtime of textfiles successfully read.
# TYPE node_textfile_mtime_seconds gauge
node_textfile_mtime_seconds{file="fixtures/textfile/histogram_extra_dimension/metrics.prom"} 1
node_textfile_mtime_seconds{file="fixtures/textfile/summary_extra_dimension/metrics.prom"} 1
# HELP node_textfile_scrape_error 1 if there was an error opening or reading a file, 0 otherwise
# TYPE node_textfile_scrape_error gauge
node_textfile_scrape_error 0
# HELP prometheus_rule_evaluation_duration_seconds The duration for a rule to execute.
# TYPE prometheus_rule_evaluation_duration_seconds summary
prometheus_rule_evaluation_duration_seconds{handler="",rule_type="alerting",quantile="0.9"} 0.001765451
prometheus_rule_evaluation_duration_seconds{handler="",rule_type="alerting",quantile="0.99"} 0.018672076
prometheus_rule_evaluation_duration_seconds_sum{handler="",rule_type="alerting"} 214.85081044700146
prometheus_rule_evaluation_duration_seconds_count{handler="",rule_type="alerting"} 185209
prometheus_rule_evaluation_duration_seconds{handler="",rule_type="recording",quantile="0.5"} 4.3132e-05
prometheus_rule_evaluation_duration_seconds{handler="",rule_type="recording",quantile="0.9"} 8.9295e-05
prometheus_rule_evaluation_duration_seconds{handler="",rule_type="recording",quantile="0.99"} 0.000193657
prometheus_rule_evaluation_duration_seconds_sum{handler="",rule_type="recording"} 185091.01317759082
prometheus_rule_evaluation_duration_seconds_count{handler="",rule_type="recording"} 1.0020195e+08
prometheus_rule_evaluation_duration_seconds{handler="foo",rule_type="alerting",quantile="0.5"} 0.000571464
prometheus_rule_evaluation_duration_seconds_sum{handler="foo",rule_type="alerting"} 0
prometheus_rule_evaluation_duration_seconds_count{handler="foo",rule_type="alerting"} 0
# HELP prometheus_tsdb_compaction_chunk_range Final time range of chunks on their first compaction
# TYPE prometheus_tsdb_compaction_chunk_range histogram
prometheus_tsdb_compaction_chunk_range_bucket{foo="bar",le="100"} 0
prometheus_tsdb_compaction_chunk_range_bucket{foo="bar",le="400"} 0
prometheus_tsdb_compaction_chunk_range_bucket{foo="bar",le="1600"} 0
prometheus_tsdb_compaction_chunk_range_bucket{foo="bar",le="6400"} 0
prometheus_tsdb_compaction_chunk_range_bucket{foo="bar",le="25600"} 7
prometheus_tsdb_compaction_chunk_range_bucket{foo="bar",le="102400"} 7
prometheus_tsdb_compaction_chunk_range_bucket{foo="bar",le="409600"} 1.412839e+06
prometheus_tsdb_compaction_chunk_range_bucket{foo="bar",le="1.6384e+06"} 1.69185e+06
prometheus_tsdb_compaction_chunk_range_bucket{foo="bar",le="6.5536e+06"} 1.691853e+06
prometheus_tsdb_compaction_chunk_range_bucket{foo="bar",le="2.62144e+07"} 1.691853e+06
prometheus_tsdb_compaction_chunk_range_bucket{foo="bar",le="+Inf"} 1.691853e+06
prometheus_tsdb_compaction_chunk_range_sum{foo="bar"} 6.71393432189e+11
prometheus_tsdb_compaction_chunk_range_count{foo="bar"} 1.691853e+06
prometheus_tsdb_compaction_chunk_range_bucket{foo="baz",le="100"} 0
prometheus_tsdb_compaction_chunk_range_bucket{foo="baz",le="400"} 0
prometheus_tsdb_compaction_chunk_range_bucket{foo="baz",le="1600"} 0
prometheus_tsdb_compaction_chunk_range_bucket{foo="baz",le="6400"} 0
prometheus_tsdb_compaction_chunk_range_bucket{foo="baz",le="25600"} 7
prometheus_tsdb_compaction_chunk_range_bucket{foo="baz",le="102400"} 7
prometheus_tsdb_compaction_chunk_range_bucket{foo="baz",le="409600"} 1.412839e+06
prometheus_tsdb_compaction_chunk_range_bucket{foo="baz",le="1.6384e+06"} 1.69185e+06
prometheus_tsdb_compaction_chunk_range_bucket{foo="baz",le="6.5536e+06"} 1.691853e+06
prometheus_tsdb_compaction_chunk_range_bucket{foo="baz",le="2.62144e+07"} 1.691853e+06
prometheus_tsdb_compaction_chunk_range_bucket{foo="baz",le="+Inf"} 1.691853e+06
prometheus_tsdb_compaction_chunk_range_sum{foo="baz"} 6.71393432189e+11
prometheus_tsdb_compaction_chunk_range_count{foo="baz"} 1.691853e+06

View File

@ -1,6 +1,6 @@
# HELP node_textfile_mtime_seconds Unixtime mtime of textfiles successfully read.
# TYPE node_textfile_mtime_seconds gauge
node_textfile_mtime_seconds{file="metrics.prom"} 1
node_textfile_mtime_seconds{file="fixtures/textfile/histogram/metrics.prom"} 1
# HELP node_textfile_scrape_error 1 if there was an error opening or reading a file, 0 otherwise
# TYPE node_textfile_scrape_error gauge
node_textfile_scrape_error 0

View File

@ -1,6 +1,6 @@
# HELP node_textfile_mtime_seconds Unixtime mtime of textfiles successfully read.
# TYPE node_textfile_mtime_seconds gauge
node_textfile_mtime_seconds{file="metrics.prom"} 1
node_textfile_mtime_seconds{file="fixtures/textfile/histogram_extra_dimension/metrics.prom"} 1
# HELP node_textfile_scrape_error 1 if there was an error opening or reading a file, 0 otherwise
# TYPE node_textfile_scrape_error gauge
node_textfile_scrape_error 0

View File

@ -23,7 +23,7 @@ http_requests_total{baz="",code="503",foo="",handler="query_range",method="get"}
http_requests_total{baz="bar",code="200",foo="",handler="",method="get"} 93
# HELP node_textfile_mtime_seconds Unixtime mtime of textfiles successfully read.
# TYPE node_textfile_mtime_seconds gauge
node_textfile_mtime_seconds{file="metrics.prom"} 1
node_textfile_mtime_seconds{file="fixtures/textfile/inconsistent_metrics/metrics.prom"} 1
# HELP node_textfile_scrape_error 1 if there was an error opening or reading a file, 0 otherwise
# TYPE node_textfile_scrape_error gauge
node_textfile_scrape_error 0

View File

@ -22,7 +22,7 @@ event_duration_seconds_total_sum{baz="result_sort"} 3.4123187829998307
event_duration_seconds_total_count{baz="result_sort"} 1.427647e+06
# HELP node_textfile_mtime_seconds Unixtime mtime of textfiles successfully read.
# TYPE node_textfile_mtime_seconds gauge
node_textfile_mtime_seconds{file="metrics.prom"} 1
node_textfile_mtime_seconds{file="fixtures/textfile/summary/metrics.prom"} 1
# HELP node_textfile_scrape_error 1 if there was an error opening or reading a file, 0 otherwise
# TYPE node_textfile_scrape_error gauge
node_textfile_scrape_error 0

View File

@ -1,6 +1,6 @@
# HELP node_textfile_mtime_seconds Unixtime mtime of textfiles successfully read.
# TYPE node_textfile_mtime_seconds gauge
node_textfile_mtime_seconds{file="metrics.prom"} 1
node_textfile_mtime_seconds{file="fixtures/textfile/summary_extra_dimension/metrics.prom"} 1
# HELP node_textfile_scrape_error 1 if there was an error opening or reading a file, 0 otherwise
# TYPE node_textfile_scrape_error gauge
node_textfile_scrape_error 0

View File

@ -1,7 +1,7 @@
# HELP node_textfile_mtime_seconds Unixtime mtime of textfiles successfully read.
# TYPE node_textfile_mtime_seconds gauge
node_textfile_mtime_seconds{file="metrics1.prom"} 1
node_textfile_mtime_seconds{file="metrics2.prom"} 1
node_textfile_mtime_seconds{file="fixtures/textfile/two_metric_files/metrics1.prom"} 1
node_textfile_mtime_seconds{file="fixtures/textfile/two_metric_files/metrics2.prom"} 1
# HELP node_textfile_scrape_error 1 if there was an error opening or reading a file, 0 otherwise
# TYPE node_textfile_scrape_error gauge
node_textfile_scrape_error 0

View File

@ -172,18 +172,18 @@ func (c *textFileCollector) exportMTimes(mtimes map[string]time.Time, ch chan<-
// Export the mtimes of the successful files.
// Sorting is needed for predictable output comparison in tests.
filenames := make([]string, 0, len(mtimes))
for filename := range mtimes {
filenames = append(filenames, filename)
filepaths := make([]string, 0, len(mtimes))
for path := range mtimes {
filepaths = append(filepaths, path)
}
sort.Strings(filenames)
sort.Strings(filepaths)
for _, filename := range filenames {
mtime := float64(mtimes[filename].UnixNano() / 1e9)
for _, path := range filepaths {
mtime := float64(mtimes[path].UnixNano() / 1e9)
if c.mtime != nil {
mtime = *c.mtime
}
ch <- prometheus.MustNewConstMetric(mtimeDesc, prometheus.GaugeValue, mtime, filename)
ch <- prometheus.MustNewConstMetric(mtimeDesc, prometheus.GaugeValue, mtime, path)
}
}
@ -192,28 +192,37 @@ func (c *textFileCollector) Update(ch chan<- prometheus.Metric) error {
// Iterate over files and accumulate their metrics, but also track any
// parsing errors so an error metric can be reported.
var errored bool
files, err := ioutil.ReadDir(c.path)
if err != nil && c.path != "" {
errored = true
level.Error(c.logger).Log("msg", "failed to read textfile collector directory", "path", c.path, "err", err)
paths, err := filepath.Glob(c.path)
if err != nil || len(paths) == 0 {
// The pattern is malformed or matched nothing (e.g. a missing or
// inaccessible path). Either way, assume a single directory and let
// ioutil.ReadDir handle it.
paths = []string{c.path}
}
mtimes := make(map[string]time.Time, len(files))
for _, f := range files {
if !strings.HasSuffix(f.Name(), ".prom") {
continue
}
mtime, err := c.processFile(f.Name(), ch)
if err != nil {
mtimes := make(map[string]time.Time)
for _, path := range paths {
files, err := ioutil.ReadDir(path)
if err != nil && path != "" {
errored = true
level.Error(c.logger).Log("msg", "failed to collect textfile data", "file", f.Name(), "err", err)
continue
level.Error(c.logger).Log("msg", "failed to read textfile collector directory", "path", path, "err", err)
}
mtimes[f.Name()] = *mtime
}
for _, f := range files {
if !strings.HasSuffix(f.Name(), ".prom") {
continue
}
mtime, err := c.processFile(path, f.Name(), ch)
if err != nil {
errored = true
level.Error(c.logger).Log("msg", "failed to collect textfile data", "file", f.Name(), "err", err)
continue
}
mtimes[filepath.Join(path, f.Name())] = *mtime
}
}
c.exportMTimes(mtimes, ch)
// Export if there were errors.
@ -235,8 +244,8 @@ func (c *textFileCollector) Update(ch chan<- prometheus.Metric) error {
}
// processFile processes a single file, returning its modification time on success.
func (c *textFileCollector) processFile(name string, ch chan<- prometheus.Metric) (*time.Time, error) {
path := filepath.Join(c.path, name)
func (c *textFileCollector) processFile(dir, name string, ch chan<- prometheus.Metric) (*time.Time, error) {
path := filepath.Join(dir, name)
f, err := os.Open(path)
if err != nil {
return nil, fmt.Errorf("failed to open textfile data file %q: %w", path, err)

View File

@ -91,6 +91,10 @@ func TestTextfileCollector(t *testing.T) {
path: "fixtures/textfile/summary_extra_dimension",
out: "fixtures/textfile/summary_extra_dimension.out",
},
{
path: "fixtures/textfile/*_extra_dimension",
out: "fixtures/textfile/glob_extra_dimension.out",
},
}
for i, test := range tests {