api: provide per metric metadata (#6420)

* api: provide per metric metadata This adds a new endpoint that provides per metric metadata via the V1 API. It collapses metadata that is equal across all targets, and aggregates under the same metric name the ones that differ. * Allow tests to be asserted on response length Some tests e.g. limit on API responses, don't require an assertion on equality. This allows us to assert against response length instead of equality. Signed-off-by: gotjosh <josue@grafana.com>
5 years ago · 0a0a228db3
parent 466cc36ba0
commit 0a0a228db3
3 changed files with 346 additions and 41 deletions
--- a/docs/querying/api.md
+++ b/docs/querying/api.md
@ -569,7 +569,7 @@ $ curl http://localhost:9090/api/v1/alerts

 ## Querying target metadata

-The following endpoint returns metadata about metrics currently scraped by targets.
+The following endpoint returns metadata about metrics currently scraped from targets.
 This is **experimental** and might change in the future.

 ```
@ -653,6 +653,52 @@ curl -G http://localhost:9091/api/v1/targets/metadata \
 }
 ```

+## Querying metric metadata
+
+The following endpoint returns metadata about metrics currently scraped from targets. However, it does not provide any target information.
+This is considered **experimental** and might change in the future.
+
+```
+GET /api/v1/metadata
+```
+
+URL query parameters:
+
+- `limit=<number>`: Maximum number of metrics to return.
+
+The `data` section of the query result consists of an object where each key is a metric name and each value is a list of unique metadata objects, as exposed for that metric name across all targets.
+
+The following example returns two metrics. Note that the metric `http_requests_total` has more than one object in the list. At least one target has a value for `HELP` that does not match the rest.
+
+```json
+curl -G http://localhost:9090/api/v1/metadata?limit=2
+
+{
+  "status": "success",
+  "data": {
+    "cortex_ring_tokens": [
+      {
+        "type": "gauge",
+        "help": "Number of tokens in the ring",
+        "unit": ""
+      }
+    ],
+    "http_requests_total": [
+      {
+        "type": "counter",
+        "help": "Number of HTTP requests",
+        "unit": ""
+      },
+      {
+        "type": "counter",
+        "help": "Amount of HTTP requests",
+        "unit": ""
+      }
+    ]
+  }
+}
+```
+
 ## Alertmanagers

 The following endpoint returns an overview of the current state of the
--- a/web/api/v1/api.go
+++ b/web/api/v1/api.go
@ -276,6 +276,8 @@ func (api *API) Register(r *route.Router) {
 	r.Get("/targets/metadata", wrap(api.targetMetadata))
 	r.Get("/alertmanagers", wrap(api.alertmanagers))

+	r.Get("/metadata", wrap(api.metricMetadata))
+
 	r.Get("/status/config", wrap(api.serveConfig))
 	r.Get("/status/runtimeinfo", wrap(api.serveRuntimeInfo))
 	r.Get("/status/buildinfo", wrap(api.serveBuildInfo))
@ -688,7 +690,7 @@ func (api *API) targetMetadata(r *http.Request) apiFuncResult {

 	metric := r.FormValue("metric")

-	var res []metricMetadata
+	res := []metricMetadata{}
 	for _, tt := range api.targetRetriever.TargetsActive() {
 		for _, t := range tt {
 			if limit >= 0 && len(res) >= limit {
@ -722,9 +724,7 @@ func (api *API) targetMetadata(r *http.Request) apiFuncResult {
 			}
 		}
 	}
-	if len(res) == 0 {
-		return apiFuncResult{nil, &apiError{errorNotFound, errors.New("specified metadata not found")}, nil, nil}
-	}
+
 	return apiFuncResult{res, nil, nil, nil}
 }

@ -805,6 +805,58 @@ func rulesAlertsToAPIAlerts(rulesAlerts []*rules.Alert) []*Alert {
 	return apiAlerts
 }

+// metadata holds the type, help text and unit of a metric, without any
+// target information. It is comparable, which lets metricMetadata use it as
+// a map key to deduplicate identical entries.
+type metadata struct {
+	Type textparse.MetricType `json:"type"`
+	Help string               `json:"help"`
+	Unit string               `json:"unit"`
+}
+
+// metricMetadata returns the metadata of all metrics exposed by the active
+// targets, keyed by metric name. Metadata entries that are identical across
+// targets are collapsed into one; entries that differ are all listed under
+// the same metric name.
+//
+// The optional "limit" form value caps the number of metric names returned.
+// A value that is not an integer yields an errorBadData error. A negative
+// value (the default when the parameter is absent is -1) disables the limit,
+// and a limit of 0 returns an empty map.
+//
+// Go does not specify map iteration order, so when a limit applies the
+// subset of metrics returned is arbitrary, and the order of the entries in
+// each list is not stable between calls.
+func (api *API) metricMetadata(r *http.Request) apiFuncResult {
+	// Metric name -> set of distinct metadata seen for that name.
+	metrics := map[string]map[metadata]struct{}{}
+
+	limit := -1
+	if s := r.FormValue("limit"); s != "" {
+		var err error
+		if limit, err = strconv.Atoi(s); err != nil {
+			return apiFuncResult{nil, &apiError{errorBadData, errors.New("limit must be a number")}, nil, nil}
+		}
+	}
+
+	// Gather metadata from every active target; inserting into the per-metric
+	// set deduplicates entries with equal type, help and unit.
+	for _, tt := range api.targetRetriever.TargetsActive() {
+		for _, t := range tt {
+			for _, mm := range t.MetadataList() {
+				m := metadata{Type: mm.Type, Help: mm.Help, Unit: mm.Unit}
+				ms, ok := metrics[mm.Metric]
+
+				if !ok {
+					ms = map[metadata]struct{}{}
+					metrics[mm.Metric] = ms
+				}
+
+				ms[m] = struct{}{}
+			}
+		}
+	}
+
+	// Non-nil so that the result is an empty map, not nil, when there is no
+	// metadata available.
+	res := map[string][]metadata{}
+
+	// The limit counts metric names, not individual metadata entries.
+	for name, set := range metrics {
+		if limit >= 0 && len(res) >= limit {
+			break
+		}
+
+		s := []metadata{}
+
+		// NOTE: the loop variable shadows the metadata type inside this loop.
+		for metadata := range set {
+			s = append(s, metadata)
+		}
+
+		res[name] = s
+	}
+
+	return apiFuncResult{res, nil, nil, nil}
+}
+
 // RuleDiscovery has info for all rules
 type RuleDiscovery struct {
 	RuleGroups []*RuleGroup `json:"groups"`
--- a/web/api/v1/api_test.go
+++ b/web/api/v1/api_test.go
@ -137,7 +137,7 @@ func (t testTargetRetriever) TargetsDropped() map[string][]*scrape.Target {
 	return t.droppedTargets
 }

-func (t testTargetRetriever) setMetadataStoreForTargets(identifier string, metadata scrape.MetricMetadataStore) error {
+func (t *testTargetRetriever) SetMetadataStoreForTargets(identifier string, metadata scrape.MetricMetadataStore) error {
 	targets, ok := t.activeTargets[identifier]

 	if !ok {
@ -151,6 +151,14 @@ func (t testTargetRetriever) setMetadataStoreForTargets(identifier string, metad
 	return nil
 }

+// ResetMetadataStore replaces the metadata store of every active target with
+// an empty testMetaStore. testEndpoints calls it before installing the
+// metadata of each test case.
+func (t *testTargetRetriever) ResetMetadataStore() {
+	for _, at := range t.activeTargets {
+		for _, tt := range at {
+			tt.SetMetadataStore(&testMetaStore{})
+		}
+	}
+}
+
 type testAlertmanagerRetriever struct{}

 func (t testAlertmanagerRetriever) Alertmanagers() []*url.URL {
@ -302,7 +310,7 @@ func TestEndpoints(t *testing.T) {
 			rulesRetriever:        algr,
 		}

-		testEndpoints(t, api, true)
+		testEndpoints(t, api, testTargetRetriever, true)
 	})

 	// Run all the API tests against a API that is wired to forward queries via
@ -366,7 +374,7 @@ func TestEndpoints(t *testing.T) {
 			rulesRetriever:        algr,
 		}

-		testEndpoints(t, api, false)
+		testEndpoints(t, api, testTargetRetriever, false)
 	})

 }
@ -449,31 +457,8 @@ func setupTestTargetRetriever(t *testing.T) *testTargetRetriever {
 			Active: false,
 		},
 	}
-	targetRetriever := newTestTargetRetriever(targets)
-
-	targetRetriever.setMetadataStoreForTargets("test", &testMetaStore{
-		Metadata: []scrape.MetricMetadata{
-			{
-				Metric: "go_threads",
-				Type:   textparse.MetricTypeGauge,
-				Help:   "Number of OS threads created.",
-				Unit:   "",
-			},
-		},
-	})
-
-	targetRetriever.setMetadataStoreForTargets("blackbox", &testMetaStore{
-		Metadata: []scrape.MetricMetadata{
-			{
-				Metric: "prometheus_tsdb_storage_blocks_bytes",
-				Type:   textparse.MetricTypeGauge,
-				Help:   "The number of bytes that are currently used for local storage by all blocks.",
-				Unit:   "",
-			},
-		},
-	})

-	return targetRetriever
+	return newTestTargetRetriever(targets)
 }

 func setupRemote(s storage.Storage) *httptest.Server {
@ -531,16 +516,23 @@ func setupRemote(s storage.Storage) *httptest.Server {
 	return httptest.NewServer(handler)
 }

-func testEndpoints(t *testing.T, api *API, testLabelAPI bool) {
+func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, testLabelAPI bool) {
 	start := time.Unix(0, 0)

+	type targetMetadata struct {
+		identifier string
+		metadata   []scrape.MetricMetadata
+	}
+
 	type test struct {
 		endpoint    apiFunc
 		params      map[string]string
 		query       url.Values
 		response    interface{}
+		responseLen int
 		errType     errorType
 		sorter      func(interface{})
+		metadata    []targetMetadata
 	}

 	var tests = []test{
@ -959,6 +951,19 @@ func testEndpoints(t *testing.T, api *API, testLabelAPI bool) {
 			query: url.Values{
 				"metric": []string{"go_threads"},
 			},
+			metadata: []targetMetadata{
+				{
+					identifier: "test",
+					metadata: []scrape.MetricMetadata{
+						{
+							Metric: "go_threads",
+							Type:   textparse.MetricTypeGauge,
+							Help:   "Number of OS threads created.",
+							Unit:   "",
+						},
+					},
+				},
+			},
 			response: []metricMetadata{
 				{
 					Target: labels.FromMap(map[string]string{
@ -976,6 +981,19 @@ func testEndpoints(t *testing.T, api *API, testLabelAPI bool) {
 			query: url.Values{
 				"match_target": []string{"{job=\"blackbox\"}"},
 			},
+			metadata: []targetMetadata{
+				{
+					identifier: "blackbox",
+					metadata: []scrape.MetricMetadata{
+						{
+							Metric: "prometheus_tsdb_storage_blocks_bytes",
+							Type:   textparse.MetricTypeGauge,
+							Help:   "The number of bytes that are currently used for local storage by all blocks.",
+							Unit:   "",
+						},
+					},
+				},
+			},
 			response: []metricMetadata{
 				{
 					Target: labels.FromMap(map[string]string{
@ -991,6 +1009,30 @@ func testEndpoints(t *testing.T, api *API, testLabelAPI bool) {
 		// Without a target or metric.
 		{
 			endpoint: api.targetMetadata,
+			metadata: []targetMetadata{
+				{
+					identifier: "test",
+					metadata: []scrape.MetricMetadata{
+						{
+							Metric: "go_threads",
+							Type:   textparse.MetricTypeGauge,
+							Help:   "Number of OS threads created.",
+							Unit:   "",
+						},
+					},
+				},
+				{
+					identifier: "blackbox",
+					metadata: []scrape.MetricMetadata{
+						{
+							Metric: "prometheus_tsdb_storage_blocks_bytes",
+							Type:   textparse.MetricTypeGauge,
+							Help:   "The number of bytes that are currently used for local storage by all blocks.",
+							Unit:   "",
+						},
+					},
+				},
+			},
 			response: []metricMetadata{
 				{
 					Target: labels.FromMap(map[string]string{
@ -1024,7 +1066,7 @@ func testEndpoints(t *testing.T, api *API, testLabelAPI bool) {
 			query: url.Values{
 				"match_target": []string{"{job=\"non-existentblackbox\"}"},
 			},
-			errType: errorNotFound,
+			response: []metricMetadata{},
 		},
 		{
 			endpoint: api.alertmanagers,
@ -1041,6 +1083,148 @@ func testEndpoints(t *testing.T, api *API, testLabelAPI bool) {
 				},
 			},
 		},
+		// With metadata available.
+		{
+			endpoint: api.metricMetadata,
+			metadata: []targetMetadata{
+				{
+					identifier: "test",
+					metadata: []scrape.MetricMetadata{
+						{
+							Metric: "prometheus_engine_query_duration_seconds",
+							Type:   textparse.MetricTypeSummary,
+							Help:   "Query timings",
+							Unit:   "",
+						},
+						{
+							Metric: "go_info",
+							Type:   textparse.MetricTypeGauge,
+							Help:   "Information about the Go environment.",
+							Unit:   "",
+						},
+					},
+				},
+			},
+			response: map[string][]metadata{
+				"prometheus_engine_query_duration_seconds": {{textparse.MetricTypeSummary, "Query timings", ""}},
+				"go_info": {{textparse.MetricTypeGauge, "Information about the Go environment.", ""}},
+			},
+		},
+		// With duplicate metadata for a metric that comes from different targets.
+		{
+			endpoint: api.metricMetadata,
+			metadata: []targetMetadata{
+				{
+					identifier: "test",
+					metadata: []scrape.MetricMetadata{
+						{
+							Metric: "go_threads",
+							Type:   textparse.MetricTypeGauge,
+							Help:   "Number of OS threads created",
+							Unit:   "",
+						},
+					},
+				},
+				{
+					identifier: "blackbox",
+					metadata: []scrape.MetricMetadata{
+						{
+							Metric: "go_threads",
+							Type:   textparse.MetricTypeGauge,
+							Help:   "Number of OS threads created",
+							Unit:   "",
+						},
+					},
+				},
+			},
+			response: map[string][]metadata{
+				"go_threads": {{textparse.MetricTypeGauge, "Number of OS threads created", ""}},
+			},
+		},
+		// With non-duplicate metadata for the same metric from different targets.
+		{
+			endpoint: api.metricMetadata,
+			metadata: []targetMetadata{
+				{
+					identifier: "test",
+					metadata: []scrape.MetricMetadata{
+						{
+							Metric: "go_threads",
+							Type:   textparse.MetricTypeGauge,
+							Help:   "Number of OS threads created",
+							Unit:   "",
+						},
+					},
+				},
+				{
+					identifier: "blackbox",
+					metadata: []scrape.MetricMetadata{
+						{
+							Metric: "go_threads",
+							Type:   textparse.MetricTypeGauge,
+							Help:   "Number of OS threads that were created.",
+							Unit:   "",
+						},
+					},
+				},
+			},
+			response: map[string][]metadata{
+				"go_threads": []metadata{
+					{textparse.MetricTypeGauge, "Number of OS threads created", ""},
+					{textparse.MetricTypeGauge, "Number of OS threads that were created.", ""},
+				},
+			},
+			sorter: func(m interface{}) {
+				v := m.(map[string][]metadata)["go_threads"]
+
+				sort.Slice(v, func(i, j int) bool {
+					return v[i].Help < v[j].Help
+				})
+			},
+		},
+		// With a limit for the number of metrics returned
+		{
+			endpoint: api.metricMetadata,
+			query: url.Values{
+				"limit": []string{"2"},
+			},
+			metadata: []targetMetadata{
+				{
+					identifier: "test",
+					metadata: []scrape.MetricMetadata{
+						{
+							Metric: "go_threads",
+							Type:   textparse.MetricTypeGauge,
+							Help:   "Number of OS threads created",
+							Unit:   "",
+						},
+						{
+							Metric: "prometheus_engine_query_duration_seconds",
+							Type:   textparse.MetricTypeSummary,
+							Help:   "Query Timmings.",
+							Unit:   "",
+						},
+					},
+				},
+				{
+					identifier: "blackbox",
+					metadata: []scrape.MetricMetadata{
+						{
+							Metric: "go_gc_duration_seconds",
+							Type:   textparse.MetricTypeSummary,
+							Help:   "A summary of the GC invocation durations.",
+							Unit:   "",
+						},
+					},
+				},
+			},
+			responseLen: 2,
+		},
+		// With no available metadata
+		{
+			endpoint: api.metricMetadata,
+			response: map[string][]metadata{},
+		},
 		{
 			endpoint: api.serveConfig,
 			response: &prometheusConfig{
@ -1233,6 +1417,12 @@ func testEndpoints(t *testing.T, api *API, testLabelAPI bool) {
 			if err != nil {
 				t.Fatal(err)
 			}
+
+			tr.ResetMetadataStore()
+			for _, tm := range test.metadata {
+				tr.SetMetadataStoreForTargets(tm.identifier, &testMetaStore{Metadata: tm.metadata})
+			}
+
 			res := test.endpoint(req.WithContext(ctx))
 			assertAPIError(t, res.err, test.errType)

@ -1240,10 +1430,14 @@ func testEndpoints(t *testing.T, api *API, testLabelAPI bool) {
 				test.sorter(res.data)
 			}

+			if test.responseLen != 0 {
+				assertAPIResponseLength(t, res.data, test.responseLen)
+			} else {
 				assertAPIResponse(t, res.data, test.response)
 			}
 		}
 	}
+}

 func assertAPIError(t *testing.T, got *apiError, exp errorType) {
 	t.Helper()
@ -1284,6 +1478,19 @@ func assertAPIResponse(t *testing.T, got interface{}, exp interface{}) {
 	}
 }

+// assertAPIResponseLength fails the test immediately if the length of got
+// differs from expLen. It is meant for responses whose exact contents are
+// not asserted, only their size.
+//
+// The length is read through reflection, so got must hold a value whose kind
+// supports Len (for example a map or a slice); reflect.Value.Len panics
+// otherwise.
+func assertAPIResponseLength(t *testing.T, got interface{}, expLen int) {
+	t.Helper()
+
+	gotLen := reflect.ValueOf(got).Len()
+	if gotLen != expLen {
+		t.Fatalf(
+			"Response length does not match, expected:\n%d\ngot:\n%d",
+			expLen,
+			gotLen,
+		)
+	}
+}
+
 func TestSampledReadEndpoint(t *testing.T) {
 	suite, err := promql.NewTest(t, `
 		load 1m