api/v1: Continue work exposing rules and alerts

Signed-off-by: Max Leonard Inden <IndenML@gmail.com>
pull/4318/head
Max Leonard Inden 2018-06-27 15:15:17 +08:00
parent 31f8ca0dfb
commit 71fafad099
No known key found for this signature in database
GPG Key ID: 5403C5464810BC26
6 changed files with 312 additions and 161 deletions

View File

@ -363,6 +363,103 @@ $ curl http://localhost:9090/api/v1/targets
} }
``` ```
## Rules
The `/rules` API endpoint returns a list of alerting and recording rules that
are currently loaded. In addition, for each alerting rule it returns the alerts
that are currently active on this Prometheus instance.
As the `/rules` endpoint is fairly new, it does not have the same stability
guarantees as the overarching API v1.
```
GET /api/v1/rules
```
```json
$ curl http://localhost:9090/api/v1/rules
{
"data": {
"groups": [
{
"rules": [
{
"alerts": [
{
"activeAt": "2018-07-04T20:27:12.60602144+02:00",
"annotations": {
"summary": "High request latency"
},
"labels": {
"alertname": "HighRequestLatency",
"severity": "page"
},
"state": "firing",
"value": 1
}
],
"annotations": {
"summary": "High request latency"
},
"duration": 600,
"labels": {
"severity": "page"
},
"name": "HighRequestLatency",
"query": "job:request_latency_seconds:mean5m{job=\"myjob\"} > 0.5",
"type": "alerting"
},
{
"name": "job:http_inprogress_requests:sum",
"query": "sum(http_inprogress_requests) by (job)",
"type": "recording"
}
],
"file": "/rules.yaml",
"interval": 60,
"name": "example"
}
]
},
"status": "success"
}
```
## Alerts
The `/alerts` endpoint returns a list of all active alerts.
As the `/alerts` endpoint is fairly new, it does not have the same stability
guarantees as the overarching API v1.
```
GET /api/v1/alerts
```
```json
$ curl http://localhost:9090/api/v1/alerts
{
"data": {
"alerts": [
{
"activeAt": "2018-07-04T20:27:12.60602144+02:00",
"annotations": {},
"labels": {
"alertname": "my-alert"
},
"state": "firing",
"value": 1
}
]
},
"status": "success"
}
```
## Querying target metadata ## Querying target metadata
The following endpoint returns metadata about metrics currently scraped by targets. The following endpoint returns metadata about metrics currently scraped by targets.

View File

@ -126,46 +126,31 @@ func NewAlertingRule(name string, vec promql.Expr, hold time.Duration, lbls, ann
} }
} }
// Name returns the name of the alert. // Name returns the name of the alerting rule.
func (r *AlertingRule) Name() string { func (r *AlertingRule) Name() string {
return r.name return r.name
} }
// Query returns the query expression of the alert. // Query returns the query expression of the alerting rule.
func (r *AlertingRule) Query() promql.Expr { func (r *AlertingRule) Query() promql.Expr {
return r.vector return r.vector
} }
// Duration returns the hold duration of the alert. // Duration returns the hold duration of the alerting rule.
func (r *AlertingRule) Duration() time.Duration { func (r *AlertingRule) Duration() time.Duration {
return r.holdDuration return r.holdDuration
} }
// Labels returns the labels of the alert. // Labels returns the labels of the alerting rule.
func (r *AlertingRule) Labels() labels.Labels { func (r *AlertingRule) Labels() labels.Labels {
return r.labels return r.labels
} }
// Annotations returns the annotations of the alert. // Annotations returns the annotations of the alerting rule.
func (r *AlertingRule) Annotations() labels.Labels { func (r *AlertingRule) Annotations() labels.Labels {
return r.annotations return r.annotations
} }
// Alertinfo returns the rule's active alerts that have not been resolved yet,
// i.e. those whose ResolvedAt is the zero time. It returns nil when the rule
// has no active alerts at all, and an empty non-nil slice when every active
// alert has already been resolved.
func (r *AlertingRule) Alertinfo() []*Alert {
	if len(r.active) == 0 {
		return nil
	}

	unresolved := []*Alert{}
	for _, alert := range r.active {
		if !alert.ResolvedAt.IsZero() {
			continue
		}
		unresolved = append(unresolved, alert)
	}
	return unresolved
}
func (r *AlertingRule) equal(o *AlertingRule) bool { func (r *AlertingRule) equal(o *AlertingRule) bool {
return r.name == o.name && labels.Equal(r.labels, o.labels) return r.name == o.name && labels.Equal(r.labels, o.labels)
} }

View File

@ -188,6 +188,9 @@ func (g *Group) File() string { return g.file }
// Rules returns the group's rules. // Rules returns the group's rules.
func (g *Group) Rules() []Rule { return g.rules } func (g *Group) Rules() []Rule { return g.rules }
// Interval returns the interval stored on the group.
func (g *Group) Interval() time.Duration {
	return g.interval
}
func (g *Group) run(ctx context.Context) { func (g *Group) run(ctx context.Context) {
defer close(g.terminated) defer close(g.terminated)

View File

@ -52,6 +52,16 @@ func (rule *RecordingRule) Name() string {
return rule.name return rule.name
} }
// Query returns the expression of the recording rule. This is the same
// expression whose string form Eval hands to the query function.
func (rule *RecordingRule) Query() promql.Expr {
return rule.vector
}
// Labels returns the labels stored on the recording rule.
func (rule *RecordingRule) Labels() labels.Labels {
return rule.labels
}
// Eval evaluates the rule and then overrides the metric names and labels accordingly. // Eval evaluates the rule and then overrides the metric names and labels accordingly.
func (rule *RecordingRule) Eval(ctx context.Context, ts time.Time, query QueryFunc, _ *url.URL) (promql.Vector, error) { func (rule *RecordingRule) Eval(ctx context.Context, ts time.Time, query QueryFunc, _ *url.URL) (promql.Vector, error) {
vector, err := query(ctx, rule.vector.String(), ts) vector, err := query(ctx, rule.vector.String(), ts)

View File

@ -96,17 +96,9 @@ type alertmanagerRetriever interface {
DroppedAlertmanagers() []*url.URL DroppedAlertmanagers() []*url.URL
} }
type alertRetreiver interface { type rulesRetriever interface {
AlertingRules() []*rules.AlertingRule
}
type rulesRetreiver interface {
RuleGroups() []*rules.Group RuleGroups() []*rules.Group
} AlertingRules() []*rules.AlertingRule
type alertsrulesRetreiver interface {
alertRetreiver
rulesRetreiver
} }
type response struct { type response struct {
@ -133,7 +125,7 @@ type API struct {
targetRetriever targetRetriever targetRetriever targetRetriever
alertmanagerRetriever alertmanagerRetriever alertmanagerRetriever alertmanagerRetriever
alertsrulesRetreiver alertsrulesRetreiver rulesRetriever rulesRetriever
now func() time.Time now func() time.Time
config func() config.Config config func() config.Config
flagsMap map[string]string flagsMap map[string]string
@ -156,20 +148,20 @@ func NewAPI(
db func() *tsdb.DB, db func() *tsdb.DB,
enableAdmin bool, enableAdmin bool,
logger log.Logger, logger log.Logger,
al alertsrulesRetreiver, rr rulesRetriever,
) *API { ) *API {
return &API{ return &API{
QueryEngine: qe, QueryEngine: qe,
Queryable: q, Queryable: q,
targetRetriever: tr, targetRetriever: tr,
alertmanagerRetriever: ar, alertmanagerRetriever: ar,
now: time.Now, now: time.Now,
config: configFunc, config: configFunc,
flagsMap: flagsMap, flagsMap: flagsMap,
ready: readyFunc, ready: readyFunc,
db: db, db: db,
enableAdmin: enableAdmin, enableAdmin: enableAdmin,
alertsrulesRetreiver: al, rulesRetriever: rr,
} }
} }
@ -597,92 +589,130 @@ func (api *API) alertmanagers(r *http.Request) (interface{}, *apiError, func())
return ams, nil, nil return ams, nil, nil
} }
// AlertDiscovery has info for all alerts // AlertDiscovery has info for all active alerts.
type AlertDiscovery struct { type AlertDiscovery struct {
Alertgrps []*Alertgrp `json:"alertgrp"` Alerts []*Alert `json:"alerts"`
} }
// Alert has info for a alert // Alert has info for an alert.
type Alert struct { type Alert struct {
Labels labels.Labels `json:"labels"` Labels labels.Labels `json:"labels"`
Status string `json:"status"` Annotations labels.Labels `json:"annotations"`
Activesince *time.Time `json:"activesince,omitempty"` State string `json:"state"`
ActiveAt *time.Time `json:"activeAt,omitempty"`
Value float64 `json:"value"`
} }
// Alertgrp has info for alerts part of a group func (api *API) alerts(r *http.Request) (interface{}, *apiError, func()) {
type Alertgrp struct { alertingRules := api.rulesRetriever.AlertingRules()
alerts := []*Alert{}
for _, alertingRule := range alertingRules {
alerts = append(
alerts,
rulesAlertsToAPIAlerts(alertingRule.ActiveAlerts())...,
)
}
res := &AlertDiscovery{Alerts: alerts}
return res, nil, nil
}
// rulesAlertsToAPIAlerts converts alerts from the rules package into their API
// representation, keeping the input order. The result is always non-nil and
// holds exactly one entry per input alert. ActiveAt points at the source
// alert's own field rather than at a copy.
func rulesAlertsToAPIAlerts(rulesAlerts []*rules.Alert) []*Alert {
	converted := make([]*Alert, 0, len(rulesAlerts))
	for _, src := range rulesAlerts {
		converted = append(converted, &Alert{
			Labels:      src.Labels,
			Annotations: src.Annotations,
			State:       src.State.String(),
			ActiveAt:    &src.ActiveAt,
			Value:       src.Value,
		})
	}
	return converted
}
// RuleDiscovery is the payload returned by the rules handler. It holds all
// rule groups, serialized under the "groups" JSON key.
type RuleDiscovery struct {
RuleGroups []*RuleGroup `json:"groups"`
}
// RuleGroup has info for rules which are part of a group.
type RuleGroup struct {
Name string `json:"name"`
File string `json:"file"`
// In order to preserve rule ordering, while exposing type (alerting or recording)
// specific properties, both alerting and recording rules are exposed in the
// same array.
Rules []rule `json:"rules"`
// Interval is the group's interval expressed in seconds.
Interval float64 `json:"interval"`
}
// rule is the element type of RuleGroup.Rules. The rules handler stores either
// an alertingRule or a recordingRule value in it.
type rule interface{}
type alertingRule struct {
Name string `json:"name"` Name string `json:"name"`
Query string `json:"query"` Query string `json:"query"`
Duration string `json:"duration"` Duration float64 `json:"duration"`
Annotations labels.Labels `json:"annotations,omitempty"` Labels labels.Labels `json:"labels"`
Annotations labels.Labels `json:"annotations"`
Alerts []*Alert `json:"alerts"` Alerts []*Alert `json:"alerts"`
// Type of an alertingRule is always "alerting".
Type string `json:"type"`
} }
func (api *API) alerts(r *http.Request) (interface{}, *apiError) { type recordingRule struct {
alertingrules := api.alertsrulesRetreiver.AlertingRules() Name string `json:"name"`
var alertgrps []*Alertgrp Query string `json:"query"`
res := &AlertDiscovery{Alertgrps: alertgrps} Labels labels.Labels `json:"labels,omitempty"`
for _, activerule := range alertingrules { // Type of a recordingRule is always "recording".
t := &Alertgrp{ Type string `json:"type"`
Name: activerule.Name(), }
Query: fmt.Sprintf("%v", activerule.Query()),
Duration: activerule.Duration().String(), func (api *API) rules(r *http.Request) (interface{}, *apiError, func()) {
Annotations: activerule.Annotations(), ruleGroups := api.rulesRetriever.RuleGroups()
res := &RuleDiscovery{RuleGroups: make([]*RuleGroup, len(ruleGroups))}
for i, grp := range ruleGroups {
apiRuleGroup := &RuleGroup{
Name: grp.Name(),
File: grp.File(),
Interval: grp.Interval().Seconds(),
Rules: []rule{},
} }
alerts := activerule.Alertinfo()
var activealerts []*Alert for _, r := range grp.Rules() {
for _, alert := range alerts { var enrichedRule rule
q := &Alert{
Labels: alert.Labels, switch rule := r.(type) {
Status: alert.State.String(), case *rules.AlertingRule:
Activesince: &alert.ActiveAt, enrichedRule = alertingRule{
Name: rule.Name(),
Query: rule.Query().String(),
Duration: rule.Duration().Seconds(),
Labels: rule.Labels(),
Annotations: rule.Annotations(),
Alerts: rulesAlertsToAPIAlerts(rule.ActiveAlerts()),
Type: "alerting",
}
case *rules.RecordingRule:
enrichedRule = recordingRule{
Name: rule.Name(),
Query: rule.Query().String(),
Labels: rule.Labels(),
Type: "recording",
}
default:
err := fmt.Errorf("failed to assert type of rule '%v'", rule.Name())
return nil, &apiError{errorInternal, err}, nil
} }
activealerts = append(activealerts, q) apiRuleGroup.Rules = append(apiRuleGroup.Rules, enrichedRule)
} }
t.Alerts = activealerts res.RuleGroups[i] = apiRuleGroup
res.Alertgrps = append(res.Alertgrps, t)
} }
return res, nil, nil
return res, nil
}
// GroupDiscovery has info for all rule groups, serialized under the "groups"
// JSON key.
type GroupDiscovery struct {
Rulegrps []*Rulegrp `json:"groups"`
}
// Rulegrp has info for the rules which are part of a single group: the group's
// name, the file it was loaded from, and its rules.
type Rulegrp struct {
Name string `json:"name"`
File string `json:"file"`
Rules []*Ruleinfo `json:"rules"`
}
// Ruleinfo holds a single rule as rendered by its String method, i.e. in human
// readable format using \n as line separators.
type Ruleinfo struct {
Rule string `json:"rule"`
}
func (api *API) rules(r *http.Request) (interface{}, *apiError) {
grps := api.alertsrulesRetreiver.RuleGroups()
res := &GroupDiscovery{Rulegrps: make([]*Rulegrp, len(grps))}
for i, grp := range grps {
t := &Rulegrp{
Name: grp.Name(),
File: grp.File(),
}
var rulearr []*Ruleinfo
for _, rule := range grp.Rules() {
q := &Ruleinfo{
Rule: rule.String(),
}
rulearr = append(rulearr, q)
}
t.Rules = rulearr
res.Rulegrps[i] = t
}
return res, nil
} }
type prometheusConfig struct { type prometheusConfig struct {

View File

@ -21,7 +21,6 @@ import (
"fmt" "fmt"
"github.com/go-kit/kit/log" "github.com/go-kit/kit/log"
"io/ioutil" "io/ioutil"
stdlog "log"
"math" "math"
"net/http" "net/http"
"net/http/httptest" "net/http/httptest"
@ -102,18 +101,18 @@ func (t testAlertmanagerRetriever) DroppedAlertmanagers() []*url.URL {
} }
} }
type testalertsrulesfunc struct { type rulesRetrieverMock struct {
test *testing.T testing *testing.T
} }
func (t testalertsrulesfunc) AlertingRules() []*rules.AlertingRule { func (m rulesRetrieverMock) AlertingRules() []*rules.AlertingRule {
expr1, err := promql.ParseExpr(`absent(test_metric3) != 1`) expr1, err := promql.ParseExpr(`absent(test_metric3) != 1`)
if err != nil { if err != nil {
stdlog.Fatalf("Unable to parse alert expression: %s", err) m.testing.Fatalf("unable to parse alert expression: %s", err)
} }
expr2, err := promql.ParseExpr(`up == 1`) expr2, err := promql.ParseExpr(`up == 1`)
if err != nil { if err != nil {
stdlog.Fatalf("Unable to parse alert expression: %s", err) m.testing.Fatalf("Unable to parse alert expression: %s", err)
} }
rule1 := rules.NewAlertingRule( rule1 := rules.NewAlertingRule(
@ -138,10 +137,10 @@ func (t testalertsrulesfunc) AlertingRules() []*rules.AlertingRule {
return r return r
} }
func (t testalertsrulesfunc) RuleGroups() []*rules.Group { func (m rulesRetrieverMock) RuleGroups() []*rules.Group {
var ar testalertsrulesfunc var ar rulesRetrieverMock
arules := ar.AlertingRules() arules := ar.AlertingRules()
storage := testutil.NewStorage(t.test) storage := testutil.NewStorage(m.testing)
defer storage.Close() defer storage.Close()
engine := promql.NewEngine(nil, nil, 10, 10*time.Second) engine := promql.NewEngine(nil, nil, 10, 10*time.Second)
@ -158,10 +157,15 @@ func (t testalertsrulesfunc) RuleGroups() []*rules.Group {
r = append(r, alertrule) r = append(r, alertrule)
} }
group := rules.NewGroup("grp", "/path/to/file", time.Second, r, opts) recordingExpr, err := promql.ParseExpr(`vector(1)`)
fmt.Println(group) if err != nil {
return []*rules.Group{group} m.testing.Fatalf("unable to parse alert expression: %s", err)
}
recordingRule := rules.NewRecordingRule("recording-rule-1", recordingExpr, labels.Labels{})
r = append(r, recordingRule)
group := rules.NewGroup("grp", "/path/to/file", time.Second, r, opts)
return []*rules.Group{group}
} }
var samplePrometheusCfg = config.Config{ var samplePrometheusCfg = config.Config{
@ -196,10 +200,14 @@ func TestEndpoints(t *testing.T) {
now := time.Now() now := time.Now()
t.Run("local", func(t *testing.T) { var algr rulesRetrieverMock
algr.testing = t
algr.AlertingRules()
algr.RuleGroups()
var algr testalertsrulesfunc t.Run("local", func(t *testing.T) {
algr.test = t var algr rulesRetrieverMock
algr.testing = t
algr.AlertingRules() algr.AlertingRules()
@ -210,11 +218,11 @@ func TestEndpoints(t *testing.T) {
QueryEngine: suite.QueryEngine(), QueryEngine: suite.QueryEngine(),
targetRetriever: testTargetRetriever{}, targetRetriever: testTargetRetriever{},
alertmanagerRetriever: testAlertmanagerRetriever{}, alertmanagerRetriever: testAlertmanagerRetriever{},
now: func() time.Time { return now }, now: func() time.Time { return now },
config: func() config.Config { return samplePrometheusCfg }, config: func() config.Config { return samplePrometheusCfg },
flagsMap: sampleFlagMap, flagsMap: sampleFlagMap,
ready: func(f http.HandlerFunc) http.HandlerFunc { return f }, ready: func(f http.HandlerFunc) http.HandlerFunc { return f },
alertsrulesRetreiver: algr, rulesRetriever: algr,
} }
testEndpoints(t, api, true) testEndpoints(t, api, true)
@ -251,8 +259,8 @@ func TestEndpoints(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
var algr testalertsrulesfunc var algr rulesRetrieverMock
algr.test = t algr.testing = t
algr.AlertingRules() algr.AlertingRules()
@ -263,11 +271,11 @@ func TestEndpoints(t *testing.T) {
QueryEngine: suite.QueryEngine(), QueryEngine: suite.QueryEngine(),
targetRetriever: testTargetRetriever{}, targetRetriever: testTargetRetriever{},
alertmanagerRetriever: testAlertmanagerRetriever{}, alertmanagerRetriever: testAlertmanagerRetriever{},
now: func() time.Time { return now }, now: func() time.Time { return now },
config: func() config.Config { return samplePrometheusCfg }, config: func() config.Config { return samplePrometheusCfg },
flagsMap: sampleFlagMap, flagsMap: sampleFlagMap,
ready: func(f http.HandlerFunc) http.HandlerFunc { return f }, ready: func(f http.HandlerFunc) http.HandlerFunc { return f },
alertsrulesRetreiver: algr, rulesRetriever: algr,
} }
testEndpoints(t, api, false) testEndpoints(t, api, false)
@ -652,37 +660,41 @@ func testEndpoints(t *testing.T, api *API, testLabelAPI bool) {
{ {
endpoint: api.alerts, endpoint: api.alerts,
response: &AlertDiscovery{ response: &AlertDiscovery{
Alertgrps: []*Alertgrp{ Alerts: []*Alert{},
{
Name: "test_metric3",
Query: "absent(test_metric3) != 1",
Duration: "1s",
Alerts: nil,
Annotations: labels.Labels{},
},
{
Name: "test_metric4",
Query: "up == 1",
Duration: "1s",
Alerts: nil,
Annotations: labels.Labels{},
},
},
}, },
}, },
{ {
endpoint: api.rules, endpoint: api.rules,
response: &GroupDiscovery{ response: &RuleDiscovery{
Rulegrps: []*Rulegrp{ RuleGroups: []*RuleGroup{
{ {
Name: "grp", Name: "grp",
File: "/path/to/file", File: "/path/to/file",
Rules: []*Ruleinfo{ Interval: 1,
{ Rules: []rule{
Rule: "alert: test_metric3\nexpr: absent(test_metric3) != 1\nfor: 1s\n", alertingRule{
Name: "test_metric3",
Query: "absent(test_metric3) != 1",
Duration: 1,
Labels: labels.Labels{},
Annotations: labels.Labels{},
Alerts: []*Alert{},
Type: "alerting",
}, },
{ alertingRule{
Rule: "alert: test_metric4\nexpr: up == 1\nfor: 1s\n", Name: "test_metric4",
Query: "up == 1",
Duration: 1,
Labels: labels.Labels{},
Annotations: labels.Labels{},
Alerts: []*Alert{},
Type: "alerting",
},
recordingRule{
Name: "recording-rule-1",
Query: "vector(1)",
Labels: labels.Labels{},
Type: "recording",
}, },
}, },
}, },
@ -768,7 +780,21 @@ func testEndpoints(t *testing.T, api *API, testLabelAPI bool) {
t.Fatalf("Expected error of type %q but got none", test.errType) t.Fatalf("Expected error of type %q but got none", test.errType)
} }
if !reflect.DeepEqual(resp, test.response) { if !reflect.DeepEqual(resp, test.response) {
t.Fatalf("Response does not match, expected:\n%+v\ngot:\n%+v", test.response, resp) respJSON, err := json.Marshal(resp)
if err != nil {
t.Fatalf("failed to marshal response as JSON: %v", err.Error())
}
expectedRespJSON, err := json.Marshal(test.response)
if err != nil {
t.Fatalf("failed to marshal expected response as JSON: %v", err.Error())
}
t.Fatalf(
"Response does not match, expected:\n%+v\ngot:\n%+v",
string(expectedRespJSON),
string(respJSON),
)
} }
} }
} }