Browse Source

Consider a series stale after 4.1 intervals with no data.

To cover the cases where stale markers may not be available,
we need to infer the interval and mark series stale based on that.
Since stale markers are missing, this is less accurate; however,
it should be good enough for these cases.

We need 4 intervals because, say, we had data at t=0 and t=10
coming via federation. The next data point should be at t=20; however, it
could take up to t=30 for it to actually be ingested, t=40 for it to be
scraped via federation, and t=50 for it to be ingested by federation.
We then add 10% on top of that for slack, as we do elsewhere.
pull/2681/head^2
Brian Brazil 8 years ago
parent
commit
220e78b9c3
  1. 19
      promql/engine.go
  2. 27
      promql/testdata/staleness.test
  3. 2
      web/federate.go

19
promql/engine.go

@ -751,8 +751,10 @@ func (ev *evaluator) vectorSelector(node *VectorSelector) Vector {
} }
t, v := it.Values() t, v := it.Values()
peek := 1
if !ok || t > refTime { if !ok || t > refTime {
t, v, ok = it.PeekBack(1) t, v, ok = it.PeekBack(peek)
peek += 1
if !ok || t < refTime-durationMilliseconds(StalenessDelta) { if !ok || t < refTime-durationMilliseconds(StalenessDelta) {
continue continue
} }
@ -760,6 +762,21 @@ func (ev *evaluator) vectorSelector(node *VectorSelector) Vector {
if value.IsStaleNaN(v) { if value.IsStaleNaN(v) {
continue continue
} }
// Find timestamp before this point, within the staleness delta.
prevT, _, ok := it.PeekBack(peek)
if ok && prevT >= refTime-durationMilliseconds(StalenessDelta) {
interval := t - prevT
if interval*4+interval/10 < refTime-t {
// It is more than 4 (+10% for safety) intervals
// since the last data point, skip as stale.
//
// We need 4 to allow for federation, as with a 10s interval an eval
// started at t=10 could be ingested at t=20, scraped for federation at
// t=30 and only ingested by federation at t=40.
continue
}
}
vec = append(vec, Sample{ vec = append(vec, Sample{
Metric: node.series[i].Labels(), Metric: node.series[i].Labels(),
Point: Point{V: v, T: t}, Point: Point{V: v, T: t},

27
promql/testdata/staleness.test vendored

@ -10,6 +10,15 @@ eval instant at 20s metric
eval instant at 30s metric eval instant at 30s metric
{__name__="metric"} 2 {__name__="metric"} 2
eval instant at 40s metric
{__name__="metric"} 2
# It goes stale 4 intervals + 10% after the last sample.
eval instant at 71s metric
{__name__="metric"} 2
eval instant at 72s metric
# Range vector ignores stale sample. # Range vector ignores stale sample.
eval instant at 30s count_over_time(metric[1m]) eval instant at 30s count_over_time(metric[1m])
@ -22,3 +31,21 @@ eval instant at 20s count_over_time(metric[1s])
eval instant at 20s count_over_time(metric[10s]) eval instant at 20s count_over_time(metric[10s])
{} 1 {} 1
clear
load 10s
metric 0
# Series with single point goes stale after 5 minutes.
eval instant at 0s metric
{__name__="metric"} 0
eval instant at 150s metric
{__name__="metric"} 0
eval instant at 300s metric
{__name__="metric"} 0
eval instant at 301s metric

2
web/federate.go

@ -94,7 +94,7 @@ func (h *Handler) federation(w http.ResponseWriter, req *http.Request) {
if ok { if ok {
t, v = it.Values() t, v = it.Values()
} else { } else {
t, v, ok = it.PeekBack(0) t, v, ok = it.PeekBack(1)
if !ok { if !ok {
continue continue
} }

Loading…
Cancel
Save