|
|
|
@ -14,7 +14,6 @@
|
|
|
|
|
package retrieval
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"errors"
|
|
|
|
|
"fmt"
|
|
|
|
|
"io"
|
|
|
|
|
"io/ioutil"
|
|
|
|
@ -24,58 +23,16 @@ import (
|
|
|
|
|
"sync"
|
|
|
|
|
"time"
|
|
|
|
|
|
|
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
|
|
|
"github.com/prometheus/common/expfmt"
|
|
|
|
|
"github.com/prometheus/common/log"
|
|
|
|
|
"github.com/prometheus/common/model"
|
|
|
|
|
"golang.org/x/net/context"
|
|
|
|
|
"golang.org/x/net/context/ctxhttp"
|
|
|
|
|
|
|
|
|
|
"github.com/prometheus/prometheus/config"
|
|
|
|
|
"github.com/prometheus/prometheus/storage"
|
|
|
|
|
"github.com/prometheus/prometheus/storage/local"
|
|
|
|
|
"github.com/prometheus/prometheus/util/httputil"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
const (
	// Names of the synthetic metrics emitted by (*Target).report for every
	// scrape: target reachability and scrape duration.
	scrapeHealthMetricName   = "up"
	scrapeDurationMetricName = "scrape_duration_seconds"

	// Capacity of the channel to buffer samples during ingestion.
	// NOTE(review): no such channel is visible in this chunk — confirm this
	// constant is still referenced elsewhere in the file.
	ingestedSamplesCap = 256

	// Constants for instrumentation.
	namespace = "prometheus" // namespace prefix for the metrics below
	interval  = "interval"   // label name carrying the configured scrape interval
)
|
|
|
|
|
|
|
|
|
|
var (
	// errSkippedScrape is recorded as a target's last error when a scrape is
	// skipped because the sample appender reports it needs throttling
	// (see RunScraper).
	errSkippedScrape = errors.New("scrape skipped due to throttled ingestion")

	// targetIntervalLength observes the actual elapsed time between
	// consecutive scrapes, labeled by the configured interval string.
	targetIntervalLength = prometheus.NewSummaryVec(
		prometheus.SummaryOpts{
			Namespace:  namespace,
			Name:       "target_interval_length_seconds",
			Help:       "Actual intervals between scrapes.",
			Objectives: map[float64]float64{0.01: 0.001, 0.05: 0.005, 0.5: 0.05, 0.90: 0.01, 0.99: 0.001},
		},
		[]string{interval},
	)
	// targetSkippedScrapes counts scrapes skipped due to storage throttling,
	// labeled by the configured interval string.
	targetSkippedScrapes = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: namespace,
			Name:      "target_skipped_scrapes_total",
			Help:      "Total number of scrapes that were skipped because the metric storage was throttled.",
		},
		[]string{interval},
	)
)
|
|
|
|
|
|
|
|
|
|
func init() {
|
|
|
|
|
prometheus.MustRegister(targetIntervalLength)
|
|
|
|
|
prometheus.MustRegister(targetSkippedScrapes)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// TargetHealth describes the health state of a target.
// NOTE(review): the concrete health states are presumably declared as
// constants of this type elsewhere in the file — not visible in this chunk.
type TargetHealth int
|
|
|
|
|
|
|
|
|
@ -163,10 +120,8 @@ func (ts *TargetStatus) setLastError(err error) {
|
|
|
|
|
type Target struct {
|
|
|
|
|
// The status object for the target. It is only set once on initialization.
|
|
|
|
|
status *TargetStatus
|
|
|
|
|
// Closing scraperStopping signals that scraping should stop.
|
|
|
|
|
scraperStopping chan struct{}
|
|
|
|
|
// Closing scraperStopped signals that scraping has been stopped.
|
|
|
|
|
scraperStopped chan struct{}
|
|
|
|
|
|
|
|
|
|
scrapeLoop *scrapeLoop
|
|
|
|
|
|
|
|
|
|
// Mutex protects the members below.
|
|
|
|
|
sync.RWMutex
|
|
|
|
@ -189,13 +144,11 @@ func NewTarget(cfg *config.ScrapeConfig, labels, metaLabels model.LabelSet) (*Ta
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
t := &Target{
|
|
|
|
|
status: &TargetStatus{},
|
|
|
|
|
scraperStopping: make(chan struct{}),
|
|
|
|
|
scraperStopped: make(chan struct{}),
|
|
|
|
|
scrapeConfig: cfg,
|
|
|
|
|
labels: labels,
|
|
|
|
|
metaLabels: metaLabels,
|
|
|
|
|
httpClient: client,
|
|
|
|
|
status: &TargetStatus{},
|
|
|
|
|
scrapeConfig: cfg,
|
|
|
|
|
labels: labels,
|
|
|
|
|
metaLabels: metaLabels,
|
|
|
|
|
httpClient: client,
|
|
|
|
|
}
|
|
|
|
|
return t, nil
|
|
|
|
|
}
|
|
|
|
@ -385,184 +338,60 @@ func (t *Target) InstanceIdentifier() string {
|
|
|
|
|
return t.host()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// RunScraper implements Target.
|
|
|
|
|
func (t *Target) RunScraper(sampleAppender storage.SampleAppender) {
|
|
|
|
|
log.Debugf("Running scraper for %v", t)
|
|
|
|
|
|
|
|
|
|
defer close(t.scraperStopped)
|
|
|
|
|
|
|
|
|
|
lastScrapeInterval := t.interval()
|
|
|
|
|
|
|
|
|
|
select {
|
|
|
|
|
case <-time.After(t.offset(lastScrapeInterval)):
|
|
|
|
|
// Continue after scraping offset.
|
|
|
|
|
case <-t.scraperStopping:
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ticker := time.NewTicker(lastScrapeInterval)
|
|
|
|
|
defer ticker.Stop()
|
|
|
|
|
|
|
|
|
|
t.scrape(sampleAppender)
|
|
|
|
|
|
|
|
|
|
// Explanation of the contraption below:
|
|
|
|
|
//
|
|
|
|
|
// In case t.scraperStopping has something to receive, we want to read
|
|
|
|
|
// from that channel rather than starting a new scrape (which might take very
|
|
|
|
|
// long). That's why the outer select has no ticker.C. Should t.scraperStopping
|
|
|
|
|
// not have anything to receive, we go into the inner select, where ticker.C
|
|
|
|
|
// is in the mix.
|
|
|
|
|
for {
|
|
|
|
|
select {
|
|
|
|
|
case <-t.scraperStopping:
|
|
|
|
|
return
|
|
|
|
|
default:
|
|
|
|
|
select {
|
|
|
|
|
case <-t.scraperStopping:
|
|
|
|
|
return
|
|
|
|
|
case <-ticker.C:
|
|
|
|
|
took := time.Since(t.status.LastScrape())
|
|
|
|
|
|
|
|
|
|
intervalStr := lastScrapeInterval.String()
|
|
|
|
|
|
|
|
|
|
// On changed scrape interval the new interval becomes effective
|
|
|
|
|
// after the next scrape.
|
|
|
|
|
if iv := t.interval(); iv != lastScrapeInterval {
|
|
|
|
|
ticker.Stop()
|
|
|
|
|
ticker = time.NewTicker(iv)
|
|
|
|
|
lastScrapeInterval = iv
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
targetIntervalLength.WithLabelValues(intervalStr).Observe(
|
|
|
|
|
float64(took) / float64(time.Second), // Sub-second precision.
|
|
|
|
|
)
|
|
|
|
|
if sampleAppender.NeedsThrottling() {
|
|
|
|
|
targetSkippedScrapes.WithLabelValues(intervalStr).Inc()
|
|
|
|
|
t.status.setLastError(errSkippedScrape)
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
t.scrape(sampleAppender)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// StopScraper implements Target.
//
// It signals the RunScraper goroutine to stop by closing t.scraperStopping
// and then blocks until RunScraper has closed t.scraperStopped, i.e. until
// scraping has fully terminated. It must be called at most once per target:
// closing an already-closed channel panics.
func (t *Target) StopScraper() {
	log.Debugf("Stopping scraper for target %v...", t)

	close(t.scraperStopping)
	// Wait for RunScraper to finish; it closes scraperStopped on return.
	<-t.scraperStopped

	log.Debugf("Scraper for target %v stopped.", t)
}
|
|
|
|
|
|
|
|
|
|
// acceptHeader is the HTTP Accept header sent with every scrape request.
// The q-values negotiate, in order of preference: the delimited-protobuf
// exposition format (q=0.7), the text format version 0.0.4 (q=0.3), the
// JSON telemetry format version 0.0.2 (q=0.2), and anything else (q=0.1).
const acceptHeader = `application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited;q=0.7,text/plain;version=0.0.4;q=0.3,application/json;schema="prometheus/telemetry";version=0.0.2;q=0.2,*/*;q=0.1`
|
|
|
|
|
|
|
|
|
|
// NOTE(review): this span is a CONFLATED DIFF, not valid Go. The +/- markers
// of a unified diff were stripped, so lines from BOTH the old version
// (scrape(appender storage.SampleAppender) error) and the new version
// (scrape(ctx context.Context) (model.Samples, error)) appear interleaved:
// two func signatures, paired old/new return statements, and a duplicate
// Dec: key in the SampleDecoder literal. It needs manual conflict
// resolution against the original commit; no code token is changed here.
func (t *Target) scrape(appender storage.SampleAppender) error { // (old signature)
	// (old) preamble: track err/start for the deferred report call.
	var (
		err   error
		start = time.Now()
	)
	defer func(appender storage.SampleAppender) {
		t.report(appender, start, time.Since(start), err)
	}(appender)

	// (new signature) — in the new version reporting moved to the caller.
func (t *Target) scrape(ctx context.Context) (model.Samples, error) {
	t.RLock()

	// (old) wrap the appender with the target's relabeling/limits wrapper.
	appender = t.wrapAppender(appender)

	client := t.httpClient
	t.RUnlock()

	start := time.Now() // (new) old version declared start in the var block above

	req, err := http.NewRequest("GET", t.URL().String(), nil)
	if err != nil {
		return err      // (old)
		return nil, err // (new)
	}
	req.Header.Add("Accept", acceptHeader)

	// (old) derives its own timeout context; the new version receives ctx.
	// NOTE(review): discarding the CancelFunc from context.WithTimeout leaks
	// the timer until expiry — worth fixing when the conflict is resolved.
	ctx, _ := context.WithTimeout(context.Background(), t.timeout())
	resp, err := ctxhttp.Do(ctx, client, req)
	if err != nil {
		return err      // (old)
		return nil, err // (new)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("server returned HTTP status %s", resp.Status)      // (old)
		return nil, fmt.Errorf("server returned HTTP status %s", resp.Status) // (new)
	}

	// (old) standalone decoder variable; the new version inlines it below.
	dec := expfmt.NewDecoder(resp.Body, expfmt.ResponseFormat(resp.Header))

	// (new) accumulators: allSamples collects the whole scrape, decSamples
	// is the per-Decode scratch vector.
	var (
		allSamples = make(model.Samples, 0, 200)
		decSamples = make(model.Vector, 0, 50)
	)
	sdec := expfmt.SampleDecoder{
		Dec: dec, // (old)
		Dec: expfmt.NewDecoder(resp.Body, expfmt.ResponseFormat(resp.Header)), // (new)
		Opts: &expfmt.DecodeOptions{
			Timestamp: model.TimeFromUnixNano(start.UnixNano()),
		},
	}

	// (old) per-iteration state: append samples directly, counting
	// out-of-order rejections instead of logging each one.
	var (
		samples       model.Vector
		numOutOfOrder int
		logger        = log.With("target", t.InstanceIdentifier())
	)
	for {
		if err = sdec.Decode(&samples); err != nil { // (old)
		if err = sdec.Decode(&decSamples); err != nil { // (new)
			break
		}
		// (old) ingest each decoded sample into the appender.
		for _, s := range samples {
			err := appender.Append(s)
			if err != nil {
				if err == local.ErrOutOfOrderSample {
					numOutOfOrder++
				} else {
					logger.With("sample", s).Warnf("Error inserting sample: %s", err)
				}
			}

		}
	}
	if numOutOfOrder > 0 { // (old)
		logger.With("numDropped", numOutOfOrder).Warn("Error on ingesting out-of-order samples") // (old)
		allSamples = append(allSamples, decSamples...) // (new) — belongs inside the decode loop
		decSamples = decSamples[:0]                    // (new)
	}

	if err == io.EOF {
		// Set err to nil since it is used in the scrape health recording.
		err = nil
	}
	return err             // (old)
	return allSamples, err // (new)
}
|
|
|
|
|
|
|
|
|
|
// NOTE(review): this span is a CONFLATED DIFF, not valid Go — two report
// signatures appear because the +/- markers of a unified diff were stripped.
// Old: report(app storage.SampleAppender, start, duration, err) appends the
// synthetic "up" and "scrape_duration_seconds" samples itself. New:
// report(start, dur, err) only updates target status. Needs manual conflict
// resolution; no code token is changed here.
func (t *Target) report(app storage.SampleAppender, start time.Time, duration time.Duration, err error) { // (old)
	t.status.setLastScrape(start) // (old) — the old version records the scrape time first
func (t *Target) report(start time.Time, dur time.Duration, err error) { // (new)
	t.status.setLastError(err)

	// Timestamp shared by both synthetic samples.
	ts := model.TimeFromUnixNano(start.UnixNano())

	// health is 1 on success, 0 (zero value) on error — the value of "up".
	var health model.SampleValue
	if err == nil {
		health = 1
	}

	healthSample := &model.Sample{
		Metric: model.Metric{
			model.MetricNameLabel: scrapeHealthMetricName,
		},
		Timestamp: ts,
		Value:     health,
	}
	durationSample := &model.Sample{
		Metric: model.Metric{
			model.MetricNameLabel: scrapeDurationMetricName,
		},
		Timestamp: ts,
		Value:     model.SampleValue(float64(duration) / float64(time.Second)),
	}

	// (old) wrap the appender so report samples get the target's labels.
	app = t.wrapReportingAppender(app)

	app.Append(healthSample)
	app.Append(durationSample)
	t.status.setLastScrape(start) // (new) — the new version records the scrape time last
}
|
|
|
|
|
|
|
|
|
|
// Merges the ingested sample's metric with the label set. On a collision the
|
|
|
|
|