2017-05-10 09:44:13 +00:00
|
|
|
// Copyright 2016 The Prometheus Authors
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package remote
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bufio"
|
|
|
|
"bytes"
|
2017-10-25 04:21:42 +00:00
|
|
|
"context"
|
2017-05-10 09:44:13 +00:00
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
"io/ioutil"
|
|
|
|
"net/http"
|
2020-06-16 14:11:41 +00:00
|
|
|
"strconv"
|
2019-08-28 13:25:28 +00:00
|
|
|
"strings"
|
2017-05-10 09:44:13 +00:00
|
|
|
"time"
|
|
|
|
|
2017-11-01 22:03:46 +00:00
|
|
|
"github.com/gogo/protobuf/proto"
|
2017-05-10 09:44:13 +00:00
|
|
|
"github.com/golang/snappy"
|
2020-04-23 00:05:55 +00:00
|
|
|
"github.com/opentracing/opentracing-go"
|
2019-03-25 23:01:12 +00:00
|
|
|
"github.com/pkg/errors"
|
2020-06-16 14:11:41 +00:00
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
2019-03-25 23:01:12 +00:00
|
|
|
config_util "github.com/prometheus/common/config"
|
2017-10-25 04:21:42 +00:00
|
|
|
"github.com/prometheus/common/model"
|
2018-11-23 14:49:49 +00:00
|
|
|
"github.com/prometheus/common/version"
|
2017-05-10 09:44:13 +00:00
|
|
|
|
2020-05-02 20:41:55 +00:00
|
|
|
"github.com/opentracing-contrib/go-stdlib/nethttp"
|
2017-07-12 21:06:35 +00:00
|
|
|
"github.com/prometheus/prometheus/prompb"
|
2017-05-10 09:44:13 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
const maxErrMsgLen = 256
|
|
|
|
|
2018-11-23 14:49:49 +00:00
|
|
|
var userAgent = fmt.Sprintf("Prometheus/%s", version.Version)
|
|
|
|
|
2020-06-24 13:41:52 +00:00
|
|
|
var (
|
|
|
|
remoteReadQueriesTotal = prometheus.NewCounterVec(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Namespace: namespace,
|
|
|
|
Subsystem: subsystem,
|
|
|
|
Name: "read_queries_total",
|
|
|
|
Help: "The total number of remote read queries.",
|
|
|
|
},
|
|
|
|
[]string{remoteName, endpoint, "code"},
|
|
|
|
)
|
|
|
|
remoteReadQueries = prometheus.NewGaugeVec(
|
|
|
|
prometheus.GaugeOpts{
|
|
|
|
Namespace: namespace,
|
|
|
|
Subsystem: subsystem,
|
|
|
|
Name: "remote_read_queries",
|
|
|
|
Help: "The number of in-flight remote read queries.",
|
|
|
|
},
|
|
|
|
[]string{remoteName, endpoint},
|
|
|
|
)
|
|
|
|
remoteReadQueryDuration = prometheus.NewHistogramVec(
|
|
|
|
prometheus.HistogramOpts{
|
|
|
|
Namespace: namespace,
|
|
|
|
Subsystem: subsystem,
|
|
|
|
Name: "read_request_duration_seconds",
|
|
|
|
Help: "Histogram of the latency for remote read requests.",
|
|
|
|
Buckets: append(prometheus.DefBuckets, 25, 60),
|
|
|
|
},
|
|
|
|
[]string{remoteName, endpoint},
|
|
|
|
)
|
2020-06-16 14:11:41 +00:00
|
|
|
)
|
|
|
|
|
2020-06-24 13:41:52 +00:00
|
|
|
func init() {
|
|
|
|
prometheus.MustRegister(remoteReadQueriesTotal, remoteReadQueries, remoteReadQueryDuration)
|
|
|
|
}
|
|
|
|
|
|
|
|
// client allows reading and writing from/to a remote HTTP endpoint.
|
|
|
|
type client struct {
|
2019-12-12 20:47:23 +00:00
|
|
|
remoteName string // Used to differentiate clients in metrics.
|
|
|
|
url *config_util.URL
|
|
|
|
client *http.Client
|
|
|
|
timeout time.Duration
|
2020-06-24 13:41:52 +00:00
|
|
|
|
|
|
|
readQueries prometheus.Gauge
|
|
|
|
readQueriesTotal *prometheus.CounterVec
|
|
|
|
readQueriesDuration prometheus.Observer
|
2017-05-10 09:44:13 +00:00
|
|
|
}
|
|
|
|
|
2020-06-24 13:41:52 +00:00
|
|
|
// ClientConfig configures a client.
|
2017-10-23 13:53:43 +00:00
|
|
|
type ClientConfig struct {
|
Refactor SD configuration to remove `config` dependency (#3629)
* refactor: move targetGroup struct and CheckOverflow() to their own package
* refactor: move auth and security related structs to a utility package, fix import error in utility package
* refactor: Azure SD, remove SD struct from config
* refactor: DNS SD, remove SD struct from config into dns package
* refactor: ec2 SD, move SD struct from config into the ec2 package
* refactor: file SD, move SD struct from config to file discovery package
* refactor: gce, move SD struct from config to gce discovery package
* refactor: move HTTPClientConfig and URL into util/config, fix import error in httputil
* refactor: consul, move SD struct from config into consul discovery package
* refactor: marathon, move SD struct from config into marathon discovery package
* refactor: triton, move SD struct from config to triton discovery package, fix test
* refactor: zookeeper, move SD structs from config to zookeeper discovery package
* refactor: openstack, remove SD struct from config, move into openstack discovery package
* refactor: kubernetes, move SD struct from config into kubernetes discovery package
* refactor: notifier, use targetgroup package instead of config
* refactor: tests for file, marathon, triton SD - use targetgroup package instead of config.TargetGroup
* refactor: retrieval, use targetgroup package instead of config.TargetGroup
* refactor: storage, use config util package
* refactor: discovery manager, use targetgroup package instead of config.TargetGroup
* refactor: use HTTPClient and TLS config from configUtil instead of config
* refactor: tests, use targetgroup package instead of config.TargetGroup
* refactor: fix tagetgroup.Group pointers that were removed by mistake
* refactor: openstack, kubernetes: drop prefixes
* refactor: remove import aliases forced due to vscode bug
* refactor: move main SD struct out of config into discovery/config
* refactor: rename configUtil to config_util
* refactor: rename yamlUtil to yaml_config
* refactor: kubernetes, remove prefixes
* refactor: move the TargetGroup package to discovery/
* refactor: fix order of imports
2017-12-29 20:01:34 +00:00
|
|
|
URL *config_util.URL
|
2017-10-23 13:53:43 +00:00
|
|
|
Timeout model.Duration
|
Refactor SD configuration to remove `config` dependency (#3629)
* refactor: move targetGroup struct and CheckOverflow() to their own package
* refactor: move auth and security related structs to a utility package, fix import error in utility package
* refactor: Azure SD, remove SD struct from config
* refactor: DNS SD, remove SD struct from config into dns package
* refactor: ec2 SD, move SD struct from config into the ec2 package
* refactor: file SD, move SD struct from config to file discovery package
* refactor: gce, move SD struct from config to gce discovery package
* refactor: move HTTPClientConfig and URL into util/config, fix import error in httputil
* refactor: consul, move SD struct from config into consul discovery package
* refactor: marathon, move SD struct from config into marathon discovery package
* refactor: triton, move SD struct from config to triton discovery package, fix test
* refactor: zookeeper, move SD structs from config to zookeeper discovery package
* refactor: openstack, remove SD struct from config, move into openstack discovery package
* refactor: kubernetes, move SD struct from config into kubernetes discovery package
* refactor: notifier, use targetgroup package instead of config
* refactor: tests for file, marathon, triton SD - use targetgroup package instead of config.TargetGroup
* refactor: retrieval, use targetgroup package instead of config.TargetGroup
* refactor: storage, use config util package
* refactor: discovery manager, use targetgroup package instead of config.TargetGroup
* refactor: use HTTPClient and TLS config from configUtil instead of config
* refactor: tests, use targetgroup package instead of config.TargetGroup
* refactor: fix tagetgroup.Group pointers that were removed by mistake
* refactor: openstack, kubernetes: drop prefixes
* refactor: remove import aliases forced due to vscode bug
* refactor: move main SD struct out of config into discovery/config
* refactor: rename configUtil to config_util
* refactor: rename yamlUtil to yaml_config
* refactor: kubernetes, remove prefixes
* refactor: move the TargetGroup package to discovery/
* refactor: fix order of imports
2017-12-29 20:01:34 +00:00
|
|
|
HTTPClientConfig config_util.HTTPClientConfig
|
2017-05-10 09:44:13 +00:00
|
|
|
}
|
|
|
|
|
2020-06-24 13:41:52 +00:00
|
|
|
// ReadClient uses the SAMPLES method of remote read to read series samples from remote server.
|
|
|
|
// TODO(bwplotka): Add streamed chunked remote read method as well (https://github.com/prometheus/prometheus/issues/5926).
|
|
|
|
type ReadClient interface {
|
|
|
|
Read(ctx context.Context, query *prompb.Query) (*prompb.QueryResult, error)
|
|
|
|
}
|
|
|
|
|
|
|
|
// newReadClient creates a new client for remote read.
|
|
|
|
func newReadClient(name string, conf *ClientConfig) (ReadClient, error) {
|
|
|
|
httpClient, err := config_util.NewClientFromConfig(conf.HTTPClientConfig, "remote_storage", false)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return &client{
|
|
|
|
remoteName: name,
|
|
|
|
url: conf.URL,
|
|
|
|
client: httpClient,
|
|
|
|
timeout: time.Duration(conf.Timeout),
|
|
|
|
readQueries: remoteReadQueries.WithLabelValues(name, conf.URL.String()),
|
|
|
|
readQueriesTotal: remoteReadQueriesTotal.MustCurryWith(prometheus.Labels{remoteName: name, endpoint: conf.URL.String()}),
|
|
|
|
readQueriesDuration: remoteReadQueryDuration.WithLabelValues(name, conf.URL.String()),
|
|
|
|
}, nil
|
2020-06-16 14:11:41 +00:00
|
|
|
}
|
|
|
|
|
2020-06-24 13:41:52 +00:00
|
|
|
// NewWriteClient creates a new client for remote write.
|
|
|
|
func NewWriteClient(name string, conf *ClientConfig) (WriteClient, error) {
|
2019-08-14 09:00:39 +00:00
|
|
|
httpClient, err := config_util.NewClientFromConfig(conf.HTTPClientConfig, "remote_storage", false)
|
2017-05-10 09:44:13 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2020-05-02 20:41:55 +00:00
|
|
|
t := httpClient.Transport
|
|
|
|
httpClient.Transport = &nethttp.Transport{
|
|
|
|
RoundTripper: t,
|
|
|
|
}
|
|
|
|
|
2020-06-24 13:41:52 +00:00
|
|
|
return &client{
|
|
|
|
remoteName: name,
|
2019-12-12 20:47:23 +00:00
|
|
|
url: conf.URL,
|
|
|
|
client: httpClient,
|
|
|
|
timeout: time.Duration(conf.Timeout),
|
2017-05-10 09:44:13 +00:00
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// recoverableError marks an error as recoverable. Store wraps errors from
// sending the HTTP request and errors built from 5xx responses in this type.
type recoverableError struct {
	error
}
|
|
|
|
|
2018-09-07 21:26:04 +00:00
|
|
|
// Store sends a batch of samples to the HTTP endpoint, the request is the proto marshalled
|
|
|
|
// and encoded bytes from codec.go.
|
2020-06-24 13:41:52 +00:00
|
|
|
func (c *client) Store(ctx context.Context, req []byte) error {
|
2018-09-07 21:26:04 +00:00
|
|
|
httpReq, err := http.NewRequest("POST", c.url.String(), bytes.NewReader(req))
|
2017-05-10 09:44:13 +00:00
|
|
|
if err != nil {
|
2019-09-30 15:54:55 +00:00
|
|
|
// Errors from NewRequest are from unparsable URLs, so are not
|
2017-05-10 09:44:13 +00:00
|
|
|
// recoverable.
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
httpReq.Header.Add("Content-Encoding", "snappy")
|
|
|
|
httpReq.Header.Set("Content-Type", "application/x-protobuf")
|
2018-11-23 14:49:49 +00:00
|
|
|
httpReq.Header.Set("User-Agent", userAgent)
|
2017-05-10 09:44:13 +00:00
|
|
|
httpReq.Header.Set("X-Prometheus-Remote-Write-Version", "0.1.0")
|
2020-01-27 14:43:20 +00:00
|
|
|
ctx, cancel := context.WithTimeout(ctx, c.timeout)
|
2017-05-10 09:44:13 +00:00
|
|
|
defer cancel()
|
|
|
|
|
2020-06-01 15:21:13 +00:00
|
|
|
httpReq = httpReq.WithContext(ctx)
|
|
|
|
|
|
|
|
if parentSpan := opentracing.SpanFromContext(ctx); parentSpan != nil {
|
|
|
|
var ht *nethttp.Tracer
|
|
|
|
httpReq, ht = nethttp.TraceRequest(
|
|
|
|
parentSpan.Tracer(),
|
|
|
|
httpReq,
|
|
|
|
nethttp.OperationName("Remote Store"),
|
|
|
|
nethttp.ClientTrace(false),
|
|
|
|
)
|
|
|
|
defer ht.Finish()
|
|
|
|
}
|
|
|
|
|
|
|
|
httpResp, err := c.client.Do(httpReq)
|
2017-05-10 09:44:13 +00:00
|
|
|
if err != nil {
|
|
|
|
// Errors from client.Do are from (for example) network errors, so are
|
|
|
|
// recoverable.
|
|
|
|
return recoverableError{err}
|
|
|
|
}
|
2019-04-18 08:50:37 +00:00
|
|
|
defer func() {
|
|
|
|
io.Copy(ioutil.Discard, httpResp.Body)
|
|
|
|
httpResp.Body.Close()
|
|
|
|
}()
|
2017-05-10 09:44:13 +00:00
|
|
|
|
|
|
|
if httpResp.StatusCode/100 != 2 {
|
|
|
|
scanner := bufio.NewScanner(io.LimitReader(httpResp.Body, maxErrMsgLen))
|
|
|
|
line := ""
|
|
|
|
if scanner.Scan() {
|
|
|
|
line = scanner.Text()
|
|
|
|
}
|
2019-03-25 23:01:12 +00:00
|
|
|
err = errors.Errorf("server returned HTTP status %s: %s", httpResp.Status, line)
|
2017-05-10 09:44:13 +00:00
|
|
|
}
|
|
|
|
if httpResp.StatusCode/100 == 5 {
|
|
|
|
return recoverableError{err}
|
|
|
|
}
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2019-12-12 20:47:23 +00:00
|
|
|
// Name uniquely identifies the client.
|
2020-06-24 13:41:52 +00:00
|
|
|
func (c client) Name() string {
|
2019-12-12 20:47:23 +00:00
|
|
|
return c.remoteName
|
|
|
|
}
|
|
|
|
|
|
|
|
// Endpoint is the remote read or write endpoint.
|
2020-06-24 13:41:52 +00:00
|
|
|
func (c client) Endpoint() string {
|
2019-12-12 20:47:23 +00:00
|
|
|
return c.url.String()
|
2017-05-10 09:44:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Read reads from a remote endpoint.
|
2020-06-24 13:41:52 +00:00
|
|
|
func (c *client) Read(ctx context.Context, query *prompb.Query) (*prompb.QueryResult, error) {
|
|
|
|
c.readQueries.Inc()
|
|
|
|
defer c.readQueries.Dec()
|
|
|
|
|
2017-10-23 13:44:57 +00:00
|
|
|
req := &prompb.ReadRequest{
|
2017-10-23 13:53:43 +00:00
|
|
|
// TODO: Support batching multiple queries into one read request,
|
|
|
|
// as the protobuf interface allows for it.
|
2017-10-23 13:44:57 +00:00
|
|
|
Queries: []*prompb.Query{
|
|
|
|
query,
|
|
|
|
},
|
|
|
|
}
|
2017-05-10 09:44:13 +00:00
|
|
|
data, err := proto.Marshal(req)
|
|
|
|
if err != nil {
|
2019-03-25 23:01:12 +00:00
|
|
|
return nil, errors.Wrapf(err, "unable to marshal read request")
|
2017-05-10 09:44:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
compressed := snappy.Encode(nil, data)
|
|
|
|
httpReq, err := http.NewRequest("POST", c.url.String(), bytes.NewReader(compressed))
|
|
|
|
if err != nil {
|
2019-03-25 23:01:12 +00:00
|
|
|
return nil, errors.Wrap(err, "unable to create request")
|
2017-05-10 09:44:13 +00:00
|
|
|
}
|
|
|
|
httpReq.Header.Add("Content-Encoding", "snappy")
|
2018-07-26 11:45:04 +00:00
|
|
|
httpReq.Header.Add("Accept-Encoding", "snappy")
|
2017-05-10 09:44:13 +00:00
|
|
|
httpReq.Header.Set("Content-Type", "application/x-protobuf")
|
2018-11-23 14:49:49 +00:00
|
|
|
httpReq.Header.Set("User-Agent", userAgent)
|
2017-05-10 09:44:13 +00:00
|
|
|
httpReq.Header.Set("X-Prometheus-Remote-Read-Version", "0.1.0")
|
|
|
|
|
|
|
|
ctx, cancel := context.WithTimeout(ctx, c.timeout)
|
|
|
|
defer cancel()
|
|
|
|
|
2020-05-02 20:41:55 +00:00
|
|
|
httpReq = httpReq.WithContext(ctx)
|
|
|
|
|
|
|
|
if parentSpan := opentracing.SpanFromContext(ctx); parentSpan != nil {
|
|
|
|
var ht *nethttp.Tracer
|
|
|
|
httpReq, ht = nethttp.TraceRequest(
|
|
|
|
parentSpan.Tracer(),
|
|
|
|
httpReq,
|
|
|
|
nethttp.OperationName("Remote Read"),
|
|
|
|
nethttp.ClientTrace(false),
|
|
|
|
)
|
|
|
|
defer ht.Finish()
|
|
|
|
}
|
|
|
|
|
2020-06-24 13:41:52 +00:00
|
|
|
start := time.Now()
|
2020-05-02 20:41:55 +00:00
|
|
|
httpResp, err := c.client.Do(httpReq)
|
2017-05-10 09:44:13 +00:00
|
|
|
if err != nil {
|
2019-03-25 23:01:12 +00:00
|
|
|
return nil, errors.Wrap(err, "error sending request")
|
2017-05-10 09:44:13 +00:00
|
|
|
}
|
2019-04-18 08:50:37 +00:00
|
|
|
defer func() {
|
|
|
|
io.Copy(ioutil.Discard, httpResp.Body)
|
|
|
|
httpResp.Body.Close()
|
|
|
|
}()
|
2020-06-24 13:41:52 +00:00
|
|
|
c.readQueriesDuration.Observe(time.Since(start).Seconds())
|
|
|
|
c.readQueriesTotal.WithLabelValues(strconv.Itoa(httpResp.StatusCode)).Inc()
|
2020-06-16 14:11:41 +00:00
|
|
|
|
2017-05-10 09:44:13 +00:00
|
|
|
compressed, err = ioutil.ReadAll(httpResp.Body)
|
|
|
|
if err != nil {
|
2019-08-28 13:25:28 +00:00
|
|
|
return nil, errors.Wrap(err, fmt.Sprintf("error reading response. HTTP status code: %s", httpResp.Status))
|
|
|
|
}
|
|
|
|
|
|
|
|
if httpResp.StatusCode/100 != 2 {
|
|
|
|
return nil, errors.Errorf("remote server %s returned HTTP status %s: %s", c.url.String(), httpResp.Status, strings.TrimSpace(string(compressed)))
|
2017-05-10 09:44:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
uncompressed, err := snappy.Decode(nil, compressed)
|
|
|
|
if err != nil {
|
2019-03-25 23:01:12 +00:00
|
|
|
return nil, errors.Wrap(err, "error reading response")
|
2017-05-10 09:44:13 +00:00
|
|
|
}
|
|
|
|
|
2017-07-12 21:06:35 +00:00
|
|
|
var resp prompb.ReadResponse
|
2017-05-10 09:44:13 +00:00
|
|
|
err = proto.Unmarshal(uncompressed, &resp)
|
|
|
|
if err != nil {
|
2019-03-25 23:01:12 +00:00
|
|
|
return nil, errors.Wrap(err, "unable to unmarshal response body")
|
2017-05-10 09:44:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if len(resp.Results) != len(req.Queries) {
|
2019-03-25 23:01:12 +00:00
|
|
|
return nil, errors.Errorf("responses: want %d, got %d", len(req.Queries), len(resp.Results))
|
2017-05-10 09:44:13 +00:00
|
|
|
}
|
|
|
|
|
2017-10-23 20:28:17 +00:00
|
|
|
return resp.Results[0], nil
|
2017-05-10 09:44:13 +00:00
|
|
|
}
|