@ -16,6 +16,7 @@ package remote
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"net/http"
@ -36,13 +37,14 @@ import (
"github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/prompb"
"github.com/prometheus/prometheus/storage"
"github.com/prometheus/prometheus/storage/remote/azuread"
"github.com/prometheus/prometheus/storage/remote/googleiam"
)
const maxErrMsgLen = 1024
const (
maxErrMsgLen = 1024
RemoteWriteVersionHeader = "X-Prometheus-Remote-Write-Version"
RemoteWriteVersion1HeaderValue = "0.1.0"
RemoteWriteVersion20HeaderValue = "2.0.0"
@ -68,9 +70,12 @@ var (
config . RemoteWriteProtoMsgV1 : appProtoContentType , // Also application/x-protobuf;proto=prometheus.WriteRequest but simplified for compatibility with 1.x spec.
config . RemoteWriteProtoMsgV2 : appProtoContentType + ";proto=io.prometheus.write.v2.Request" ,
}
)
var (
AcceptedResponseTypes = [ ] prompb . ReadRequest_ResponseType {
prompb . ReadRequest_STREAMED_XOR_CHUNKS ,
prompb . ReadRequest_SAMPLES ,
}
remoteReadQueriesTotal = prometheus . NewCounterVec (
prometheus . CounterOpts {
Namespace : namespace ,
@ -78,7 +83,7 @@ var (
Name : "read_queries_total" ,
Help : "The total number of remote read queries." ,
} ,
[ ] string { remoteName , endpoint , "code" } ,
[ ] string { remoteName , endpoint , "response_type" , " code" } ,
)
remoteReadQueries = prometheus . NewGaugeVec (
prometheus . GaugeOpts {
@ -94,13 +99,13 @@ var (
Namespace : namespace ,
Subsystem : subsystem ,
Name : "read_request_duration_seconds" ,
Help : "Histogram of the latency for remote read requests." ,
Help : "Histogram of the latency for remote read requests. Note that for streamed responses this is only the duration of the initial call and does not include the processing of the stream. " ,
Buckets : append ( prometheus . DefBuckets , 25 , 60 ) ,
NativeHistogramBucketFactor : 1.1 ,
NativeHistogramMaxBucketNumber : 100 ,
NativeHistogramMinResetDuration : 1 * time . Hour ,
} ,
[ ] string { remoteName , endpoint } ,
[ ] string { remoteName , endpoint , "response_type" } ,
)
)
@ -116,10 +121,11 @@ type Client struct {
timeout time . Duration
retryOnRateLimit bool
chunkedReadLimit uint64
readQueries prometheus . Gauge
readQueriesTotal * prometheus . CounterVec
readQueriesDuration prometheus . Observer
readQueriesDuration prometheus . ObserverVec
writeProtoMsg config . RemoteWriteProtoMsg
writeCompression Compression // Not exposed by ClientConfig for now.
@ -136,12 +142,13 @@ type ClientConfig struct {
Headers map [ string ] string
RetryOnRateLimit bool
WriteProtoMsg config . RemoteWriteProtoMsg
ChunkedReadLimit uint64
}
// ReadClient uses the SAMPLES method of remote read to read series samples from remote server.
// TODO(bwplotka): Add streamed chunked remote read method as well (https://github.com/prometheus/prometheus/issues/5926) .
// ReadClient will request the STREAMED_XOR_CHUNKS method of remote read but can
// also fall back to the SAMPLES method if necessary .
type ReadClient interface {
Read ( ctx context . Context , query * prompb . Query ) ( * prompb . QueryResul t, error )
Read ( ctx context . Context , query * prompb . Query , sortSeries bool ) ( storage . SeriesSe t, error )
}
// NewReadClient creates a new client for remote read.
@ -162,9 +169,10 @@ func NewReadClient(name string, conf *ClientConfig) (ReadClient, error) {
urlString : conf . URL . String ( ) ,
Client : httpClient ,
timeout : time . Duration ( conf . Timeout ) ,
chunkedReadLimit : conf . ChunkedReadLimit ,
readQueries : remoteReadQueries . WithLabelValues ( name , conf . URL . String ( ) ) ,
readQueriesTotal : remoteReadQueriesTotal . MustCurryWith ( prometheus . Labels { remoteName : name , endpoint : conf . URL . String ( ) } ) ,
readQueriesDuration : remoteReadQueryDuration . WithLabelValues ( name , conf . URL . String ( ) ) ,
readQueriesDuration : remoteReadQueryDuration . MustCurryWith ( prometheus . Labels { remoteName : name , endpoint : conf . URL . String ( ) } ) ,
} , nil
}
@ -278,8 +286,8 @@ func (c *Client) Store(ctx context.Context, req []byte, attempt int) (WriteRespo
return WriteResponseStats { } , RecoverableError { err , defaultBackoff }
}
defer func ( ) {
io . Copy ( io . Discard , httpResp . Body )
httpResp . Body . Close ( )
_ , _ = io . Copy ( io . Discard , httpResp . Body )
_ = httpResp . Body . Close ( )
} ( )
// TODO(bwplotka): Pass logger and emit debug on error?
@ -329,17 +337,17 @@ func (c *Client) Endpoint() string {
return c . urlString
}
// Read reads from a remote endpoint.
func ( c * Client ) Read ( ctx context . Context , query * prompb . Query ) ( * prompb . QueryResult , error ) {
// Read reads from a remote endpoint. The sortSeries parameter is only respected in the case of a sampled response;
// chunked responses arrive already sorted by the server.
func ( c * Client ) Read ( ctx context . Context , query * prompb . Query , sortSeries bool ) ( storage . SeriesSet , error ) {
c . readQueries . Inc ( )
defer c . readQueries . Dec ( )
req := & prompb . ReadRequest {
// TODO: Support batching multiple queries into one read request,
// as the protobuf interface allows for it.
Queries : [ ] * prompb . Query {
query ,
} ,
Queries : [ ] * prompb . Query { query } ,
AcceptedResponseTypes : AcceptedResponseTypes ,
}
data , err := proto . Marshal ( req )
if err != nil {
@ -358,7 +366,6 @@ func (c *Client) Read(ctx context.Context, query *prompb.Query) (*prompb.QueryRe
httpReq . Header . Set ( "X-Prometheus-Remote-Read-Version" , "0.1.0" )
ctx , cancel := context . WithTimeout ( ctx , c . timeout )
defer cancel ( )
ctx , span := otel . Tracer ( "" ) . Start ( ctx , "Remote Read" , trace . WithSpanKind ( trace . SpanKindClient ) )
defer span . End ( )
@ -366,23 +373,57 @@ func (c *Client) Read(ctx context.Context, query *prompb.Query) (*prompb.QueryRe
start := time . Now ( )
httpResp , err := c . Client . Do ( httpReq . WithContext ( ctx ) )
if err != nil {
cancel ( )
return nil , fmt . Errorf ( "error sending request: %w" , err )
}
defer func ( ) {
io . Copy ( io . Discard , httpResp . Body )
httpResp . Body . Close ( )
} ( )
c . readQueriesDuration . Observe ( time . Since ( start ) . Seconds ( ) )
c . readQueriesTotal . WithLabelValues ( strconv . Itoa ( httpResp . StatusCode ) ) . Inc ( )
compressed , err = io . ReadAll ( httpResp . Body )
if err != nil {
return nil , fmt . Errorf ( "error reading response. HTTP status code: %s: %w" , httpResp . Status , err )
if httpResp . StatusCode / 100 != 2 {
// Make an attempt at getting an error message.
body , _ := io . ReadAll ( httpResp . Body )
_ = httpResp . Body . Close ( )
cancel ( )
return nil , fmt . Errorf ( "remote server %s returned http status %s: %s" , c . urlString , httpResp . Status , string ( body ) )
}
if httpResp . StatusCode / 100 != 2 {
return nil , fmt . Errorf ( "remote server %s returned HTTP status %s: %s" , c . urlString , httpResp . Status , strings . TrimSpace ( string ( compressed ) ) )
contentType := httpResp . Header . Get ( "Content-Type" )
switch {
case strings . HasPrefix ( contentType , "application/x-protobuf" ) :
c . readQueriesDuration . WithLabelValues ( "sampled" ) . Observe ( time . Since ( start ) . Seconds ( ) )
c . readQueriesTotal . WithLabelValues ( "sampled" , strconv . Itoa ( httpResp . StatusCode ) ) . Inc ( )
ss , err := c . handleSampledResponse ( req , httpResp , sortSeries )
cancel ( )
return ss , err
case strings . HasPrefix ( contentType , "application/x-streamed-protobuf; proto=prometheus.ChunkedReadResponse" ) :
c . readQueriesDuration . WithLabelValues ( "chunked" ) . Observe ( time . Since ( start ) . Seconds ( ) )
s := NewChunkedReader ( httpResp . Body , c . chunkedReadLimit , nil )
return NewChunkedSeriesSet ( s , httpResp . Body , query . StartTimestampMs , query . EndTimestampMs , func ( err error ) {
code := strconv . Itoa ( httpResp . StatusCode )
if ! errors . Is ( err , io . EOF ) {
code = "aborted_stream"
}
c . readQueriesTotal . WithLabelValues ( "chunked" , code ) . Inc ( )
cancel ( )
} ) , nil
default :
c . readQueriesDuration . WithLabelValues ( "unsupported" ) . Observe ( time . Since ( start ) . Seconds ( ) )
c . readQueriesTotal . WithLabelValues ( "unsupported" , strconv . Itoa ( httpResp . StatusCode ) ) . Inc ( )
cancel ( )
return nil , fmt . Errorf ( "unsupported content type: %s" , contentType )
}
}
func ( c * Client ) handleSampledResponse ( req * prompb . ReadRequest , httpResp * http . Response , sortSeries bool ) ( storage . SeriesSet , error ) {
compressed , err := io . ReadAll ( httpResp . Body )
if err != nil {
return nil , fmt . Errorf ( "error reading response. HTTP status code: %s: %w" , httpResp . Status , err )
}
defer func ( ) {
_ , _ = io . Copy ( io . Discard , httpResp . Body )
_ = httpResp . Body . Close ( )
} ( )
uncompressed , err := snappy . Decode ( nil , compressed )
if err != nil {
@ -399,5 +440,8 @@ func (c *Client) Read(ctx context.Context, query *prompb.Query) (*prompb.QueryRe
return nil , fmt . Errorf ( "responses: want %d, got %d" , len ( req . Queries ) , len ( resp . Results ) )
}
return resp . Results [ 0 ] , nil
// This client does not batch queries so there's always only 1 result.
res := resp . Results [ 0 ]
return FromQueryResult ( sortSeries , res ) , nil
}