
retrieval: handle GZIP compression ourselves

The automatic GZIP handling of net/http does not preserve
buffers across requests and thus generates a lot of garbage.
We handle GZIP ourselves to circumvent this.
pull/2643/head
Fabian Reinartz 8 years ago
commit cc0ff26f1f
  1. retrieval/scrape.go (80 lines changed)
  2. retrieval/target.go (7 lines changed)
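Before the diff itself, a standalone sketch of the problem the commit message describes: letting net/http gunzip transparently means a fresh gzip.Reader, with all of its decompressor state, is allocated per response, whereas one reader rewired with Reset keeps those buffers alive across requests. This is illustrative code written for this note, not part of the commit; the payload and names are made up.

package main

import (
	"bytes"
	"compress/gzip"
	"fmt"
	"io"
	"io/ioutil"
	"testing"
)

func main() {
	// Build a gzip payload roughly shaped like a small scrape.
	var raw bytes.Buffer
	zw := gzip.NewWriter(&raw)
	for i := 0; i < 1000; i++ {
		fmt.Fprintf(zw, "some_metric{instance=\"%d\"} 1\n", i)
	}
	zw.Close()
	payload := raw.Bytes()

	// A fresh reader per response: roughly what transparent decompression does.
	fresh := testing.AllocsPerRun(100, func() {
		zr, err := gzip.NewReader(bytes.NewReader(payload))
		if err != nil {
			panic(err)
		}
		io.Copy(ioutil.Discard, zr)
		zr.Close()
	})

	// One reader kept around and rewired with Reset: the commit's approach.
	zr, err := gzip.NewReader(bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	reused := testing.AllocsPerRun(100, func() {
		if err := zr.Reset(bytes.NewReader(payload)); err != nil {
			panic(err)
		}
		io.Copy(ioutil.Discard, zr)
		zr.Close()
	})

	fmt.Printf("allocations per decode: fresh reader %.0f, reused reader %.0f\n", fresh, reused)
}

The exact counts depend on the Go version, but the reused path consistently allocates far less per decode; that difference is the garbage the commit message refers to.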

retrieval/scrape.go (80 lines changed)

@@ -14,12 +14,16 @@
 package retrieval

 import (
+	"bufio"
 	"bytes"
+	"compress/gzip"
 	"fmt"
 	"io"
 	"net/http"
+	"reflect"
 	"sync"
 	"time"
+	"unsafe"

 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/common/log"
@@ -347,35 +351,30 @@ type scraper interface {
 // targetScraper implements the scraper interface for a target.
 type targetScraper struct {
 	*Target
-	client *http.Client
-}

-const acceptHeader = `application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited;q=0.7,text/plain;version=0.0.4;q=0.3,*/*;q=0.1`
+	client *http.Client
+	req    *http.Request

-var scrapeBufPool = sync.Pool{}
-
-func getScrapeBuf() []byte {
-	b := scrapeBufPool.Get()
-	if b == nil {
-		return make([]byte, 0, 8192)
-	}
-	return b.([]byte)
+	gzipr *gzip.Reader
+	buf   *bufio.Reader
 }

-func putScrapeBuf(b []byte) {
-	b = b[:0]
-	scrapeBufPool.Put(b)
-}
+const acceptHeader = `application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited;q=0.7,text/plain;version=0.0.4;q=0.3,*/*;q=0.1`

 func (s *targetScraper) scrape(ctx context.Context, w io.Writer) error {
-	req, err := http.NewRequest("GET", s.URL().String(), nil)
-	if err != nil {
-		return err
+	if s.req == nil {
+		req, err := http.NewRequest("GET", s.URL().String(), nil)
+		if err != nil {
+			return err
+		}
+		// Disable accept header to always negotiate for text format.
+		// req.Header.Add("Accept", acceptHeader)
+		req.Header.Add("Accept-Encoding", "gzip")
+
+		s.req = req
 	}
-	// Disable accept header to always negotiate for text format.
-	// req.Header.Add("Accept", acceptHeader)

-	resp, err := ctxhttp.Do(ctx, s.client, req)
+	resp, err := ctxhttp.Do(ctx, s.client, s.req)
 	if err != nil {
 		return err
 	}
@@ -385,7 +384,24 @@ func (s *targetScraper) scrape(ctx context.Context, w io.Writer) error {
 		return fmt.Errorf("server returned HTTP status %s", resp.Status)
 	}

-	_, err = io.Copy(w, resp.Body)
+	if resp.Header.Get("Content-Encoding") != "gzip" {
+		_, err = io.Copy(w, resp.Body)
+		return err
+	}
+
+	if s.gzipr == nil {
+		s.buf = bufio.NewReader(resp.Body)
+		s.gzipr, err = gzip.NewReader(s.buf)
+		if err != nil {
+			return err
+		}
+	} else {
+		s.buf.Reset(resp.Body)
+		s.gzipr.Reset(s.buf)
+	}
+
+	_, err = io.Copy(w, s.gzipr)
+	s.gzipr.Close()
 	return err
 }
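Two details of the reused-reader path worth noting. The response body is wrapped in a bufio.Reader because gzip.Reader wants an io.ByteReader and, at least in Go releases of this era, otherwise allocates its own buffered reader on every Reset; keeping one bufio.Reader and resetting it avoids that per-scrape allocation too. And gzip.Reader.Close does not close the underlying reader, so closing the gzip reader here still leaves closing the response body to the surrounding scrape code.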
@@ -436,7 +452,10 @@ func (sl *scrapeLoop) run(interval, timeout time.Duration, errc chan<- error) {
 	ticker := time.NewTicker(interval)
 	defer ticker.Stop()

+	buf := bytes.NewBuffer(make([]byte, 0, 16000))
+
 	for {
+		buf.Reset()
 		select {
 		case <-sl.ctx.Done():
 			return
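The buffer strategy changes shape here as well: instead of checking scrape buffers in and out of a sync.Pool on every iteration, each scrape loop now owns a single bytes.Buffer that is allocated once (16000 bytes up front rather than the pool's 8192) and reset at the top of each iteration. A scrape loop runs in its own goroutine and the buffer never leaves it, so nothing is lost by not sharing; the buffer simply grows once to the size of the target's largest scrape and stays there.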
@@ -456,8 +475,6 @@ func (sl *scrapeLoop) run(interval, timeout time.Duration, errc chan<- error) {
 			)
 		}

-		buf := bytes.NewBuffer(getScrapeBuf())
-
 		err := sl.scraper.scrape(scrapeCtx, buf)
 		if err == nil {
 			b := buf.Bytes()
@@ -465,7 +482,6 @@ func (sl *scrapeLoop) run(interval, timeout time.Duration, errc chan<- error) {
 			if total, added, err = sl.append(b, start); err != nil {
 				log.With("err", err).Error("append failed")
 			}
-			putScrapeBuf(b)
 		} else if errc != nil {
 			errc <- err
 		}
@@ -524,7 +540,7 @@ loop:
 			t = *tp
 		}

-		mets := string(met)
+		mets := yoloString(met)
 		ref, ok := sl.cache[mets]
 		if ok {
 			switch err = app.AddFast(ref, t, v); err {
@@ -550,6 +566,8 @@ loop:
 			default:
 				break loop
 			}
+			// Allocate a real string.
+			mets = string(met)
 			sl.cache[mets] = ref
 		}
 		added++
@@ -567,6 +585,16 @@ loop:
 	return total, added, nil
 }

+func yoloString(b []byte) string {
+	sh := (*reflect.SliceHeader)(unsafe.Pointer(&b))
+
+	h := reflect.StringHeader{
+		Data: sh.Data,
+		Len:  sh.Len,
+	}
+	return *((*string)(unsafe.Pointer(&h)))
+}
+
 func (sl *scrapeLoop) report(start time.Time, duration time.Duration, scraped, appended int, err error) error {
 	sl.scraper.report(start, duration, err)
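The yoloString helper and the cache changes above belong together: the parsed metric bytes are viewed as a string without copying so the cache lookup allocates nothing, and only when a new entry is actually inserted is a real string made, because the map keeps its key while the bytes behind the zero-copy view are rewritten by the next scrape. A small self-contained illustration of the trick and its constraint (written for this note, not taken from the commit):

package main

import (
	"fmt"
	"reflect"
	"unsafe"
)

// yoloString reinterprets b as a string without copying. The result aliases
// b's memory, so it must not be kept anywhere that outlives or mutates b.
func yoloString(b []byte) string {
	sh := (*reflect.SliceHeader)(unsafe.Pointer(&b))
	h := reflect.StringHeader{Data: sh.Data, Len: sh.Len}
	return *((*string)(unsafe.Pointer(&h)))
}

func main() {
	cache := map[string]uint64{`up{job="node"}`: 7}

	// Pretend this slice points into the reused scrape buffer.
	met := []byte(`up{job="node"}`)

	// Lookup: the key is only inspected during the map access, so the
	// zero-copy view is safe and no string is allocated.
	if ref, ok := cache[yoloString(met)]; ok {
		fmt.Println("cache hit, ref:", ref)
	}

	// Insert: the map retains its key, so it must be a real copy; a
	// yoloString key would be corrupted when the buffer is rewritten.
	met = []byte(`up{job="db"}`)
	cache[string(met)] = 8
	fmt.Println("entries:", len(cache))
}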

retrieval/target.go (7 lines changed)

@@ -78,9 +78,10 @@ func NewHTTPClient(cfg config.HTTPClientConfig) (*http.Client, error) {
 	// The only timeout we care about is the configured scrape timeout.
 	// It is applied on request. So we leave out any timings here.
 	var rt http.RoundTripper = &http.Transport{
-		Proxy:           http.ProxyURL(cfg.ProxyURL.URL),
-		MaxIdleConns:    10000,
-		TLSClientConfig: tlsConfig,
+		Proxy:              http.ProxyURL(cfg.ProxyURL.URL),
+		MaxIdleConns:       10000,
+		TLSClientConfig:    tlsConfig,
+		DisableCompression: true,
 	}

 	// If a bearer token is provided, create a round tripper that will set the
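The transport change closes the loop. Per the net/http documentation, the Transport only decodes gzip transparently when it added the Accept-Encoding header itself, so the explicit header set in scrape already opts out; DisableCompression makes the intent explicit and covers any request that does not set the header. A small self-contained check of the visible difference, illustrative only and using an httptest server rather than a real target:

package main

import (
	"compress/gzip"
	"fmt"
	"net/http"
	"net/http/httptest"
)

func main() {
	// A toy server that always gzips its response body.
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Encoding", "gzip")
		zw := gzip.NewWriter(w)
		fmt.Fprintln(zw, "some_metric 1")
		zw.Close()
	}))
	defer srv.Close()

	for _, disable := range []bool{false, true} {
		client := &http.Client{Transport: &http.Transport{DisableCompression: disable}}
		resp, err := client.Get(srv.URL)
		if err != nil {
			panic(err)
		}
		// With the default transport the body arrives already decoded and the
		// header is stripped; with DisableCompression the gzip body reaches us intact.
		fmt.Printf("DisableCompression=%v  Content-Encoding=%q\n",
			disable, resp.Header.Get("Content-Encoding"))
		resp.Body.Close()
	}
}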
