// prometheus/storage/remote/client_test.go

// Copyright 2017 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package remote

import (
	"context"
	"net/http"
	"net/http/httptest"
	"net/url"
	"strings"
	"testing"
	"time"

	"github.com/pkg/errors"
	config_util "github.com/prometheus/common/config"
	"github.com/prometheus/common/model"
	"github.com/stretchr/testify/require"
)

// longErrMessage is "error message" repeated maxErrMsgLen times, so for any
// positive maxErrMsgLen it is longer than maxErrMsgLen bytes. The test
// handlers in this file send it as the response body, and the expected error
// strings use only its first maxErrMsgLen bytes.
var longErrMessage = strings.Repeat("error message", maxErrMsgLen)

// TestStoreHTTPErrorHandling checks the error returned by the write client's
// Store method for a range of HTTP response status codes.
//
// For each case a throwaway HTTP server replies with the given status code
// and longErrMessage as the body. The expectations are:
//   - 200: no error;
//   - 300 and 404: a plain error whose message carries the status line and
//     the first maxErrMsgLen bytes of the body;
//   - 500: a RecoverableError with the same message format.
func TestStoreHTTPErrorHandling(t *testing.T) {
	tests := []struct {
		code int
		err  error
	}{
		{
			code: 200,
			err:  nil,
		},
		{
			code: 300,
			err:  errors.New("server returned HTTP status 300 Multiple Choices: " + longErrMessage[:maxErrMsgLen]),
		},
		{
			code: 404,
			err:  errors.New("server returned HTTP status 404 Not Found: " + longErrMessage[:maxErrMsgLen]),
		},
		{
			code: 500,
			err:  RecoverableError{errors.New("server returned HTTP status 500 Internal Server Error: " + longErrMessage[:maxErrMsgLen]), defaultBackoff},
		},
	}

	for _, test := range tests {
		// Each status code runs as its own subtest so that one failing case
		// does not hide the others and so that cleanup is scoped per case.
		t.Run(http.StatusText(test.code), func(t *testing.T) {
			server := httptest.NewServer(
				http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
					http.Error(w, longErrMessage, test.code)
				}),
			)
			// Deferred so the server is shut down even when a require
			// assertion aborts the subtest.
			defer server.Close()

			serverURL, err := url.Parse(server.URL)
			require.NoError(t, err)

			conf := &ClientConfig{
				URL:     &config_util.URL{URL: serverURL},
				Timeout: model.Duration(time.Second),
			}

			hash, err := toHash(conf)
			require.NoError(t, err)
			c, err := NewWriteClient(hash, conf)
			require.NoError(t, err)

			err = c.Store(context.Background(), []byte{})
			if test.err == nil {
				require.NoError(t, err)
				return
			}
			require.EqualError(t, err, test.err.Error())

			// EqualError only compares messages; also make sure the error is
			// recoverable exactly when the expectation is.
			_, wantRecoverable := test.err.(RecoverableError)
			_, gotRecoverable := err.(RecoverableError)
			require.Equal(t, wantRecoverable, gotRecoverable, "unexpected recoverability of returned error")
		})
	}
}

// TestClientRetryAfter checks that a 429 (Too Many Requests) response is
// returned from Store as a RecoverableError only when the client was
// configured with RetryOnRateLimit set to true.
func TestClientRetryAfter(t *testing.T) {
	server := httptest.NewServer(
		http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			http.Error(w, longErrMessage, http.StatusTooManyRequests)
		}),
	)
	defer server.Close()

	serverURL, err := url.Parse(server.URL)
	require.NoError(t, err)

	tests := []struct {
		name             string
		retryOnRateLimit bool
		failMsg          string
	}{
		{
			name:             "retry on rate limit disabled",
			retryOnRateLimit: false,
			failMsg:          "Recoverable error not expected.",
		},
		{
			name:             "retry on rate limit enabled",
			retryOnRateLimit: true,
			failMsg:          "Recoverable error was expected.",
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			conf := &ClientConfig{
				URL:              &config_util.URL{URL: serverURL},
				Timeout:          model.Duration(time.Second),
				RetryOnRateLimit: test.retryOnRateLimit,
			}

			hash, err := toHash(conf)
			require.NoError(t, err)
			c, err := NewWriteClient(hash, conf)
			require.NoError(t, err)

			err = c.Store(context.Background(), []byte{})
			// Without this check the "not recoverable" case would also pass
			// if Store returned nil, since a nil error is never a
			// RecoverableError.
			require.Error(t, err)

			_, ok := err.(RecoverableError)
			require.Equal(t, test.retryOnRateLimit, ok, test.failMsg)
		})
	}
}

// TestRetryAfterDuration checks the duration that retryAfterDuration returns
// for a value given in seconds, and that it returns defaultBackoff for a
// date-time value in an unexpected layout and for an empty value.
func TestRetryAfterDuration(t *testing.T) {
	tc := []struct {
		name     string
		tInput   string
		expected model.Duration
	}{
		{
			name:     "seconds",
			tInput:   "120",
			expected: model.Duration(time.Second * 120),
		},
		{
			name:     "date-time default",
			tInput:   time.RFC1123, // Expected layout is http.TimeFormat, hence an error.
			expected: defaultBackoff,
		},
		{
			name:     "retry-after not provided",
			tInput:   "", // No value at all, so the default backoff is expected.
			expected: defaultBackoff,
		},
	}
	for _, c := range tc {
		// Subtests let every case report its own result instead of the first
		// failing require ending the whole test.
		t.Run(c.name, func(t *testing.T) {
			require.Equal(t, c.expected, retryAfterDuration(c.tInput), c.name)
		})
	}
}
Initial port of remote storage to v2. 8 years ago			`// Copyright 2017 The Prometheus Authors`
			`// Licensed under the Apache License, Version 2.0 (the "License");`
			`// you may not use this file except in compliance with the License.`
			`// You may obtain a copy of the License at`
			`//`
			`// http://www.apache.org/licenses/LICENSE-2.0`
			`//`
			`// Unless required by applicable law or agreed to in writing, software`
			`// distributed under the License is distributed on an "AS IS" BASIS,`
			`// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`// See the License for the specific language governing permissions and`
Remove "package remote" garbage from license headers (#3304) 7 years ago			`// limitations under the License.`
Initial port of remote storage to v2. 8 years ago
			`package remote`

			`import (`
Add proper unclean shutdown handling with a cancellable context. Signed-off-by: Tom Wilkie <tom.wilkie@gmail.com> 7 years ago			`"context"`
Initial port of remote storage to v2. 8 years ago			`"net/http"`
			`"net/http/httptest"`
			`"net/url"`
			`"strings"`
			`"testing"`
			`"time"`

refine error handling in prometheus (#5388) i) Uses the more idiomatic Wrap and Wrapf methods for creating nested errors. ii) Fixes some incorrect usages of fmt.Errorf where the error messages don't have any formatting directives. iii) Does away with the use of fmt package for errors in favour of pkg/errors Signed-off-by: tariqibrahim <tariq181290@gmail.com> 6 years ago			`"github.com/pkg/errors"`
Use shared types from the `common` repo (#3674) * refactor: use shared types from common repo, remove util/config * vendor: add common/config * fix nit 7 years ago			`config_util "github.com/prometheus/common/config"`
Initial port of remote storage to v2. 8 years ago			`"github.com/prometheus/common/model"`
Testify: move to require (#8122) * Testify: move to require Moving testify to require to fail tests early in case of errors. Signed-off-by: Julien Pivotto <roidelapluie@inuits.eu> * More moves Signed-off-by: Julien Pivotto <roidelapluie@inuits.eu> 4 years ago			`"github.com/stretchr/testify/require"`
Initial port of remote storage to v2. 8 years ago			`)`

			`var longErrMessage = strings.Repeat("error message", maxErrMsgLen)`

			`func TestStoreHTTPErrorHandling(t *testing.T) {`
			`tests := []struct {`
			`code int`
			`err error`
			`}{`
			`{`
			`code: 200,`
			`err: nil,`
			`},`
			`{`
			`code: 300,`
refine error handling in prometheus (#5388) i) Uses the more idiomatic Wrap and Wrapf methods for creating nested errors. ii) Fixes some incorrect usages of fmt.Errorf where the error messages don't have any formatting directives. iii) Does away with the use of fmt package for errors in favour of pkg/errors Signed-off-by: tariqibrahim <tariq181290@gmail.com> 6 years ago			`err: errors.New("server returned HTTP status 300 Multiple Choices: " + longErrMessage[:maxErrMsgLen]),`
Initial port of remote storage to v2. 8 years ago			`},`
			`{`
			`code: 404,`
refine error handling in prometheus (#5388) i) Uses the more idiomatic Wrap and Wrapf methods for creating nested errors. ii) Fixes some incorrect usages of fmt.Errorf where the error messages don't have any formatting directives. iii) Does away with the use of fmt package for errors in favour of pkg/errors Signed-off-by: tariqibrahim <tariq181290@gmail.com> 6 years ago			`err: errors.New("server returned HTTP status 404 Not Found: " + longErrMessage[:maxErrMsgLen]),`
Initial port of remote storage to v2. 8 years ago			`},`
			`{`
			`code: 500,`
Consider status code 429 as recoverable errors to avoid resharding (#8237) * Consider status code 429 as recoverable errors to avoid resharding. * Adds support for Retry-After in backoff logic in remote storage. Signed-off-by: Harkishen-Singh <harkishensingh@hotmail.com> 4 years ago			`err: RecoverableError{errors.New("server returned HTTP status 500 Internal Server Error: " + longErrMessage[:maxErrMsgLen]), defaultBackoff},`
Initial port of remote storage to v2. 8 years ago			`},`
			`}`

Move away from testutil, refactor imports (#8087) Signed-off-by: Julien Pivotto <roidelapluie@inuits.eu> 4 years ago			`for _, test := range tests {`
Initial port of remote storage to v2. 8 years ago			`server := httptest.NewServer(`
			`http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {`
			`http.Error(w, longErrMessage, test.code)`
			`}),`
			`)`

			`serverURL, err := url.Parse(server.URL)`
Testify: move to require (#8122) * Testify: move to require Moving testify to require to fail tests early in case of errors. Signed-off-by: Julien Pivotto <roidelapluie@inuits.eu> * More moves Signed-off-by: Julien Pivotto <roidelapluie@inuits.eu> 4 years ago			`require.NoError(t, err)`
Initial port of remote storage to v2. 8 years ago
Add config option for remote job name (#6043) * Track remote write queues via a map so we don't care about index. Signed-off-by: Callum Styan <callumstyan@gmail.com> * Support a job name for remote write/read so we can differentiate between them using the name. Signed-off-by: Callum Styan <callumstyan@gmail.com> * Remote write/read has Name to not confuse the meaning of the field with scrape job names. Signed-off-by: Callum Styan <callumstyan@gmail.com> * Split queue/client label into remote_name and url labels. Signed-off-by: Callum Styan <callumstyan@gmail.com> * Don't allow for duplicate remote write/read configs. Signed-off-by: Callum Styan <callumstyan@gmail.com> * Ensure we restart remote write queues if the hash of their config has not changed, but the remote name has changed. Signed-off-by: Callum Styan <callumstyan@gmail.com> * Include name in remote read/write config hashes, simplify duplicates check, update test accordingly. Signed-off-by: Callum Styan <callumstyan@gmail.com> 5 years ago			`conf := &ClientConfig{`
Refactor SD configuration to remove `config` dependency (#3629) * refactor: move targetGroup struct and CheckOverflow() to their own package * refactor: move auth and security related structs to a utility package, fix import error in utility package * refactor: Azure SD, remove SD struct from config * refactor: DNS SD, remove SD struct from config into dns package * refactor: ec2 SD, move SD struct from config into the ec2 package * refactor: file SD, move SD struct from config to file discovery package * refactor: gce, move SD struct from config to gce discovery package * refactor: move HTTPClientConfig and URL into util/config, fix import error in httputil * refactor: consul, move SD struct from config into consul discovery package * refactor: marathon, move SD struct from config into marathon discovery package * refactor: triton, move SD struct from config to triton discovery package, fix test * refactor: zookeeper, move SD structs from config to zookeeper discovery package * refactor: openstack, remove SD struct from config, move into openstack discovery package * refactor: kubernetes, move SD struct from config into kubernetes discovery package * refactor: notifier, use targetgroup package instead of config * refactor: tests for file, marathon, triton SD - use targetgroup package instead of config.TargetGroup * refactor: retrieval, use targetgroup package instead of config.TargetGroup * refactor: storage, use config util package * refactor: discovery manager, use targetgroup package instead of config.TargetGroup * refactor: use HTTPClient and TLS config from configUtil instead of config * refactor: tests, use targetgroup package instead of config.TargetGroup * refactor: fix tagetgroup.Group pointers that were removed by mistake * refactor: openstack, kubernetes: drop prefixes * refactor: remove import aliases forced due to vscode bug * refactor: move main SD struct out of config into discovery/config * refactor: rename configUtil to config_util * refactor: 
rename yamlUtil to yaml_config * refactor: kubernetes, remove prefixes * refactor: move the TargetGroup package to discovery/ * refactor: fix order of imports 7 years ago			`URL: &config_util.URL{URL: serverURL},`
remote: Expose ClientConfig type (see #3165) 7 years ago			`Timeout: model.Duration(time.Second),`
Ensure all values are used or _ 7 years ago			`}`
Initial port of remote storage to v2. 8 years ago
Add config option for remote job name (#6043) * Track remote write queues via a map so we don't care about index. Signed-off-by: Callum Styan <callumstyan@gmail.com> * Support a job name for remote write/read so we can differentiate between them using the name. Signed-off-by: Callum Styan <callumstyan@gmail.com> * Remote write/read has Name to not confuse the meaning of the field with scrape job names. Signed-off-by: Callum Styan <callumstyan@gmail.com> * Split queue/client label into remote_name and url labels. Signed-off-by: Callum Styan <callumstyan@gmail.com> * Don't allow for duplicate remote write/read configs. Signed-off-by: Callum Styan <callumstyan@gmail.com> * Ensure we restart remote write queues if the hash of their config has not changed, but the remote name has changed. Signed-off-by: Callum Styan <callumstyan@gmail.com> * Include name in remote read/write config hashes, simplify duplicates check, update test accordingly. Signed-off-by: Callum Styan <callumstyan@gmail.com> 5 years ago			`hash, err := toHash(conf)`
Testify: move to require (#8122) * Testify: move to require Moving testify to require to fail tests early in case of errors. Signed-off-by: Julien Pivotto <roidelapluie@inuits.eu> * More moves Signed-off-by: Julien Pivotto <roidelapluie@inuits.eu> 4 years ago			`require.NoError(t, err)`
storage: Adjusted fully storage layer support for chunk iterators: Remote read client, readyStorage, fanout. (#7059) * Fixed nits introduced by https://github.com/prometheus/prometheus/pull/7334 * Added ChunkQueryable implementation to fanout and readyStorage. * Added more comments. * Changed NewVerticalChunkSeriesMerger to CompactingChunkSeriesMerger, removed tiny interface by reusing VerticalSeriesMergeFunc for overlapping algorithm for both chunks and series, for both querying and compacting (!) + made sure duplicates are merged. * Added ErrChunkSeriesSet * Added Samples interface for seamless []promb.Sample to []tsdbutil.Sample conversion. * Deprecating non chunks serieset based StreamChunkedReadResponses, added chunk one. * Improved tests. * Split remote client into Write (old storage) and read. * Queryable client is now SampleAndChunkQueryable. Since we cannot use nice QueryableFunc I moved all config based options to sampleAndChunkQueryableClient to aboid boilerplate. In next commit: Changes for TSDB. Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com> 4 years ago			`c, err := NewWriteClient(hash, conf)`
Testify: move to require (#8122) * Testify: move to require Moving testify to require to fail tests early in case of errors. Signed-off-by: Julien Pivotto <roidelapluie@inuits.eu> * More moves Signed-off-by: Julien Pivotto <roidelapluie@inuits.eu> 4 years ago			`require.NoError(t, err)`
Add config option for remote job name (#6043) * Track remote write queues via a map so we don't care about index. Signed-off-by: Callum Styan <callumstyan@gmail.com> * Support a job name for remote write/read so we can differentiate between them using the name. Signed-off-by: Callum Styan <callumstyan@gmail.com> * Remote write/read has Name to not confuse the meaning of the field with scrape job names. Signed-off-by: Callum Styan <callumstyan@gmail.com> * Split queue/client label into remote_name and url labels. Signed-off-by: Callum Styan <callumstyan@gmail.com> * Don't allow for duplicate remote write/read configs. Signed-off-by: Callum Styan <callumstyan@gmail.com> * Ensure we restart remote write queues if the hash of their config has not changed, but the remote name has changed. Signed-off-by: Callum Styan <callumstyan@gmail.com> * Include name in remote read/write config hashes, simplify duplicates check, update test accordingly. Signed-off-by: Callum Styan <callumstyan@gmail.com> 5 years ago
Tail the TSDB WAL for remote_write This change switches the remote_write API to use the TSDB WAL. This should reduce memory usage and prevent sample loss when the remote end point is down. We use the new LiveReader from TSDB to tail WAL segments. Logic for finding the tracking segment is included in this PR. The WAL is tailed once for each remote_write endpoint specified. Reading from the segment is based on a ticker rather than relying on fsnotify write events, which were found to be complicated and unreliable in early prototypes. Enqueuing a sample for sending via remote_write can now block, to provide back pressure. Queues are still required to acheive parallelism and batching. We have updated the queue config based on new defaults for queue capacity and pending samples values - much smaller values are now possible. The remote_write resharding code has been updated to prevent deadlocks, and extra tests have been added for these cases. As part of this change, we attempt to guarantee that samples are not lost; however this initial version doesn't guarantee this across Prometheus restarts or non-retryable errors from the remote end (eg 400s). This changes also includes the following optimisations: - only marshal the proto request once, not once per retry - maintain a single copy of the labels for given series to reduce GC pressure Other minor tweaks: - only reshard if we've also successfully sent recently - add pending samples, latest sent timestamp, WAL events processed metrics Co-authored-by: Chris Marchbanks <csmarchbanks.com> (initial prototype) Co-authored-by: Tom Wilkie <tom.wilkie@gmail.com> (sharding changes) Signed-off-by: Callum Styan <callumstyan@gmail.com> 6 years ago			`err = c.Store(context.Background(), []byte{})`
Move away from testutil, refactor imports (#8087) Signed-off-by: Julien Pivotto <roidelapluie@inuits.eu> 4 years ago			`if test.err != nil {`
Testify: move to require (#8122) * Testify: move to require Moving testify to require to fail tests early in case of errors. Signed-off-by: Julien Pivotto <roidelapluie@inuits.eu> * More moves Signed-off-by: Julien Pivotto <roidelapluie@inuits.eu> 4 years ago			`require.EqualError(t, err, test.err.Error())`
Move away from testutil, refactor imports (#8087) Signed-off-by: Julien Pivotto <roidelapluie@inuits.eu> 4 years ago			`} else {`
Testify: move to require (#8122) * Testify: move to require Moving testify to require to fail tests early in case of errors. Signed-off-by: Julien Pivotto <roidelapluie@inuits.eu> * More moves Signed-off-by: Julien Pivotto <roidelapluie@inuits.eu> 4 years ago			`require.NoError(t, err)`
Move away from testutil, refactor imports (#8087) Signed-off-by: Julien Pivotto <roidelapluie@inuits.eu> 4 years ago			`}`
Initial port of remote storage to v2. 8 years ago
			`server.Close()`
			`}`
			`}`
Consider status code 429 as recoverable errors to avoid resharding (#8237) * Consider status code 429 as recoverable errors to avoid resharding. * Adds support for Retry-After in backoff logic in remote storage. Signed-off-by: Harkishen-Singh <harkishensingh@hotmail.com> 4 years ago
Adds support to configure retry on Rate-Limiting from remote-write config. Signed-off-by: Harkishen-Singh <harkishensingh@hotmail.com> 4 years ago			`func TestClientRetryAfter(t *testing.T) {`
			`server := httptest.NewServer(`
			`http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {`
			`http.Error(w, longErrMessage, 429)`
			`}),`
			`)`
			`defer server.Close()`

			`getClient := func(conf *ClientConfig) WriteClient {`
			`hash, err := toHash(conf)`
			`require.NoError(t, err)`
			`c, err := NewWriteClient(hash, conf)`
			`require.NoError(t, err)`
			`return c`
			`}`

			`serverURL, err := url.Parse(server.URL)`
			`require.NoError(t, err)`

			`conf := &ClientConfig{`
			`URL: &config_util.URL{URL: serverURL},`
			`Timeout: model.Duration(time.Second),`
			`RetryOnRateLimit: false,`
			`}`

			`c := getClient(conf)`
			`err = c.Store(context.Background(), []byte{})`
tests: Move from t.Errorf and others. (Part 2) (#9309) * Refactor util tests. Signed-off-by: Paweł Szulik <paul.szulik@gmail.com> 3 years ago			`_, ok := err.(RecoverableError)`
			`require.False(t, ok, "Recoverable error not expected.")`
Adds support to configure retry on Rate-Limiting from remote-write config. Signed-off-by: Harkishen-Singh <harkishensingh@hotmail.com> 4 years ago
			`conf = &ClientConfig{`
			`URL: &config_util.URL{URL: serverURL},`
			`Timeout: model.Duration(time.Second),`
			`RetryOnRateLimit: true,`
			`}`

			`c = getClient(conf)`
			`err = c.Store(context.Background(), []byte{})`
tests: Move from t.Errorf and others. (Part 2) (#9309) * Refactor util tests. Signed-off-by: Paweł Szulik <paul.szulik@gmail.com> 3 years ago			`_, ok = err.(RecoverableError)`
			`require.True(t, ok, "Recoverable error was expected.")`
Adds support to configure retry on Rate-Limiting from remote-write config. Signed-off-by: Harkishen-Singh <harkishensingh@hotmail.com> 4 years ago			`}`

Consider status code 429 as recoverable errors to avoid resharding (#8237) * Consider status code 429 as recoverable errors to avoid resharding. * Adds support for Retry-After in backoff logic in remote storage. Signed-off-by: Harkishen-Singh <harkishensingh@hotmail.com> 4 years ago			`func TestRetryAfterDuration(t *testing.T) {`
			`tc := []struct {`
			`name string`
			`tInput string`
			`expected model.Duration`
			`}{`
			`{`
			`name: "seconds",`
			`tInput: "120",`
			`expected: model.Duration(time.Second * 120),`
			`},`
			`{`
			`name: "date-time default",`
			`tInput: time.RFC1123, // Expected layout is http.TimeFormat, hence an error.`
			`expected: defaultBackoff,`
			`},`
			`{`
			`name: "retry-after not provided",`
			`tInput: "", // Expected layout is http.TimeFormat, hence an error.`
			`expected: defaultBackoff,`
			`},`
			`}`
			`for _, c := range tc {`
			`require.Equal(t, c.expected, retryAfterDuration(c.tInput), c.name)`
			`}`
			`}`