mirror of https://github.com/prometheus/prometheus
Signed-off-by: Julien <roidelapluie@o11y.eu>pull/14734/head
parent
70bb219d33
commit
ce0f09b125
@ -0,0 +1,193 @@
|
||||
// Copyright 2024 The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
"go.uber.org/atomic"
|
||||
|
||||
"github.com/prometheus/prometheus/util/testutil"
|
||||
)
|
||||
|
||||
func TestScrapeFailureLogFile(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("skipping test in short mode.")
|
||||
}
|
||||
|
||||
// Tracks the number of requests made to the mock server.
|
||||
var requestCount atomic.Int32
|
||||
|
||||
// Starts a server that always returns HTTP 500 errors.
|
||||
mockServerAddress := startGarbageServer(t, &requestCount)
|
||||
|
||||
// Create a temporary directory for Prometheus configuration and logs.
|
||||
tempDir := t.TempDir()
|
||||
|
||||
// Define file paths for the scrape failure log and Prometheus configuration.
|
||||
// Like other files, the scrape failure log file should be relative to the
|
||||
// config file. Therefore, we split the name we put in the file and the full
|
||||
// path used to check the content of the file.
|
||||
scrapeFailureLogFileName := "scrape_failure.log"
|
||||
scrapeFailureLogFile := filepath.Join(tempDir, scrapeFailureLogFileName)
|
||||
promConfigFile := filepath.Join(tempDir, "prometheus.yml")
|
||||
|
||||
// Step 1: Set up an initial Prometheus configuration that globally
|
||||
// specifies a scrape failure log file.
|
||||
promConfig := fmt.Sprintf(`
|
||||
global:
|
||||
scrape_interval: 500ms
|
||||
scrape_failure_log_file: %s
|
||||
|
||||
scrape_configs:
|
||||
- job_name: 'test_job'
|
||||
static_configs:
|
||||
- targets: ['%s']
|
||||
`, scrapeFailureLogFileName, mockServerAddress)
|
||||
|
||||
err := os.WriteFile(promConfigFile, []byte(promConfig), 0o644)
|
||||
require.NoError(t, err, "Failed to write Prometheus configuration file")
|
||||
|
||||
// Start Prometheus with the generated configuration and a random port, enabling the lifecycle API.
|
||||
port := testutil.RandomUnprivilegedPort(t)
|
||||
params := []string{
|
||||
"-test.main",
|
||||
"--config.file=" + promConfigFile,
|
||||
"--storage.tsdb.path=" + filepath.Join(tempDir, "data"),
|
||||
fmt.Sprintf("--web.listen-address=127.0.0.1:%d", port),
|
||||
"--web.enable-lifecycle",
|
||||
}
|
||||
prometheusProcess := exec.Command(promPath, params...)
|
||||
prometheusProcess.Stdout = os.Stdout
|
||||
prometheusProcess.Stderr = os.Stderr
|
||||
|
||||
err = prometheusProcess.Start()
|
||||
require.NoError(t, err, "Failed to start Prometheus")
|
||||
defer prometheusProcess.Process.Kill()
|
||||
|
||||
// Wait until the mock server receives at least two requests from Prometheus.
|
||||
require.Eventually(t, func() bool {
|
||||
return requestCount.Load() >= 2
|
||||
}, 30*time.Second, 500*time.Millisecond, "Expected at least two requests to the mock server")
|
||||
|
||||
// Verify that the scrape failures have been logged to the specified file.
|
||||
content, err := os.ReadFile(scrapeFailureLogFile)
|
||||
require.NoError(t, err, "Failed to read scrape failure log")
|
||||
require.Contains(t, string(content), "server returned HTTP status 500 Internal Server Error", "Expected scrape failure log entry not found")
|
||||
|
||||
// Step 2: Update the Prometheus configuration to remove the scrape failure
|
||||
// log file setting.
|
||||
promConfig = fmt.Sprintf(`
|
||||
global:
|
||||
scrape_interval: 1s
|
||||
|
||||
scrape_configs:
|
||||
- job_name: 'test_job'
|
||||
static_configs:
|
||||
- targets: ['%s']
|
||||
`, mockServerAddress)
|
||||
|
||||
err = os.WriteFile(promConfigFile, []byte(promConfig), 0o644)
|
||||
require.NoError(t, err, "Failed to update Prometheus configuration file")
|
||||
|
||||
// Reload Prometheus with the updated configuration.
|
||||
reloadPrometheus(t, port)
|
||||
|
||||
// Count the number of lines in the scrape failure log file before any
|
||||
// further requests.
|
||||
preReloadLogLineCount := countLinesInFile(scrapeFailureLogFile)
|
||||
|
||||
// Wait for at least two more requests to the mock server to ensure
|
||||
// Prometheus continues scraping.
|
||||
requestsBeforeReload := requestCount.Load()
|
||||
require.Eventually(t, func() bool {
|
||||
return requestCount.Load() >= requestsBeforeReload+2
|
||||
}, 30*time.Second, 500*time.Millisecond, "Expected two more requests to the mock server after configuration reload")
|
||||
|
||||
// Ensure that no new lines were added to the scrape failure log file after
|
||||
// the configuration change.
|
||||
require.Equal(t, preReloadLogLineCount, countLinesInFile(scrapeFailureLogFile), "No new lines should be added to the scrape failure log file after removing the log setting")
|
||||
|
||||
// Step 3: Re-add the scrape failure log file setting, but this time under
|
||||
// scrape_configs, and reload Prometheus.
|
||||
promConfig = fmt.Sprintf(`
|
||||
global:
|
||||
scrape_interval: 1s
|
||||
|
||||
scrape_configs:
|
||||
- job_name: 'test_job'
|
||||
scrape_failure_log_file: %s
|
||||
static_configs:
|
||||
- targets: ['%s']
|
||||
`, scrapeFailureLogFileName, mockServerAddress)
|
||||
|
||||
err = os.WriteFile(promConfigFile, []byte(promConfig), 0o644)
|
||||
require.NoError(t, err, "Failed to update Prometheus configuration file")
|
||||
|
||||
// Reload Prometheus with the updated configuration.
|
||||
reloadPrometheus(t, port)
|
||||
|
||||
// Wait for at least two more requests to the mock server and verify that
|
||||
// new log entries are created.
|
||||
postReloadLogLineCount := countLinesInFile(scrapeFailureLogFile)
|
||||
requestsBeforeReAddingLog := requestCount.Load()
|
||||
require.Eventually(t, func() bool {
|
||||
return requestCount.Load() >= requestsBeforeReAddingLog+2
|
||||
}, 30*time.Second, 500*time.Millisecond, "Expected two additional requests after re-adding the log setting")
|
||||
|
||||
// Confirm that new lines were added to the scrape failure log file.
|
||||
require.Greater(t, countLinesInFile(scrapeFailureLogFile), postReloadLogLineCount, "New lines should be added to the scrape failure log file after re-adding the log setting")
|
||||
}
|
||||
|
||||
// reloadPrometheus sends a reload request to the Prometheus server to apply
|
||||
// updated configurations.
|
||||
func reloadPrometheus(t *testing.T, port int) {
|
||||
resp, err := http.Post(fmt.Sprintf("http://127.0.0.1:%d/-/reload", port), "", nil)
|
||||
require.NoError(t, err, "Failed to reload Prometheus")
|
||||
require.Equal(t, http.StatusOK, resp.StatusCode, "Unexpected status code when reloading Prometheus")
|
||||
}
|
||||
|
||||
// startGarbageServer sets up a mock server that returns a 500 Internal Server Error
|
||||
// for all requests. It also increments the request count each time it's hit.
|
||||
func startGarbageServer(t *testing.T, requestCount *atomic.Int32) string {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
requestCount.Inc()
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
}))
|
||||
t.Cleanup(server.Close)
|
||||
|
||||
parsedURL, err := url.Parse(server.URL)
|
||||
require.NoError(t, err, "Failed to parse mock server URL")
|
||||
|
||||
return parsedURL.Host
|
||||
}
|
||||
|
||||
// countLinesInFile counts and returns the number of lines in the specified file.
|
||||
func countLinesInFile(filePath string) int {
|
||||
data, err := os.ReadFile(filePath)
|
||||
if err != nil {
|
||||
return 0 // Return 0 if the file doesn't exist or can't be read.
|
||||
}
|
||||
return bytes.Count(data, []byte{'\n'})
|
||||
}
|
Loading…
Reference in new issue