Merge pull request #14176 from prometheus/superq/gogc

Add configuration option for GOGC, default to 50
pull/14240/head
Ben Kochie 2024-06-05 17:43:40 +02:00 committed by GitHub
commit 4e664035e8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 75 additions and 0 deletions

View File

@ -2,7 +2,10 @@
## unreleased
This release changes the default for GOGC, the Go runtime control for the trade-off between excess memory use and CPU usage. We have found that lowering it from the upstream Go default of 100 to 50 greatly reduces Prometheus's memory use at the cost of minimal additional CPU usage.
* [CHANGE] Rules: Execute 1 query instead of N (where N is the number of alerts within alert rule) when restoring alerts. #13980
* [CHANGE] Runtime: Change GOGC threshold from 100 to 50 #14176
* [FEATURE] Rules: Add new option `query_offset` for each rule group via rule group configuration file and `rule_query_offset` as part of the global configuration to have more resilience for remote write delays. #14061
* [ENHANCEMENT] Rules: Add `rule_group_last_restore_duration_seconds` to measure the time it takes to restore a rule group. #13974
* [ENHANCEMENT] OTLP: Improve remote write format translation performance by using label set hashes for metric identifiers instead of string based ones. #14006 #13991

View File

@ -28,6 +28,8 @@ import (
"os/signal"
"path/filepath"
"runtime"
"runtime/debug"
"strconv"
"strings"
"sync"
"syscall"
@ -1384,6 +1386,17 @@ func reloadConfig(filename string, expandExternalLabels, enableExemplarStorage b
return fmt.Errorf("one or more errors occurred while applying the new configuration (--config.file=%q)", filename)
}
oldGoGC := debug.SetGCPercent(conf.Runtime.GoGC)
if oldGoGC != conf.Runtime.GoGC {
level.Info(logger).Log("msg", "updated GOGC", "old", oldGoGC, "new", conf.Runtime.GoGC)
}
// Write the new setting out to the ENV var for runtime API output.
if conf.Runtime.GoGC >= 0 {
os.Setenv("GOGC", strconv.Itoa(conf.Runtime.GoGC))
} else {
os.Setenv("GOGC", "off")
}
noStepSuqueryInterval.Set(conf.GlobalConfig.EvaluationInterval)
l := []interface{}{"msg", "Completed loading of configuration file", "filename", filename, "totalDuration", time.Since(start)}
level.Info(logger).Log(append(l, timings...)...)

View File

@ -20,6 +20,7 @@ import (
"os"
"path/filepath"
"sort"
"strconv"
"strings"
"time"
@ -151,6 +152,11 @@ var (
ScrapeProtocols: DefaultScrapeProtocols,
}
// DefaultRuntimeConfig is the default runtime configuration.
DefaultRuntimeConfig = RuntimeConfig{
// Go runtime tuning: garbage collection target percentage of 50.
GoGC: 50,
}
// DefaultScrapeConfig is the default scrape configuration.
DefaultScrapeConfig = ScrapeConfig{
// ScrapeTimeout, ScrapeInterval and ScrapeProtocols default to the configured globals.
@ -225,6 +231,7 @@ var (
// Config is the top-level configuration for Prometheus's config files.
type Config struct {
GlobalConfig GlobalConfig `yaml:"global"`
Runtime RuntimeConfig `yaml:"runtime,omitempty"`
AlertingConfig AlertingConfig `yaml:"alerting,omitempty"`
RuleFiles []string `yaml:"rule_files,omitempty"`
ScrapeConfigFiles []string `yaml:"scrape_config_files,omitempty"`
@ -335,6 +342,14 @@ func (c *Config) UnmarshalYAML(unmarshal func(interface{}) error) error {
c.GlobalConfig = DefaultGlobalConfig
}
// If a runtime block was open but empty the default runtime config is overwritten.
// We have to restore it here.
if c.Runtime.isZero() {
c.Runtime = DefaultRuntimeConfig
// Use the GOGC env var value if the runtime section is empty.
c.Runtime.GoGC = getGoGCEnv()
}
for _, rf := range c.RuleFiles {
if !patRulePath.MatchString(rf) {
return fmt.Errorf("invalid rule file path %q", rf)
@ -564,6 +579,17 @@ func (c *GlobalConfig) isZero() bool {
c.ScrapeProtocols == nil
}
// RuntimeConfig holds the settings that tune the behavior of the process.
type RuntimeConfig struct {
	// GoGC is the Go garbage collection target percentage.
	GoGC int `yaml:"gogc,omitempty"`
}

// isZero reports whether the runtime config is the zero value, i.e. no
// field has been set to a non-zero value.
func (c *RuntimeConfig) isZero() bool {
	return *c == (RuntimeConfig{})
}
type ScrapeConfigs struct {
ScrapeConfigs []*ScrapeConfig `yaml:"scrape_configs,omitempty"`
}
@ -1211,3 +1237,19 @@ func filePath(filename string) string {
// fileErr wraps err with the quoted result of filePath(filename) as a
// prefix, so the returned error names the file it relates to while still
// unwrapping to err.
func fileErr(filename string, err error) error {
	path := filePath(filename)
	return fmt.Errorf("%q: %w", path, err)
}
// getGoGCEnv returns the garbage collection percentage requested through
// the GOGC environment variable, using the same logic as upstream Go.
//
// The value "off" (in any letter case) yields -1, and an integer value is
// returned as-is. When the variable is unset, empty, or not parsable as an
// integer, DefaultRuntimeConfig.GoGC is returned instead.
func getGoGCEnv() int {
	raw := os.Getenv("GOGC")
	if raw == "" {
		return DefaultRuntimeConfig.GoGC
	}
	// Special case for GOGC=off.
	if strings.EqualFold(raw, "off") {
		return -1
	}
	if percent, err := strconv.Atoi(raw); err == nil {
		return percent
	}
	// Unparsable values fall back to the default.
	return DefaultRuntimeConfig.GoGC
}

View File

@ -19,6 +19,7 @@ const ruleFilesConfigFile = "testdata/rules_abs_path.good.yml"
var ruleFilesExpectedConf = &Config{
GlobalConfig: DefaultGlobalConfig,
Runtime: DefaultRuntimeConfig,
RuleFiles: []string{
"testdata/first.rules",
"testdata/rules/second.rules",

View File

@ -76,6 +76,7 @@ const (
globLabelLimit = 30
globLabelNameLengthLimit = 200
globLabelValueLengthLimit = 200
globalGoGC = 42
)
var expectedConf = &Config{
@ -96,6 +97,10 @@ var expectedConf = &Config{
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
},
Runtime: RuntimeConfig{
GoGC: globalGoGC,
},
RuleFiles: []string{
filepath.FromSlash("testdata/first.rules"),
filepath.FromSlash("testdata/my/*.rules"),
@ -2081,6 +2086,7 @@ func TestEmptyGlobalBlock(t *testing.T) {
c, err := Load("global:\n", false, log.NewNopLogger())
require.NoError(t, err)
exp := DefaultConfig
exp.Runtime = DefaultRuntimeConfig
require.Equal(t, exp, *c)
}

View File

@ -17,6 +17,7 @@ const ruleFilesConfigFile = "testdata/rules_abs_path_windows.good.yml"
var ruleFilesExpectedConf = &Config{
GlobalConfig: DefaultGlobalConfig,
Runtime: DefaultRuntimeConfig,
RuleFiles: []string{
"testdata\\first.rules",
"testdata\\rules\\second.rules",

View File

@ -14,6 +14,9 @@ global:
monitor: codelab
foo: bar
runtime:
gogc: 42
rule_files:
- "first.rules"
- "my/*.rules"

View File

@ -121,6 +121,12 @@ global:
# that will be kept in memory. 0 means no limit.
[ keep_dropped_targets: <int> | default = 0 ]
runtime:
# Configure the Go garbage collector GOGC parameter.
# See: https://tip.golang.org/doc/gc-guide#GOGC
# Lowering this number increases CPU usage but reduces memory usage.
[ gogc: <int> | default = 50 ]
# Rule files specifies a list of globs. Rules and alerts are read from
# all matching files.
rule_files: