agent: remove data race in agent config (#20200)

To fix an issue displaying the current reloaded config in the 
v1/agent/self endpoint #18681 caused the agent's internal 
config struct member to be deepcopied and replaced on reload.

This is not safe because the field is not protected by a lock, nor 
should it be due to how it is accessed by the rest of the system.

This PR does the same deepcopy, but into a new field solely for 
the point of capturing the current reloaded values for display 
purposes. If there has been no reload then the original config is used.
pull/20194/head
R.B. Boyer 2024-01-12 15:11:21 -06:00 committed by GitHub
parent d4b67677c6
commit 7f9ed032fd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 43 additions and 25 deletions

View File

@ -24,6 +24,12 @@ import (
"github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/rboyer/safeio"
"golang.org/x/net/http2"
"golang.org/x/net/http2/h2c"
"google.golang.org/grpc"
"google.golang.org/grpc/keepalive"
"github.com/hashicorp/go-connlimit"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-memdb"
@ -31,11 +37,6 @@ import (
"github.com/hashicorp/hcp-scada-provider/capability"
"github.com/hashicorp/raft"
"github.com/hashicorp/serf/serf"
"github.com/rboyer/safeio"
"golang.org/x/net/http2"
"golang.org/x/net/http2/h2c"
"google.golang.org/grpc"
"google.golang.org/grpc/keepalive"
"github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/acl/resolver"
@ -240,6 +241,9 @@ type Agent struct {
// config is the agent configuration.
config *config.RuntimeConfig
displayOnlyConfigCopy *config.RuntimeConfig
displayOnlyConfigCopyLock sync.Mutex
// Used for writing our logs
logger hclog.InterceptLogger
@ -4438,11 +4442,22 @@ func (a *Agent) reloadConfigInternal(newCfg *config.RuntimeConfig) error {
a.config.EnableDebug = newCfg.EnableDebug
// update Agent config with new config
a.config = newCfg.DeepCopy()
a.displayOnlyConfigCopyLock.Lock()
a.displayOnlyConfigCopy = newCfg.DeepCopy()
a.displayOnlyConfigCopyLock.Unlock()
return nil
}
func (a *Agent) getRuntimeConfigForDisplay() *config.RuntimeConfig {
a.displayOnlyConfigCopyLock.Lock()
defer a.displayOnlyConfigCopyLock.Unlock()
if a.displayOnlyConfigCopy != nil {
return a.displayOnlyConfigCopy.DeepCopy()
}
return a.config
}
// LocalBlockingQuery performs a blocking query in a generic way against
// local agent state that has no RPC or raft to back it. It uses `hash` parameter
// instead of an `index`.

View File

@ -11,14 +11,15 @@ import (
"strings"
"time"
"github.com/mitchellh/hashstructure"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/hashicorp/go-bexpr"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-memdb"
"github.com/hashicorp/serf/coordinate"
"github.com/hashicorp/serf/serf"
"github.com/mitchellh/hashstructure"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/hashicorp/consul/acl"
cachetype "github.com/hashicorp/consul/agent/cache-types"
@ -89,6 +90,8 @@ func (s *HTTPHandlers) AgentSelf(resp http.ResponseWriter, req *http.Request) (i
}
}
displayConfig := s.agent.getRuntimeConfigForDisplay()
var xds *XDSSelf
if s.agent.xdsServer != nil {
xds = &XDSSelf{
@ -96,15 +99,15 @@ func (s *HTTPHandlers) AgentSelf(resp http.ResponseWriter, req *http.Request) (i
"envoy": xdscommon.EnvoyVersions,
},
// Prefer the TLS port. See comment on the XDSSelf struct for details.
Port: s.agent.config.GRPCTLSPort,
Port: displayConfig.GRPCTLSPort,
Ports: GRPCPorts{
Plaintext: s.agent.config.GRPCPort,
TLS: s.agent.config.GRPCTLSPort,
Plaintext: displayConfig.GRPCPort,
TLS: displayConfig.GRPCTLSPort,
},
}
// Fallback to standard port if TLS is not enabled.
if s.agent.config.GRPCTLSPort <= 0 {
xds.Port = s.agent.config.GRPCPort
if displayConfig.GRPCTLSPort <= 0 {
xds.Port = displayConfig.GRPCPort
}
}
@ -119,22 +122,22 @@ func (s *HTTPHandlers) AgentSelf(resp http.ResponseWriter, req *http.Request) (i
Version string
BuildDate string
}{
Datacenter: s.agent.config.Datacenter,
PrimaryDatacenter: s.agent.config.PrimaryDatacenter,
NodeName: s.agent.config.NodeName,
NodeID: string(s.agent.config.NodeID),
Partition: s.agent.config.PartitionOrEmpty(),
Revision: s.agent.config.Revision,
Server: s.agent.config.ServerMode,
Datacenter: displayConfig.Datacenter,
PrimaryDatacenter: displayConfig.PrimaryDatacenter,
NodeName: displayConfig.NodeName,
NodeID: string(displayConfig.NodeID),
Partition: displayConfig.PartitionOrEmpty(),
Revision: displayConfig.Revision,
Server: displayConfig.ServerMode,
// We expect the ent version to be part of the reported version string, and that's now part of the metadata, not the actual version.
Version: s.agent.config.VersionWithMetadata(),
BuildDate: s.agent.config.BuildDate.Format(time.RFC3339),
Version: displayConfig.VersionWithMetadata(),
BuildDate: displayConfig.BuildDate.Format(time.RFC3339),
}
return Self{
Config: config,
DebugConfig: s.agent.config.Sanitized(),
Coord: cs[s.agent.config.SegmentName],
DebugConfig: displayConfig.Sanitized(),
Coord: cs[displayConfig.SegmentName],
Member: s.agent.AgentLocalMember(),
Stats: s.agent.Stats(),
Meta: s.agent.State.Metadata(),