consul/agent/hcp/bootstrap/config-loader/loader.go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

// Package loader handles loading the bootstrap agent config fetched from HCP into
// the agent's config. It must be a separate package from other HCP components
// because it has a dependency on agent/config while other components need to be
// imported and run within the server process in agent/consul and that would create
// a dependency cycle.
package loader

import (
	"context"
	"fmt"
	"path/filepath"

	"github.com/hashicorp/consul/agent/config"
	"github.com/hashicorp/consul/agent/hcp/bootstrap"
"github.com/hashicorp/consul/agent/hcp/bootstrap/constants"
hcpclient "github.com/hashicorp/consul/agent/hcp/client"
)
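
// ConfigLoader is the signature of the function used to build an agent's
// config.LoadResult from a config.Source. The agent supplies a base loader with
// this signature, which the helpers in this package wrap to inject HCP bootstrap
// config.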
type ConfigLoader func(source config.Source) (config.LoadResult, error)

// LoadConfig will attempt to load previously-fetched config from disk and fall back
// to fetching from the HCP servers if the local data is incomplete.
// It must be passed a (CLI) UI implementation so it can deliver progress
// updates to the user, for example if it is waiting to retry for a long period.
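//
// A minimal usage sketch (hcpClient, dataDir, baseLoader, and ui are assumed to be
// provided by the agent's startup code and are not defined here):
//
//	loader, err := LoadConfig(ctx, hcpClient, dataDir, baseLoader, ui)
//	if err != nil {
//		return err
//	}
//	res, err := loader(nil) // the fetched bootstrap config becomes the default source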
func LoadConfig(ctx context.Context, client hcpclient.Client, dataDir string, loader ConfigLoader, ui bootstrap.UI) (ConfigLoader, error) {
	ui.Output("Loading configuration from HCP")

	// See if we have existing config on disk
	//
	// OPTIMIZE: We could probably be more intelligent about config loading.
	// The currently implemented approach is:
	// 1. Attempt to load data from disk
	// 2. If that fails or the data is incomplete, block indefinitely fetching remote config.
	//
	// What if instead we had the following flow:
	// 1. Attempt to fetch config from HCP.
	// 2. If that fails, fall back to data on disk from last fetch.
	// 3. If that fails, go into blocking loop to fetch remote config.
	//
	// This should allow us to more gracefully transition cases like when
	// an existing cluster is linked, but then wants to receive TLS materials
	// at a later time. Currently, if we observe the existing-cluster marker we
	// don't attempt to fetch any additional configuration from HCP.

	cfg, ok := bootstrap.LoadPersistedBootstrapConfig(dataDir, ui)
	if ok {
		// Persisted bootstrap config exists, but needs to be validated.
		err := validatePersistedConfig(dataDir)
		if err != nil {
			ok = false
		}
	}
	if !ok {
		ui.Info("Fetching configuration from HCP servers")

		var err error
		cfg, err = bootstrap.FetchBootstrapConfig(ctx, client, dataDir, ui)
		if err != nil {
			return nil, fmt.Errorf("failed to bootstrap from HCP: %w", err)
		}
		ui.Info("Configuration fetched from HCP and saved on local disk")
	} else {
		ui.Info("Loaded HCP configuration from local disk")
	}

	// Create a new loader func to return
	newLoader := bootstrapConfigLoader(loader, cfg)
	return newLoader, nil
}
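
// AddAclPolicyAccessControlHeader wraps a ConfigLoader so that the resulting
// RuntimeConfig exposes the x-consul-default-acl-policy header via the
// Access-Control-Expose-Headers response header, which the HCP UI relies on.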
func AddAclPolicyAccessControlHeader(baseLoader ConfigLoader) ConfigLoader {
	return func(source config.Source) (config.LoadResult, error) {
		res, err := baseLoader(source)
		if err != nil {
			return res, err
		}

		rc := res.RuntimeConfig

		// HTTP response headers are modified for the HCP UI to work.
		if rc.HTTPResponseHeaders == nil {
			rc.HTTPResponseHeaders = make(map[string]string)
		}
		prevValue, ok := rc.HTTPResponseHeaders[accessControlHeaderName]
		if !ok {
			rc.HTTPResponseHeaders[accessControlHeaderName] = accessControlHeaderValue
		} else {
			rc.HTTPResponseHeaders[accessControlHeaderName] = prevValue + "," + accessControlHeaderValue
		}

		return res, nil
	}
}

// bootstrapConfigLoader is a ConfigLoader for passing bootstrap JSON config received from HCP
// to the config.builder. ConfigLoaders are functions used to build an agent's RuntimeConfig
// from various sources like files and flags. This config is contained in the config.LoadResult.
//
// The flow to include bootstrap config from HCP as a loader's data source is as follows:
//
//  1. A base ConfigLoader function (baseLoader) is created on agent start, and it sets the
//     input source argument as the DefaultConfig.
//
//  2. When a server agent can be configured by HCP, that baseLoader is wrapped in this
//     bootstrapConfigLoader.
//
//  3. The bootstrapConfigLoader calls that base loader with the bootstrap JSON config as the
//     default source. This data will be merged with other valid sources in the config.builder.
//
//  4. The result of the call to baseLoader() below contains the resulting RuntimeConfig, and we
//     do some additional modifications to attach data that doesn't get populated during the
//     build in the config pkg.
//
// Note that since the ConfigJSON is stored as the baseLoader's DefaultConfig, its data is the first
// to be merged by the config.builder and could be overwritten by user-provided values in config files or
// CLI flags. However, values set to RuntimeConfig after the baseLoader call are final.
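//
// A rough sketch of the wiring, for illustration only (the identifiers below are
// assumed names, not the exact call sites in the agent command):
//
//	base := func(source config.Source) (config.LoadResult, error) {
//		opts.DefaultConfig = source // opts is the agent's config.LoadOpts
//		return config.Load(opts)
//	}
//	loader := bootstrapConfigLoader(base, cfg) // cfg holds the HCP bootstrap JSON
//	res, err := loader(nil)                    // ConfigJSON is injected as a FileSource named "HCP Bootstrap"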
func bootstrapConfigLoader(baseLoader ConfigLoader, cfg *bootstrap.RawBootstrapConfig) ConfigLoader {
	return func(source config.Source) (config.LoadResult, error) {
		// Don't allow any further attempts to provide a DefaultSource. This should
		// only ever be needed later in client agent AutoConfig code but that should
		// be mutually exclusive from this bootstrapping mechanism since this is
		// only for servers. If we ever try to change that, this clear failure
		// should alert future developers that the assumptions are changing rather
		// than quietly not applying the config they expect!
		if source != nil {
			return config.LoadResult{},
				fmt.Errorf("non-nil config source provided to a loader after HCP bootstrap already provided a DefaultSource")
		}

		// Otherwise, just call to the loader we were passed with our own additional
		// JSON as the source.
		//
		// OPTIMIZE: We could check/log whether any fields set by the remote config
		// were overwritten by a user-provided flag.
		res, err := baseLoader(config.FileSource{
			Name:   "HCP Bootstrap",
			Format: "json",
			Data:   cfg.ConfigJSON,
		})
		if err != nil {
			return res, fmt.Errorf("failed to load HCP Bootstrap config: %w", err)
		}

		finalizeRuntimeConfig(res.RuntimeConfig, cfg)
		return res, nil
	}
}

const (
	accessControlHeaderName  = "Access-Control-Expose-Headers"
	accessControlHeaderValue = "x-consul-default-acl-policy"
)

// finalizeRuntimeConfig will set additional HCP-specific values that are not
// handled by the config.builder.
func finalizeRuntimeConfig(rc *config.RuntimeConfig, cfg *bootstrap.RawBootstrapConfig) {
	rc.Cloud.ManagementToken = cfg.ManagementToken
}

// validatePersistedConfig attempts to load the persisted config to check for errors
// and basic validity. Errors here surface issues such as references to unsupported
// config fields.
func validatePersistedConfig(dataDir string) error {
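	// The HCL settings below are placeholders that presumably exist only so the load
	// can succeed when checking the persisted JSON; the loaded result is discarded
	// and only the error matters here.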
	filename := filepath.Join(dataDir, constants.SubDir, bootstrap.ConfigFileName)
	_, err := config.Load(config.LoadOpts{
		ConfigFiles: []string{filename},
		HCL: []string{
			"server = true",
			`bind_addr = "127.0.0.1"`,
			fmt.Sprintf("data_dir = %q", dataDir),
		},
		ConfigFormat: "json",
	})
	if err != nil {
		return fmt.Errorf("failed to parse local bootstrap config: %w", err)
	}

	return nil
}