From c2738231ecb09d640dd5cab5c66783139eee11bd Mon Sep 17 00:00:00 2001 From: Brian Downs Date: Tue, 28 May 2024 08:55:41 -0700 Subject: [PATCH 01/20] update channel server for may 2024 (#10137) --- channel.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/channel.yaml b/channel.yaml index 09141c6711..148b390736 100644 --- a/channel.yaml +++ b/channel.yaml @@ -1,7 +1,7 @@ # Example channels config channels: - name: stable - latest: v1.29.4+k3s1 + latest: v1.29.5+k3s1 - name: latest latestRegexp: .* excludeRegexp: (^[^+]+-|v1\.25\.5\+k3s1|v1\.26\.0\+k3s1) From 6683fcdb65609f0df041b10707fdafc16a3f90e3 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Fri, 24 May 2024 17:49:06 +0000 Subject: [PATCH 02/20] Bump klipper-helm image for tls secret support Signed-off-by: Brad Davidson --- go.mod | 2 +- go.sum | 4 ++-- scripts/airgap/image-list.txt | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/go.mod b/go.mod index 3cfb04b998..25e31b5884 100644 --- a/go.mod +++ b/go.mod @@ -103,7 +103,7 @@ require ( github.com/ipfs/go-log/v2 v2.5.1 github.com/joho/godotenv v1.5.1 github.com/json-iterator/go v1.1.12 - github.com/k3s-io/helm-controller v0.16.1-0.20240502205943-2f32059d43e6 + github.com/k3s-io/helm-controller v0.16.1 github.com/k3s-io/kine v0.11.9 github.com/klauspost/compress v1.17.7 github.com/kubernetes-sigs/cri-tools v0.0.0-00010101000000-000000000000 diff --git a/go.sum b/go.sum index 508117fb7d..3cff4c8930 100644 --- a/go.sum +++ b/go.sum @@ -924,8 +924,8 @@ github.com/k3s-io/etcd/raft/v3 v3.5.13-k3s1 h1:yexUwAPPdmYfIMWOj6sSyJ2nEe8QOrFzN github.com/k3s-io/etcd/raft/v3 v3.5.13-k3s1/go.mod h1:uUFibGLn2Ksm2URMxN1fICGhk8Wu96EfDQyuLhAcAmw= github.com/k3s-io/etcd/server/v3 v3.5.13-k3s1 h1:Pqcxkg7V60c26ZpHoekP9QoUdLuduxFn827A/5CIwm4= github.com/k3s-io/etcd/server/v3 v3.5.13-k3s1/go.mod h1:K/8nbsGupHqmr5MkgaZpLlH1QdX1pcNQLAkODy44XcQ= -github.com/k3s-io/helm-controller v0.16.1-0.20240502205943-2f32059d43e6 h1:2VcBFT2iPskZqNEVY5636Fk8NHiM/x4zQ9/h+f3WMSA= -github.com/k3s-io/helm-controller v0.16.1-0.20240502205943-2f32059d43e6/go.mod h1:AcSxEhOIUgeVvBTnJOAwcezBZXtYew/RhKwO5xp3RlM= +github.com/k3s-io/helm-controller v0.16.1 h1:4sdJSYdAeTvMjjq3Pt1ZcyenRTJIAvKojTWRg/i8Ne4= +github.com/k3s-io/helm-controller v0.16.1/go.mod h1:AcSxEhOIUgeVvBTnJOAwcezBZXtYew/RhKwO5xp3RlM= github.com/k3s-io/kine v0.11.9 h1:7HfWSwtOowb7GuV6nECnNlFKShgRgVBLdWXj0/4t0sE= github.com/k3s-io/kine v0.11.9/go.mod h1:N8rc1GDmEvvYRuTxhKTZfSc4fm/vyI6GbDxwBjccAjs= github.com/k3s-io/klog/v2 v2.120.1-k3s1 h1:7twAHPFpZA21KdMnMNnj68STQMPldAxF2Zsaol57dxw= diff --git a/scripts/airgap/image-list.txt b/scripts/airgap/image-list.txt index dc5e8c6852..932f47ab2f 100644 --- a/scripts/airgap/image-list.txt +++ b/scripts/airgap/image-list.txt @@ -1,4 +1,4 @@ -docker.io/rancher/klipper-helm:v0.8.3-build20240228 +docker.io/rancher/klipper-helm:v0.8.4-build20240523 docker.io/rancher/klipper-lb:v0.4.7 docker.io/rancher/local-path-provisioner:v0.0.26 docker.io/rancher/mirrored-coredns-coredns:1.10.1 From eb192197eb69e9be223c39ec83f59dee5ceadd3e Mon Sep 17 00:00:00 2001 From: Anuj Garg Date: Mon, 22 Apr 2024 11:43:35 +0530 Subject: [PATCH 03/20] Updating the script binary_size_check to complete the command name by adding .exe extension to the k3s binary name to make it available to run stat command Signed-off-by: Anuj Garg --- scripts/binary_size_check.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/binary_size_check.sh b/scripts/binary_size_check.sh index 9ba138e3f4..f019ec53e1 100755 --- a/scripts/binary_size_check.sh +++ b/scripts/binary_size_check.sh @@ -2,6 +2,8 @@ set -e +. ./scripts/version.sh + GO=${GO-go} ARCH=${ARCH:-$("${GO}" env GOARCH)} @@ -22,7 +24,7 @@ elif [ ${ARCH} = s390x ]; then BIN_SUFFIX="-s390x" fi -CMD_NAME="dist/artifacts/k3s${BIN_SUFFIX}" +CMD_NAME="dist/artifacts/k3s${BIN_SUFFIX}${BINARY_POSTFIX}" SIZE=$(stat -c '%s' ${CMD_NAME}) if [ -n "${DEBUG}" ]; then From 3d14092f76f27f4978968597174707a5cb2a80e7 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Mon, 29 Apr 2024 23:29:49 +0000 Subject: [PATCH 04/20] Fix issue with k3s-etcd informers not starting Start shared informer caches when k3s-etcd controller wins leader election. Previously, these were only started when the main k3s apiserver controller won an election. If the leaders ended up going to different nodes, some informers wouldn't be started Signed-off-by: Brad Davidson --- pkg/cluster/managed.go | 4 +++- pkg/etcd/etcd.go | 7 +++++++ pkg/server/server.go | 4 ++-- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/pkg/cluster/managed.go b/pkg/cluster/managed.go index d6c668998a..b0e6f71861 100644 --- a/pkg/cluster/managed.go +++ b/pkg/cluster/managed.go @@ -91,7 +91,9 @@ func (c *Cluster) start(ctx context.Context) error { return c.managedDB.Start(ctx, c.clientAccessInfo) } -// registerDBHandlers registers routes for database info with the http request handler +// registerDBHandlers registers managed-datastore-specific callbacks, and installs additional HTTP route handlers. +// Note that for etcd, controllers only run on nodes with a local apiserver, in order to provide stable external +// management of etcd cluster membership without being disrupted when a member is removed from the cluster. func (c *Cluster) registerDBHandlers(handler http.Handler) (http.Handler, error) { if c.managedDB == nil { return handler, nil diff --git a/pkg/etcd/etcd.go b/pkg/etcd/etcd.go index 757c542c44..c34f1396ff 100644 --- a/pkg/etcd/etcd.go +++ b/pkg/etcd/etcd.go @@ -34,6 +34,7 @@ import ( "github.com/pkg/errors" certutil "github.com/rancher/dynamiclistener/cert" controllerv1 "github.com/rancher/wrangler/v3/pkg/generated/controllers/core/v1" + "github.com/rancher/wrangler/v3/pkg/start" "github.com/robfig/cron/v3" "github.com/sirupsen/logrus" "go.etcd.io/etcd/api/v3/etcdserverpb" @@ -619,6 +620,12 @@ func (e *ETCD) Register(handler http.Handler) (http.Handler, error) { registerEndpointsHandlers(ctx, e) registerMemberHandlers(ctx, e) registerSnapshotHandlers(ctx, e) + + // Re-run informer factory startup after core and leader-elected controllers have started. + // Additional caches may need to start for the newly added OnChange/OnRemove callbacks. + if err := start.All(ctx, 5, e.config.Runtime.K3s, e.config.Runtime.Core); err != nil { + panic(errors.Wrap(err, "failed to start wrangler controllers")) + } } } diff --git a/pkg/server/server.go b/pkg/server/server.go index 7df3f52de5..848a5f1b82 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -166,8 +166,8 @@ func apiserverControllers(ctx context.Context, sc *Context, config *Config) { } } - // Re-run context startup after core and leader-elected controllers have started. Additional - // informer caches may need to start for the newly added OnChange callbacks. + // Re-run informer factory startup after core and leader-elected controllers have started. + // Additional caches may need to start for the newly added OnChange/OnRemove callbacks. if err := sc.Start(ctx); err != nil { panic(errors.Wrap(err, "failed to start wranger controllers")) } From ff679fb3abd5b06a82b62004de8703e0f4d6d664 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Thu, 25 Apr 2024 01:02:05 +0000 Subject: [PATCH 05/20] Refactor supervisor listener startup and add metrics * Refactor agent supervisor listener startup and authn/authz to use upstream auth delegators to perform for SubjectAccessReview for access to metrics. * Convert spegel and pprof handlers over to new structure. * Promote bind-address to agent flag to allow setting supervisor bind address for both agent and server. * Promote enable-pprof to agent flag to allow profiling agents. Access to the pprof endpoint now requires client cert auth, similar to the spegel registry api endpoint. * Add prometheus metrics handler. Signed-off-by: Brad Davidson --- pkg/agent/config/config.go | 19 ++++-- pkg/agent/https/https.go | 106 +++++++++++++++++++++++++++++++ pkg/agent/netpol/netpol.go | 16 ++--- pkg/agent/run.go | 14 +++++ pkg/certmonitor/certmonitor.go | 12 +--- pkg/cli/agent/agent.go | 50 ++++++--------- pkg/cli/cmds/agent.go | 14 +++++ pkg/cli/cmds/server.go | 22 +++---- pkg/cli/server/server.go | 52 +++++++++------ pkg/cluster/https.go | 23 +------ pkg/daemons/config/types.go | 12 +++- pkg/metrics/metrics.go | 45 +++++++++++++ pkg/profile/profile.go | 38 +++++++++++ pkg/spegel/spegel.go | 40 +++--------- pkg/util/net.go | 111 +++++++++++++++++++++++++++++++++ pkg/util/net_unix.go | 18 ++++++ pkg/util/net_windows.go | 11 ++++ 17 files changed, 462 insertions(+), 141 deletions(-) create mode 100644 pkg/agent/https/https.go create mode 100644 pkg/metrics/metrics.go create mode 100644 pkg/profile/profile.go create mode 100644 pkg/util/net_unix.go create mode 100644 pkg/util/net_windows.go diff --git a/pkg/agent/config/config.go b/pkg/agent/config/config.go index c1b454dda9..dfab68d023 100644 --- a/pkg/agent/config/config.go +++ b/pkg/agent/config/config.go @@ -524,12 +524,14 @@ func get(ctx context.Context, envInfo *cmds.Agent, proxy proxy.Proxy) (*config.N SELinux: envInfo.EnableSELinux, ContainerRuntimeEndpoint: envInfo.ContainerRuntimeEndpoint, ImageServiceEndpoint: envInfo.ImageServiceEndpoint, + EnablePProf: envInfo.EnablePProf, EmbeddedRegistry: controlConfig.EmbeddedRegistry, FlannelBackend: controlConfig.FlannelBackend, FlannelIPv6Masq: controlConfig.FlannelIPv6Masq, FlannelExternalIP: controlConfig.FlannelExternalIP, EgressSelectorMode: controlConfig.EgressSelectorMode, ServerHTTPSPort: controlConfig.HTTPSPort, + SupervisorMetrics: controlConfig.SupervisorMetrics, Token: info.String(), } nodeConfig.FlannelIface = flannelIface @@ -592,13 +594,18 @@ func get(ctx context.Context, envInfo *cmds.Agent, proxy proxy.Proxy) (*config.N nodeConfig.Containerd.Template = filepath.Join(envInfo.DataDir, "agent", "etc", "containerd", "config.toml.tmpl") nodeConfig.Certificate = servingCert - nodeConfig.AgentConfig.NodeIPs = nodeIPs - listenAddress, _, _, err := util.GetDefaultAddresses(nodeIPs[0]) - if err != nil { - return nil, errors.Wrap(err, "cannot configure IPv4/IPv6 node-ip") + if envInfo.BindAddress != "" { + nodeConfig.AgentConfig.ListenAddress = envInfo.BindAddress + } else { + listenAddress, _, _, err := util.GetDefaultAddresses(nodeIPs[0]) + if err != nil { + return nil, errors.Wrap(err, "cannot configure IPv4/IPv6 node-ip") + } + nodeConfig.AgentConfig.ListenAddress = listenAddress } + nodeConfig.AgentConfig.NodeIP = nodeIPs[0].String() - nodeConfig.AgentConfig.ListenAddress = listenAddress + nodeConfig.AgentConfig.NodeIPs = nodeIPs nodeConfig.AgentConfig.NodeExternalIPs = nodeExternalIPs // if configured, set NodeExternalIP to the first IPv4 address, for legacy clients @@ -689,6 +696,8 @@ func get(ctx context.Context, envInfo *cmds.Agent, proxy proxy.Proxy) (*config.N nodeConfig.AgentConfig.ImageCredProvConfig = envInfo.ImageCredProvConfig nodeConfig.AgentConfig.DisableCCM = controlConfig.DisableCCM nodeConfig.AgentConfig.DisableNPC = controlConfig.DisableNPC + nodeConfig.AgentConfig.MinTLSVersion = controlConfig.MinTLSVersion + nodeConfig.AgentConfig.CipherSuites = controlConfig.CipherSuites nodeConfig.AgentConfig.Rootless = envInfo.Rootless nodeConfig.AgentConfig.PodManifests = filepath.Join(envInfo.DataDir, "agent", DefaultPodManifestPath) nodeConfig.AgentConfig.ProtectKernelDefaults = envInfo.ProtectKernelDefaults diff --git a/pkg/agent/https/https.go b/pkg/agent/https/https.go new file mode 100644 index 0000000000..2b5927107f --- /dev/null +++ b/pkg/agent/https/https.go @@ -0,0 +1,106 @@ +package https + +import ( + "context" + "net/http" + "strconv" + "sync" + + "github.com/gorilla/mux" + "github.com/k3s-io/k3s/pkg/daemons/config" + "github.com/k3s-io/k3s/pkg/generated/clientset/versioned/scheme" + "github.com/k3s-io/k3s/pkg/util" + "github.com/k3s-io/k3s/pkg/version" + "k8s.io/apiserver/pkg/authentication/authenticator" + "k8s.io/apiserver/pkg/authorization/authorizer" + genericapifilters "k8s.io/apiserver/pkg/endpoints/filters" + apirequest "k8s.io/apiserver/pkg/endpoints/request" + "k8s.io/apiserver/pkg/server" + "k8s.io/apiserver/pkg/server/options" +) + +// RouterFunc provides a hook for components to register additional routes to a request router +type RouterFunc func(ctx context.Context, nodeConfig *config.Node) (*mux.Router, error) + +var once sync.Once +var router *mux.Router +var err error + +// Start returns a router with authn/authz filters applied. +// The first time it is called, the router is created and a new HTTPS listener is started if the handler is nil. +// Subsequent calls will return the same router. +func Start(ctx context.Context, nodeConfig *config.Node, runtime *config.ControlRuntime) (*mux.Router, error) { + once.Do(func() { + router = mux.NewRouter().SkipClean(true) + config := server.Config{} + + if runtime == nil { + // If we do not have an existing handler, set up a new listener + tcp, lerr := util.ListenWithLoopback(ctx, nodeConfig.AgentConfig.ListenAddress, strconv.Itoa(nodeConfig.ServerHTTPSPort)) + if lerr != nil { + err = lerr + return + } + + serving := options.NewSecureServingOptions() + serving.Listener = tcp + serving.CipherSuites = nodeConfig.AgentConfig.CipherSuites + serving.MinTLSVersion = nodeConfig.AgentConfig.MinTLSVersion + serving.ServerCert = options.GeneratableKeyCert{ + CertKey: options.CertKey{ + CertFile: nodeConfig.AgentConfig.ServingKubeletCert, + KeyFile: nodeConfig.AgentConfig.ServingKubeletKey, + }, + } + if aerr := serving.ApplyTo(&config.SecureServing); aerr != nil { + err = aerr + return + } + } else { + // If we have an existing handler, wrap it + router.NotFoundHandler = runtime.Handler + runtime.Handler = router + } + + authn := options.NewDelegatingAuthenticationOptions() + authn.DisableAnonymous = true + authn.SkipInClusterLookup = true + authn.ClientCert = options.ClientCertAuthenticationOptions{ + ClientCA: nodeConfig.AgentConfig.ClientCA, + } + authn.RemoteKubeConfigFile = nodeConfig.AgentConfig.KubeConfigKubelet + if applyErr := authn.ApplyTo(&config.Authentication, config.SecureServing, nil); applyErr != nil { + err = applyErr + return + } + + authz := options.NewDelegatingAuthorizationOptions() + authz.AlwaysAllowPaths = []string{"/v2", "/debug/pprof", "/v1-" + version.Program + "/p2p"} + authz.RemoteKubeConfigFile = nodeConfig.AgentConfig.KubeConfigKubelet + if applyErr := authz.ApplyTo(&config.Authorization); applyErr != nil { + err = applyErr + return + } + + router.Use(filterChain(config.Authentication.Authenticator, config.Authorization.Authorizer)) + + if config.SecureServing != nil { + _, _, err = config.SecureServing.Serve(router, 0, ctx.Done()) + } + }) + + return router, err +} + +// filterChain runs the kubernetes authn/authz filter chain using the mux middleware API +func filterChain(authn authenticator.Request, authz authorizer.Authorizer) mux.MiddlewareFunc { + return func(handler http.Handler) http.Handler { + requestInfoResolver := &apirequest.RequestInfoFactory{} + failedHandler := genericapifilters.Unauthorized(scheme.Codecs) + handler = genericapifilters.WithAuthorization(handler, authz, scheme.Codecs) + handler = genericapifilters.WithAuthentication(handler, authn, failedHandler, nil, nil) + handler = genericapifilters.WithRequestInfo(handler, requestInfoResolver) + handler = genericapifilters.WithCacheControl(handler) + return handler + } +} diff --git a/pkg/agent/netpol/netpol.go b/pkg/agent/netpol/netpol.go index a8bb760bcf..60d2c1f07f 100644 --- a/pkg/agent/netpol/netpol.go +++ b/pkg/agent/netpol/netpol.go @@ -19,25 +19,25 @@ import ( "github.com/cloudnativelabs/kube-router/v2/pkg/controllers/netpol" "github.com/cloudnativelabs/kube-router/v2/pkg/healthcheck" - "github.com/cloudnativelabs/kube-router/v2/pkg/metrics" + krmetrics "github.com/cloudnativelabs/kube-router/v2/pkg/metrics" "github.com/cloudnativelabs/kube-router/v2/pkg/options" "github.com/cloudnativelabs/kube-router/v2/pkg/utils" "github.com/cloudnativelabs/kube-router/v2/pkg/version" "github.com/coreos/go-iptables/iptables" "github.com/k3s-io/k3s/pkg/daemons/config" + "github.com/k3s-io/k3s/pkg/metrics" "github.com/pkg/errors" "github.com/sirupsen/logrus" v1core "k8s.io/api/core/v1" "k8s.io/client-go/informers" "k8s.io/client-go/kubernetes" "k8s.io/client-go/tools/clientcmd" - "k8s.io/component-base/metrics/legacyregistry" ) func init() { // ensure that kube-router exposes metrics through the same registry used by Kubernetes components - metrics.DefaultRegisterer = legacyregistry.Registerer() - metrics.DefaultGatherer = legacyregistry.DefaultGatherer + krmetrics.DefaultRegisterer = metrics.DefaultRegisterer + krmetrics.DefaultGatherer = metrics.DefaultGatherer } // Run creates and starts a new instance of the kube-router network policy controller @@ -156,7 +156,7 @@ func Run(ctx context.Context, nodeConfig *config.Node) error { } // Start kube-router metrics controller to avoid complaints about metrics heartbeat missing - mc, err := metrics.NewMetricsController(krConfig) + mc, err := krmetrics.NewMetricsController(krConfig) if err != nil { return nil } @@ -188,13 +188,13 @@ func Run(ctx context.Context, nodeConfig *config.Node) error { } // metricsRunCheck is a stub version of mc.Run() that doesn't start up a dedicated http server. -func metricsRunCheck(mc *metrics.Controller, healthChan chan<- *healthcheck.ControllerHeartbeat, stopCh <-chan struct{}, wg *sync.WaitGroup) { +func metricsRunCheck(mc *krmetrics.Controller, healthChan chan<- *healthcheck.ControllerHeartbeat, stopCh <-chan struct{}, wg *sync.WaitGroup) { t := time.NewTicker(3 * time.Second) defer wg.Done() // register metrics for this controller - metrics.BuildInfo.WithLabelValues(runtime.Version(), version.Version).Set(1) - metrics.DefaultRegisterer.MustRegister(metrics.BuildInfo) + krmetrics.BuildInfo.WithLabelValues(runtime.Version(), version.Version).Set(1) + krmetrics.DefaultRegisterer.MustRegister(krmetrics.BuildInfo) for { healthcheck.SendHeartBeat(healthChan, "MC") diff --git a/pkg/agent/run.go b/pkg/agent/run.go index 607cfc7bc4..f3342767ad 100644 --- a/pkg/agent/run.go +++ b/pkg/agent/run.go @@ -27,7 +27,9 @@ import ( "github.com/k3s-io/k3s/pkg/daemons/agent" daemonconfig "github.com/k3s-io/k3s/pkg/daemons/config" "github.com/k3s-io/k3s/pkg/daemons/executor" + "github.com/k3s-io/k3s/pkg/metrics" "github.com/k3s-io/k3s/pkg/nodeconfig" + "github.com/k3s-io/k3s/pkg/profile" "github.com/k3s-io/k3s/pkg/rootless" "github.com/k3s-io/k3s/pkg/spegel" "github.com/k3s-io/k3s/pkg/util" @@ -113,6 +115,18 @@ func run(ctx context.Context, cfg cmds.Agent, proxy proxy.Proxy) error { } } + if nodeConfig.SupervisorMetrics { + if err := metrics.DefaultMetrics.Start(ctx, nodeConfig); err != nil { + return errors.Wrap(err, "failed to serve metrics") + } + } + + if nodeConfig.EnablePProf { + if err := profile.DefaultProfiler.Start(ctx, nodeConfig); err != nil { + return errors.Wrap(err, "failed to serve pprof") + } + } + if err := setupCriCtlConfig(cfg, nodeConfig); err != nil { return err } diff --git a/pkg/certmonitor/certmonitor.go b/pkg/certmonitor/certmonitor.go index 6b1e2a50ab..6aedbcdf63 100644 --- a/pkg/certmonitor/certmonitor.go +++ b/pkg/certmonitor/certmonitor.go @@ -11,6 +11,7 @@ import ( daemonconfig "github.com/k3s-io/k3s/pkg/daemons/config" "github.com/k3s-io/k3s/pkg/daemons/control/deps" + "github.com/k3s-io/k3s/pkg/metrics" "github.com/k3s-io/k3s/pkg/util" "github.com/k3s-io/k3s/pkg/util/services" "github.com/k3s-io/k3s/pkg/version" @@ -22,18 +23,9 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/component-base/metrics/legacyregistry" ) var ( - // DefaultRegisterer and DefaultGatherer are the implementations of the - // prometheus Registerer and Gatherer interfaces that all metrics operations - // will use. They are variables so that packages that embed this library can - // replace them at runtime, instead of having to pass around specific - // registries. - DefaultRegisterer = legacyregistry.Registerer() - DefaultGatherer = legacyregistry.DefaultGatherer - // Check certificates twice an hour. Kubernetes events have a TTL of 1 hour by default, // so similar events should be aggregated and refreshed by the event recorder as long // as they are created within the TTL period. @@ -50,7 +42,7 @@ var ( // Setup starts the certificate expiration monitor func Setup(ctx context.Context, nodeConfig *daemonconfig.Node, dataDir string) error { logrus.Debugf("Starting %s with monitoring period %s", controllerName, certCheckInterval) - DefaultRegisterer.MustRegister(certificateExpirationSeconds) + metrics.DefaultRegisterer.MustRegister(certificateExpirationSeconds) client, err := util.GetClientSet(nodeConfig.AgentConfig.KubeConfigKubelet) if err != nil { diff --git a/pkg/cli/agent/agent.go b/pkg/cli/agent/agent.go index 7e43dd519d..d42beee7d5 100644 --- a/pkg/cli/agent/agent.go +++ b/pkg/cli/agent/agent.go @@ -1,20 +1,22 @@ package agent import ( + "context" "crypto/tls" - "errors" "fmt" - "net/http" "os" "path/filepath" "runtime" "github.com/gorilla/mux" "github.com/k3s-io/k3s/pkg/agent" - "github.com/k3s-io/k3s/pkg/authenticator" + "github.com/k3s-io/k3s/pkg/agent/https" "github.com/k3s-io/k3s/pkg/cli/cmds" + "github.com/k3s-io/k3s/pkg/daemons/config" "github.com/k3s-io/k3s/pkg/datadir" + k3smetrics "github.com/k3s-io/k3s/pkg/metrics" "github.com/k3s-io/k3s/pkg/proctitle" + "github.com/k3s-io/k3s/pkg/profile" "github.com/k3s-io/k3s/pkg/spegel" "github.com/k3s-io/k3s/pkg/util" "github.com/k3s-io/k3s/pkg/version" @@ -22,7 +24,6 @@ import ( "github.com/rancher/wrangler/v3/pkg/signals" "github.com/sirupsen/logrus" "github.com/urfave/cli" - apiauth "k8s.io/apiserver/pkg/authentication/authenticator" ) func Run(ctx *cli.Context) error { @@ -108,33 +109,22 @@ func Run(ctx *cli.Context) error { // Until the agent is run and retrieves config from the server, we won't know // if the embedded registry is enabled. If it is not enabled, these are not // used as the registry is never started. - conf := spegel.DefaultRegistry - conf.Bootstrapper = spegel.NewAgentBootstrapper(cfg.ServerURL, cfg.Token, cfg.DataDir) - conf.HandlerFunc = func(conf *spegel.Config, router *mux.Router) error { - // Create and bind a new authenticator using the configured client CA - authArgs := []string{"--client-ca-file=" + conf.ClientCAFile} - auth, err := authenticator.FromArgs(authArgs) - if err != nil { - return err - } - conf.AuthFunc = func() apiauth.Request { - return auth - } + registry := spegel.DefaultRegistry + registry.Bootstrapper = spegel.NewAgentBootstrapper(cfg.ServerURL, cfg.Token, cfg.DataDir) + registry.Router = func(ctx context.Context, nodeConfig *config.Node) (*mux.Router, error) { + return https.Start(ctx, nodeConfig, nil) + } - // Create a new server and listen on the configured port - server := &http.Server{ - Handler: router, - Addr: ":" + conf.RegistryPort, - TLSConfig: &tls.Config{ - ClientAuth: tls.RequestClientCert, - }, - } - go func() { - if err := server.ListenAndServeTLS(conf.ServerCertFile, conf.ServerKeyFile); err != nil && !errors.Is(err, http.ErrServerClosed) { - logrus.Fatalf("registry server failed: %v", err) - } - }() - return nil + // same deal for metrics - these are not used if the extra metrics listener is not enabled. + metrics := k3smetrics.DefaultMetrics + metrics.Router = func(ctx context.Context, nodeConfig *config.Node) (*mux.Router, error) { + return https.Start(ctx, nodeConfig, nil) + } + + // and for pprof as well + pprof := profile.DefaultProfiler + pprof.Router = func(ctx context.Context, nodeConfig *config.Node) (*mux.Router, error) { + return https.Start(ctx, nodeConfig, nil) } return agent.Run(contextCtx, cfg) diff --git a/pkg/cli/cmds/agent.go b/pkg/cli/cmds/agent.go index 53b620f4d0..16e0a196c1 100644 --- a/pkg/cli/cmds/agent.go +++ b/pkg/cli/cmds/agent.go @@ -20,6 +20,7 @@ type Agent struct { LBServerPort int ResolvConf string DataDir string + BindAddress string NodeIP cli.StringSlice NodeExternalIP cli.StringSlice NodeName string @@ -36,6 +37,7 @@ type Agent struct { VPNAuth string VPNAuthFile string Debug bool + EnablePProf bool Rootless bool RootlessAlreadyUnshared bool WithNodeID bool @@ -226,6 +228,16 @@ var ( Usage: "(agent/containerd) Disables containerd's fallback default registry endpoint when a mirror is configured for that registry", Destination: &AgentConfig.ContainerdNoDefault, } + EnablePProfFlag = &cli.BoolFlag{ + Name: "enable-pprof", + Usage: "(experimental) Enable pprof endpoint on supervisor port", + Destination: &AgentConfig.EnablePProf, + } + BindAddressFlag = &cli.StringFlag{ + Name: "bind-address", + Usage: "(listener) " + version.Program + " bind address (default: 0.0.0.0)", + Destination: &AgentConfig.BindAddress, + } ) func NewAgentCommand(action func(ctx *cli.Context) error) cli.Command { @@ -278,6 +290,7 @@ func NewAgentCommand(action func(ctx *cli.Context) error) cli.Command { DisableDefaultRegistryEndpointFlag, AirgapExtraRegistryFlag, NodeIPFlag, + BindAddressFlag, NodeExternalIPFlag, ResolvConfFlag, FlannelIfaceFlag, @@ -286,6 +299,7 @@ func NewAgentCommand(action func(ctx *cli.Context) error) cli.Command { ExtraKubeletArgs, ExtraKubeProxyArgs, // Experimental flags + EnablePProfFlag, &cli.BoolFlag{ Name: "rootless", Usage: "(experimental) Run rootless", diff --git a/pkg/cli/cmds/server.go b/pkg/cli/cmds/server.go index e59a886889..cfb9349fbb 100644 --- a/pkg/cli/cmds/server.go +++ b/pkg/cli/cmds/server.go @@ -48,8 +48,6 @@ type Server struct { HelmJobImage string TLSSan cli.StringSlice TLSSanSecurity bool - BindAddress string - EnablePProf bool ExtraAPIArgs cli.StringSlice ExtraEtcdArgs cli.StringSlice ExtraSchedulerArgs cli.StringSlice @@ -87,6 +85,7 @@ type Server struct { EncryptSkip bool SystemDefaultRegistry string StartupHooks []StartupHook + SupervisorMetrics bool EtcdSnapshotName string EtcdDisableSnapshots bool EtcdExposeMetrics bool @@ -178,11 +177,7 @@ var ServerFlags = []cli.Flag{ VModule, LogFile, AlsoLogToStderr, - &cli.StringFlag{ - Name: "bind-address", - Usage: "(listener) " + version.Program + " bind address (default: 0.0.0.0)", - Destination: &ServerConfig.BindAddress, - }, + BindAddressFlag, &cli.IntFlag{ Name: "https-listen-port", Usage: "(listener) HTTPS listen port", @@ -493,9 +488,14 @@ var ServerFlags = []cli.Flag{ }, &cli.BoolFlag{ Name: "embedded-registry", - Usage: "(experimental/components) Enable embedded distributed container registry; requires use of embedded containerd", + Usage: "(experimental/components) Enable embedded distributed container registry; requires use of embedded containerd; when enabled agents will also listen on the supervisor port", Destination: &ServerConfig.EmbeddedRegistry, }, + &cli.BoolFlag{ + Name: "supervisor-metrics", + Usage: "(experimental/components) Enable serving " + version.Program + " internal metrics on the supervisor port; when enabled agents will also listen on the supervisor port", + Destination: &ServerConfig.SupervisorMetrics, + }, NodeNameFlag, WithNodeIDFlag, NodeLabels, @@ -534,11 +534,7 @@ var ServerFlags = []cli.Flag{ Destination: &ServerConfig.EncryptSecrets, }, // Experimental flags - &cli.BoolFlag{ - Name: "enable-pprof", - Usage: "(experimental) Enable pprof endpoint on supervisor port", - Destination: &ServerConfig.EnablePProf, - }, + EnablePProfFlag, &cli.BoolFlag{ Name: "rootless", Usage: "(experimental) Run rootless", diff --git a/pkg/cli/server/server.go b/pkg/cli/server/server.go index 28607f7a24..0093ee6cf9 100644 --- a/pkg/cli/server/server.go +++ b/pkg/cli/server/server.go @@ -12,13 +12,16 @@ import ( systemd "github.com/coreos/go-systemd/v22/daemon" "github.com/gorilla/mux" "github.com/k3s-io/k3s/pkg/agent" + "github.com/k3s-io/k3s/pkg/agent/https" "github.com/k3s-io/k3s/pkg/agent/loadbalancer" "github.com/k3s-io/k3s/pkg/cli/cmds" "github.com/k3s-io/k3s/pkg/clientaccess" "github.com/k3s-io/k3s/pkg/daemons/config" "github.com/k3s-io/k3s/pkg/datadir" "github.com/k3s-io/k3s/pkg/etcd" + k3smetrics "github.com/k3s-io/k3s/pkg/metrics" "github.com/k3s-io/k3s/pkg/proctitle" + "github.com/k3s-io/k3s/pkg/profile" "github.com/k3s-io/k3s/pkg/rootless" "github.com/k3s-io/k3s/pkg/server" "github.com/k3s-io/k3s/pkg/spegel" @@ -30,7 +33,6 @@ import ( "github.com/sirupsen/logrus" "github.com/urfave/cli" utilnet "k8s.io/apimachinery/pkg/util/net" - "k8s.io/apiserver/pkg/authentication/authenticator" kubeapiserverflag "k8s.io/component-base/cli/flag" "k8s.io/kubernetes/pkg/controlplane/apiserver/options" utilsnet "k8s.io/utils/net" @@ -136,12 +138,11 @@ func run(app *cli.Context, cfg *cmds.Server, leaderControllers server.CustomCont serverConfig.ControlConfig.ServiceLBNamespace = cfg.ServiceLBNamespace serverConfig.ControlConfig.SANs = util.SplitStringSlice(cfg.TLSSan) serverConfig.ControlConfig.SANSecurity = cfg.TLSSanSecurity - serverConfig.ControlConfig.BindAddress = cfg.BindAddress + serverConfig.ControlConfig.BindAddress = cmds.AgentConfig.BindAddress serverConfig.ControlConfig.SupervisorPort = cfg.SupervisorPort serverConfig.ControlConfig.HTTPSPort = cfg.HTTPSPort serverConfig.ControlConfig.APIServerPort = cfg.APIServerPort serverConfig.ControlConfig.APIServerBindAddress = cfg.APIServerBindAddress - serverConfig.ControlConfig.EnablePProf = cfg.EnablePProf serverConfig.ControlConfig.ExtraAPIArgs = cfg.ExtraAPIArgs serverConfig.ControlConfig.ExtraControllerArgs = cfg.ExtraControllerArgs serverConfig.ControlConfig.ExtraEtcdArgs = cfg.ExtraEtcdArgs @@ -174,6 +175,7 @@ func run(app *cli.Context, cfg *cmds.Server, leaderControllers server.CustomCont serverConfig.ControlConfig.EncryptSecrets = cfg.EncryptSecrets serverConfig.ControlConfig.EtcdExposeMetrics = cfg.EtcdExposeMetrics serverConfig.ControlConfig.EtcdDisableSnapshots = cfg.EtcdDisableSnapshots + serverConfig.ControlConfig.SupervisorMetrics = cfg.SupervisorMetrics serverConfig.ControlConfig.VLevel = cmds.LogConfig.VLevel serverConfig.ControlConfig.VModule = cmds.LogConfig.VModule @@ -406,6 +408,7 @@ func run(app *cli.Context, cfg *cmds.Server, leaderControllers server.CustomCont } tlsMinVersionArg := getArgValueFromList("tls-min-version", serverConfig.ControlConfig.ExtraAPIArgs) + serverConfig.ControlConfig.MinTLSVersion = tlsMinVersionArg serverConfig.ControlConfig.TLSMinVersion, err = kubeapiserverflag.TLSVersion(tlsMinVersionArg) if err != nil { return errors.Wrap(err, "invalid tls-min-version") @@ -435,6 +438,7 @@ func run(app *cli.Context, cfg *cmds.Server, leaderControllers server.CustomCont } serverConfig.ControlConfig.ExtraAPIArgs = append(serverConfig.ControlConfig.ExtraAPIArgs, "tls-cipher-suites="+strings.Join(tlsCipherSuites, ",")) } + serverConfig.ControlConfig.CipherSuites = tlsCipherSuites serverConfig.ControlConfig.TLSCipherSuites, err = kubeapiserverflag.TLSCipherSuites(tlsCipherSuites) if err != nil { return errors.Wrap(err, "invalid tls-cipher-suites") @@ -556,28 +560,36 @@ func run(app *cli.Context, cfg *cmds.Server, leaderControllers server.CustomCont go getAPIAddressFromEtcd(ctx, serverConfig, agentConfig) } + // Until the agent is run and retrieves config from the server, we won't know + // if the embedded registry is enabled. If it is not enabled, these are not + // used as the registry is never started. + registry := spegel.DefaultRegistry + registry.Bootstrapper = spegel.NewChainingBootstrapper( + spegel.NewServerBootstrapper(&serverConfig.ControlConfig), + spegel.NewAgentBootstrapper(cfg.ServerURL, token, agentConfig.DataDir), + spegel.NewSelfBootstrapper(), + ) + registry.Router = func(ctx context.Context, nodeConfig *config.Node) (*mux.Router, error) { + return https.Start(ctx, nodeConfig, serverConfig.ControlConfig.Runtime) + } + + // same deal for metrics - these are not used if the extra metrics listener is not enabled. + metrics := k3smetrics.DefaultMetrics + metrics.Router = func(ctx context.Context, nodeConfig *config.Node) (*mux.Router, error) { + return https.Start(ctx, nodeConfig, serverConfig.ControlConfig.Runtime) + } + + // and for pprof as well + pprof := profile.DefaultProfiler + pprof.Router = func(ctx context.Context, nodeConfig *config.Node) (*mux.Router, error) { + return https.Start(ctx, nodeConfig, serverConfig.ControlConfig.Runtime) + } + if cfg.DisableAgent { agentConfig.ContainerRuntimeEndpoint = "/dev/null" return agent.RunStandalone(ctx, agentConfig) } - if cfg.EmbeddedRegistry { - conf := spegel.DefaultRegistry - conf.Bootstrapper = spegel.NewChainingBootstrapper( - spegel.NewServerBootstrapper(&serverConfig.ControlConfig), - spegel.NewAgentBootstrapper(cfg.ServerURL, token, agentConfig.DataDir), - spegel.NewSelfBootstrapper(), - ) - conf.HandlerFunc = func(_ *spegel.Config, router *mux.Router) error { - router.NotFoundHandler = serverConfig.ControlConfig.Runtime.Handler - serverConfig.ControlConfig.Runtime.Handler = router - return nil - } - conf.AuthFunc = func() authenticator.Request { - return serverConfig.ControlConfig.Runtime.Authenticator - } - } - return agent.Run(ctx, agentConfig) } diff --git a/pkg/cluster/https.go b/pkg/cluster/https.go index 78dcfa0623..1b25d321bc 100644 --- a/pkg/cluster/https.go +++ b/pkg/cluster/https.go @@ -4,17 +4,16 @@ import ( "context" "crypto/tls" "errors" - "fmt" "io" "log" "net" "net/http" - "net/http/pprof" "os" "path/filepath" + "strconv" - "github.com/gorilla/mux" "github.com/k3s-io/k3s/pkg/daemons/config" + "github.com/k3s-io/k3s/pkg/util" "github.com/k3s-io/k3s/pkg/version" "github.com/rancher/dynamiclistener" "github.com/rancher/dynamiclistener/factory" @@ -24,7 +23,6 @@ import ( "github.com/rancher/wrangler/v3/pkg/generated/controllers/core" "github.com/sirupsen/logrus" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - utilsnet "k8s.io/utils/net" ) // newListener returns a new TCP listener and HTTP request handler using dynamiclistener. @@ -43,11 +41,7 @@ func (c *Cluster) newListener(ctx context.Context) (net.Listener, http.Handler, os.Remove(filepath.Join(c.config.DataDir, "tls/dynamic-cert.json")) } } - ip := c.config.BindAddress - if utilsnet.IsIPv6String(ip) { - ip = fmt.Sprintf("[%s]", ip) - } - tcp, err := dynamiclistener.NewTCPListener(ip, c.config.SupervisorPort) + tcp, err := util.ListenWithLoopback(ctx, c.config.BindAddress, strconv.Itoa(c.config.SupervisorPort)) if err != nil { return nil, nil, err } @@ -114,17 +108,6 @@ func (c *Cluster) initClusterAndHTTPS(ctx context.Context) error { return err } - if c.config.EnablePProf { - mux := mux.NewRouter().SkipClean(true) - mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline) - mux.HandleFunc("/debug/pprof/profile", pprof.Profile) - mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol) - mux.HandleFunc("/debug/pprof/trace", pprof.Trace) - mux.PathPrefix("/debug/pprof/").HandlerFunc(pprof.Index) - mux.NotFoundHandler = handler - handler = mux - } - // Create a HTTP server with the registered request handlers, using logrus for logging server := http.Server{ Handler: handler, diff --git a/pkg/daemons/config/types.go b/pkg/daemons/config/types.go index 8d9a361a43..273eb40060 100644 --- a/pkg/daemons/config/types.go +++ b/pkg/daemons/config/types.go @@ -41,6 +41,8 @@ type Node struct { ImageServiceEndpoint string NoFlannel bool SELinux bool + EnablePProf bool + SupervisorMetrics bool EmbeddedRegistry bool FlannelBackend string FlannelConfFile string @@ -128,6 +130,8 @@ type Agent struct { AirgapExtraRegistry []string DisableCCM bool DisableNPC bool + MinTLSVersion string + CipherSuites []string Rootless bool ProtectKernelDefaults bool DisableServiceLB bool @@ -159,6 +163,7 @@ type CriticalControlArgs struct { EgressSelectorMode string `cli:"egress-selector-mode"` ServiceIPRange *net.IPNet `cli:"service-cidr"` ServiceIPRanges []*net.IPNet `cli:"service-cidr"` + SupervisorMetrics bool `cli:"supervisor-metrics"` } type Control struct { @@ -191,7 +196,6 @@ type Control struct { DisableServiceLB bool Rootless bool ServiceLBNamespace string - EnablePProf bool ExtraAPIArgs []string ExtraControllerArgs []string ExtraCloudControllerArgs []string @@ -208,8 +212,10 @@ type Control struct { ClusterResetRestorePath string EncryptForce bool EncryptSkip bool - TLSMinVersion uint16 - TLSCipherSuites []uint16 + MinTLSVersion string + CipherSuites []string + TLSMinVersion uint16 `json:"-"` + TLSCipherSuites []uint16 `json:"-"` EtcdSnapshotName string `json:"-"` EtcdDisableSnapshots bool `json:"-"` EtcdExposeMetrics bool `json:"-"` diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go new file mode 100644 index 0000000000..4ebd1d5292 --- /dev/null +++ b/pkg/metrics/metrics.go @@ -0,0 +1,45 @@ +package metrics + +import ( + "context" + "errors" + + "github.com/gorilla/mux" + "github.com/k3s-io/k3s/pkg/agent/https" + "github.com/k3s-io/k3s/pkg/daemons/config" + "github.com/prometheus/client_golang/prometheus/promhttp" + "k8s.io/component-base/metrics/legacyregistry" +) + +// DefaultRegisterer is the implementation of the +// prometheus Registerer interface that all metrics operations +// will use. +var DefaultRegisterer = legacyregistry.Registerer() + +// DefaultGatherer is the implementation of the +// prometheus Gatherere interface that all metrics operations +// will use. +var DefaultGatherer = legacyregistry.DefaultGatherer + +// DefaultMetrics is the default instance of a Metrics server +var DefaultMetrics = &Config{ + Router: func(context.Context, *config.Node) (*mux.Router, error) { + return nil, errors.New("not implemented") + }, +} + +// Config holds fields for the metrics listener +type Config struct { + // Router will be called to add the metrics API handler to an existing router. + Router https.RouterFunc +} + +// Start starts binds the metrics API to an existing HTTP router. +func (c *Config) Start(ctx context.Context, nodeConfig *config.Node) error { + mRouter, err := c.Router(ctx, nodeConfig) + if err != nil { + return err + } + mRouter.Handle("/metrics", promhttp.HandlerFor(DefaultGatherer, promhttp.HandlerOpts{})) + return nil +} diff --git a/pkg/profile/profile.go b/pkg/profile/profile.go new file mode 100644 index 0000000000..39c3929a58 --- /dev/null +++ b/pkg/profile/profile.go @@ -0,0 +1,38 @@ +package profile + +import ( + "context" + "errors" + "net/http/pprof" + + "github.com/gorilla/mux" + "github.com/k3s-io/k3s/pkg/agent/https" + "github.com/k3s-io/k3s/pkg/daemons/config" +) + +// DefaultProfiler the default instance of a performance profiling server +var DefaultProfiler = &Config{ + Router: func(context.Context, *config.Node) (*mux.Router, error) { + return nil, errors.New("not implemented") + }, +} + +// Config holds fields for the pprof listener +type Config struct { + // Router will be called to add the pprof API handler to an existing router. + Router https.RouterFunc +} + +// Start starts binds the pprof API to an existing HTTP router. +func (c *Config) Start(ctx context.Context, nodeConfig *config.Node) error { + mRouter, err := c.Router(ctx, nodeConfig) + if err != nil { + return err + } + mRouter.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline) + mRouter.HandleFunc("/debug/pprof/profile", pprof.Profile) + mRouter.HandleFunc("/debug/pprof/symbol", pprof.Symbol) + mRouter.HandleFunc("/debug/pprof/trace", pprof.Trace) + mRouter.PathPrefix("/debug/pprof/").HandlerFunc(pprof.Index) + return nil +} diff --git a/pkg/spegel/spegel.go b/pkg/spegel/spegel.go index e9dbd11924..785a31c915 100644 --- a/pkg/spegel/spegel.go +++ b/pkg/spegel/spegel.go @@ -13,13 +13,12 @@ import ( "time" "github.com/containerd/containerd/remotes/docker" + "github.com/k3s-io/k3s/pkg/agent/https" "github.com/k3s-io/k3s/pkg/clientaccess" "github.com/k3s-io/k3s/pkg/daemons/config" "github.com/k3s-io/k3s/pkg/version" "github.com/rancher/dynamiclistener/cert" "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/apiserver/pkg/authentication/authenticator" - "k8s.io/apiserver/pkg/authentication/request/union" "k8s.io/utils/ptr" "github.com/go-logr/logr" @@ -43,11 +42,8 @@ import ( // DefaultRegistry is the default instance of a Spegel distributed registry var DefaultRegistry = &Config{ Bootstrapper: NewSelfBootstrapper(), - HandlerFunc: func(_ *Config, _ *mux.Router) error { - return errors.New("not implemented") - }, - AuthFunc: func() authenticator.Request { - return union.New(nil) + Router: func(context.Context, *config.Node) (*mux.Router, error) { + return nil, errors.New("not implemented") }, } @@ -60,9 +56,6 @@ var ( resolveLatestTag = false ) -type authFunc func() authenticator.Request -type handlerFunc func(config *Config, router *mux.Router) error - // Config holds fields for a distributed registry type Config struct { ClientCAFile string @@ -89,10 +82,7 @@ type Config struct { Bootstrapper routing.Bootstrapper // HandlerFunc will be called to add the registry API handler to an existing router. - HandlerFunc handlerFunc - - // Authenticator will be called to retrieve an authenticator used to validate the request to the registry API. - AuthFunc authFunc + Router https.RouterFunc } // These values are not currently configurable @@ -237,13 +227,12 @@ func (c *Config) Start(ctx context.Context, nodeConfig *config.Node) error { // Track images available in containerd and publish via p2p router go state.Track(ctx, ociClient, router, resolveLatestTag) - mRouter := mux.NewRouter().SkipClean(true) - mRouter.Use(c.authMiddleware()) - mRouter.PathPrefix("/v2").Handler(regSvr.Handler) - mRouter.PathPrefix("/v1-" + version.Program + "/p2p").Handler(c.peerInfo()) - if err := c.HandlerFunc(c, mRouter); err != nil { + mRouter, err := c.Router(ctx, nodeConfig) + if err != nil { return err } + mRouter.PathPrefix("/v2").Handler(regSvr.Handler) + mRouter.PathPrefix("/v1-" + version.Program + "/p2p").Handler(c.peerInfo()) // Wait up to 5 seconds for the p2p network to find peers. This will return // immediately if the node is bootstrapping from itself. @@ -269,16 +258,3 @@ func (c *Config) peerInfo() http.HandlerFunc { fmt.Fprintf(resp, "%s/p2p/%s", info.Addrs[0].String(), info.ID.String()) }) } - -// authMiddleware calls the configured authenticator to gate access to the registry API -func (c *Config) authMiddleware() mux.MiddlewareFunc { - return func(next http.Handler) http.Handler { - return http.HandlerFunc(func(resp http.ResponseWriter, req *http.Request) { - if _, ok, err := c.AuthFunc().AuthenticateRequest(req); !ok || err != nil { - http.Error(resp, "Unauthorized", http.StatusUnauthorized) - return - } - next.ServeHTTP(resp, req) - }) - } -} diff --git a/pkg/util/net.go b/pkg/util/net.go index 7bc9f2ec4d..76304c584a 100644 --- a/pkg/util/net.go +++ b/pkg/util/net.go @@ -1,12 +1,15 @@ package util import ( + "context" "errors" "fmt" "net" "os" "strings" + "time" + "github.com/rancher/wrangler/v3/pkg/merr" "github.com/sirupsen/logrus" "github.com/urfave/cli" apinet "k8s.io/apimachinery/pkg/util/net" @@ -319,3 +322,111 @@ func getIPFromInterface(ifaceName string) (string, error) { return "", fmt.Errorf("can't find ip for interface %s", ifaceName) } + +type multiListener struct { + listeners []net.Listener + closing chan struct{} + conns chan acceptRes +} + +type acceptRes struct { + conn net.Conn + err error +} + +// explicit interface check +var _ net.Listener = &multiListener{} + +var loopbacks = []string{"127.0.0.1", "::1"} + +// ListenWithLoopback listens on the given address, as well as on IPv4 and IPv6 loopback addresses. +// If the address is a wildcard, the listener is return unwrapped. +func ListenWithLoopback(ctx context.Context, addr string, port string) (net.Listener, error) { + lc := &net.ListenConfig{ + KeepAlive: 3 * time.Minute, + Control: permitReuse, + } + l, err := lc.Listen(ctx, "tcp", net.JoinHostPort(addr, port)) + if err != nil { + return nil, err + } + + // If we're listening on a wildcard address, we don't need to wrap with the other loopback addresses + switch addr { + case "", "::", "0.0.0.0": + return l, nil + } + + ml := &multiListener{ + listeners: []net.Listener{l}, + closing: make(chan struct{}), + conns: make(chan acceptRes), + } + + for _, laddr := range loopbacks { + if laddr == addr { + continue + } + if l, err := lc.Listen(ctx, "tcp", net.JoinHostPort(laddr, port)); err == nil { + ml.listeners = append(ml.listeners, l) + } else { + logrus.Debugf("Failed to listen on %s: %v", net.JoinHostPort(laddr, port), err) + } + } + + for i := range ml.listeners { + go ml.accept(ml.listeners[i]) + } + + return ml, nil +} + +// Addr returns the address of the non-loopback address that this multiListener is listening on +func (ml *multiListener) Addr() net.Addr { + return ml.listeners[0].Addr() +} + +// Close closes all the listeners +func (ml *multiListener) Close() error { + close(ml.closing) + var errs merr.Errors + for i := range ml.listeners { + err := ml.listeners[i].Close() + if err != nil { + errs = append(errs, err) + } + } + return merr.NewErrors(errs) +} + +// Accept returns a Conn/err pair from one of the waiting listeners +func (ml *multiListener) Accept() (net.Conn, error) { + select { + case res, ok := <-ml.conns: + if ok { + return res.conn, res.err + } + return nil, fmt.Errorf("connection channel closed") + case <-ml.closing: + return nil, fmt.Errorf("listener closed") + } +} + +// accept runs a loop, accepting connections and trying to send on the result channel +func (ml *multiListener) accept(listener net.Listener) { + for { + conn, err := listener.Accept() + r := acceptRes{ + conn: conn, + err: err, + } + select { + case ml.conns <- r: + case <-ml.closing: + if r.err == nil { + r.conn.Close() + } + return + } + } +} diff --git a/pkg/util/net_unix.go b/pkg/util/net_unix.go new file mode 100644 index 0000000000..521e577cd1 --- /dev/null +++ b/pkg/util/net_unix.go @@ -0,0 +1,18 @@ +//go:build !windows +// +build !windows + +package util + +import ( + "syscall" + + "golang.org/x/sys/unix" +) + +// permitReuse enables port and address sharing on the socket +func permitReuse(network, addr string, conn syscall.RawConn) error { + return conn.Control(func(fd uintptr) { + syscall.SetsockoptInt(int(fd), syscall.SOL_SOCKET, unix.SO_REUSEPORT, 1) + syscall.SetsockoptInt(int(fd), syscall.SOL_SOCKET, unix.SO_REUSEADDR, 1) + }) +} diff --git a/pkg/util/net_windows.go b/pkg/util/net_windows.go new file mode 100644 index 0000000000..bb895c095a --- /dev/null +++ b/pkg/util/net_windows.go @@ -0,0 +1,11 @@ +//go:build windows +// +build windows + +package util + +import "syscall" + +// permitReuse is a no-op; port and address reuse is not supported on Windows +func permitReuse(network, addr string, conn syscall.RawConn) error { + return nil +} From f8e0648304a99cd5ea6a0653250feeb0514c5a26 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Thu, 25 Apr 2024 23:49:47 +0000 Subject: [PATCH 06/20] Convert remaining http handlers over to use util.SendError Signed-off-by: Brad Davidson --- pkg/cluster/router.go | 12 +++++----- pkg/daemons/control/tunnel.go | 3 +-- pkg/etcd/etcd.go | 4 ++-- pkg/server/cert.go | 4 ++-- pkg/server/router.go | 41 +++++++++-------------------------- pkg/server/secrets-encrypt.go | 13 ++--------- pkg/server/token.go | 7 +++--- pkg/spegel/bootstrap.go | 3 ++- pkg/util/apierrors.go | 1 + 9 files changed, 30 insertions(+), 58 deletions(-) diff --git a/pkg/cluster/router.go b/pkg/cluster/router.go index 4fe2694a72..39dc5e2164 100644 --- a/pkg/cluster/router.go +++ b/pkg/cluster/router.go @@ -1,7 +1,10 @@ package cluster import ( + "fmt" "net/http" + + "github.com/k3s-io/k3s/pkg/util" ) // getHandler returns a basic request handler that processes requests through @@ -19,11 +22,10 @@ func (c *Cluster) getHandler(handler http.Handler) (http.Handler, error) { // if no additional handlers are available. func (c *Cluster) router() http.Handler { return http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) { - if c.config.Runtime.Handler == nil { - http.Error(rw, "starting", http.StatusServiceUnavailable) - return + if c.config.Runtime.Handler != nil { + c.config.Runtime.Handler.ServeHTTP(rw, req) + } else { + util.SendError(fmt.Errorf("starting"), rw, req, http.StatusServiceUnavailable) } - - c.config.Runtime.Handler.ServeHTTP(rw, req) }) } diff --git a/pkg/daemons/control/tunnel.go b/pkg/daemons/control/tunnel.go index 3c4b2d54ce..86c685318b 100644 --- a/pkg/daemons/control/tunnel.go +++ b/pkg/daemons/control/tunnel.go @@ -29,8 +29,7 @@ var defaultDialer = net.Dialer{} func loggingErrorWriter(rw http.ResponseWriter, req *http.Request, code int, err error) { logrus.Debugf("Tunnel server error: %d %v", code, err) - rw.WriteHeader(code) - rw.Write([]byte(err.Error())) + util.SendError(err, rw, req, code) } func setupTunnel(ctx context.Context, cfg *config.Control) (http.Handler, error) { diff --git a/pkg/etcd/etcd.go b/pkg/etcd/etcd.go index c34f1396ff..d92ae900d8 100644 --- a/pkg/etcd/etcd.go +++ b/pkg/etcd/etcd.go @@ -761,7 +761,7 @@ func getEndpoints(control *config.Control) []string { // for use by etcd. func toTLSConfig(runtime *config.ControlRuntime) (*tls.Config, error) { if runtime.ClientETCDCert == "" || runtime.ClientETCDKey == "" || runtime.ETCDServerCA == "" { - return nil, errors.New("runtime is not ready yet") + return nil, util.ErrCoreNotReady } clientCert, err := tls.LoadX509KeyPair(runtime.ClientETCDCert, runtime.ClientETCDKey) @@ -1177,7 +1177,7 @@ func (e *ETCD) manageLearners(ctx context.Context) { func (e *ETCD) getETCDNodes() ([]*v1.Node, error) { if e.config.Runtime.Core == nil { - return nil, errors.New("runtime core not ready") + return nil, util.ErrCoreNotReady } nodes := e.config.Runtime.Core.Core().V1().Node() diff --git a/pkg/server/cert.go b/pkg/server/cert.go index b7c4d2f9ae..0fcdf0efe3 100644 --- a/pkg/server/cert.go +++ b/pkg/server/cert.go @@ -30,8 +30,8 @@ import ( func caCertReplaceHandler(server *config.Control) http.HandlerFunc { return http.HandlerFunc(func(resp http.ResponseWriter, req *http.Request) { - if req.TLS == nil || req.Method != http.MethodPut { - resp.WriteHeader(http.StatusNotFound) + if req.Method != http.MethodPut { + util.SendError(fmt.Errorf("method not allowed"), resp, req, http.StatusMethodNotAllowed) return } force, _ := strconv.ParseBool(req.FormValue("force")) diff --git a/pkg/server/router.go b/pkg/server/router.go index d2dba1f878..98ed472963 100644 --- a/pkg/server/router.go +++ b/pkg/server/router.go @@ -200,11 +200,6 @@ func getCACertAndKeys(caCertFile, caKeyFile, signingKeyFile string) ([]*x509.Cer func servingKubeletCert(server *config.Control, keyFile string, auth nodePassBootstrapper) http.Handler { return http.HandlerFunc(func(resp http.ResponseWriter, req *http.Request) { - if req.TLS == nil { - resp.WriteHeader(http.StatusNotFound) - return - } - nodeName, errCode, err := auth(req) if err != nil { util.SendError(err, resp, req, errCode) @@ -256,11 +251,6 @@ func servingKubeletCert(server *config.Control, keyFile string, auth nodePassBoo func clientKubeletCert(server *config.Control, keyFile string, auth nodePassBootstrapper) http.Handler { return http.HandlerFunc(func(resp http.ResponseWriter, req *http.Request) { - if req.TLS == nil { - resp.WriteHeader(http.StatusNotFound) - return - } - nodeName, errCode, err := auth(req) if err != nil { util.SendError(err, resp, req, errCode) @@ -296,10 +286,6 @@ func clientKubeletCert(server *config.Control, keyFile string, auth nodePassBoot func fileHandler(fileName ...string) http.Handler { return http.HandlerFunc(func(resp http.ResponseWriter, req *http.Request) { - if req.TLS == nil { - resp.WriteHeader(http.StatusNotFound) - return - } resp.Header().Set("Content-Type", "text/plain") if len(fileName) == 1 { @@ -310,8 +296,7 @@ func fileHandler(fileName ...string) http.Handler { for _, f := range fileName { bytes, err := os.ReadFile(f) if err != nil { - logrus.Errorf("Failed to read %s: %v", f, err) - resp.WriteHeader(http.StatusInternalServerError) + util.SendError(errors.Wrapf(err, "failed to read %s", f), resp, req, http.StatusInternalServerError) return } resp.Write(bytes) @@ -336,18 +321,13 @@ func apiserversHandler(server *config.Control) http.Handler { resp.Header().Set("content-type", "application/json") if err := json.NewEncoder(resp).Encode(endpoints); err != nil { - logrus.Errorf("Failed to encode apiserver endpoints: %v", err) - resp.WriteHeader(http.StatusInternalServerError) + util.SendError(errors.Wrap(err, "failed to encode apiserver endpoints"), resp, req, http.StatusInternalServerError) } }) } func configHandler(server *config.Control, cfg *cmds.Server) http.Handler { return http.HandlerFunc(func(resp http.ResponseWriter, req *http.Request) { - if req.TLS == nil { - resp.WriteHeader(http.StatusNotFound) - return - } // Startup hooks may read and modify cmds.Server in a goroutine, but as these are copied into // config.Control before the startup hooks are called, any modifications need to be sync'd back // into the struct before it is sent to agents. @@ -355,23 +335,21 @@ func configHandler(server *config.Control, cfg *cmds.Server) http.Handler { server.DisableKubeProxy = cfg.DisableKubeProxy resp.Header().Set("content-type", "application/json") if err := json.NewEncoder(resp).Encode(server); err != nil { - logrus.Errorf("Failed to encode agent config: %v", err) - resp.WriteHeader(http.StatusInternalServerError) + util.SendError(errors.Wrap(err, "failed to encode agent config"), resp, req, http.StatusInternalServerError) } }) } func readyzHandler(server *config.Control) http.Handler { return http.HandlerFunc(func(resp http.ResponseWriter, req *http.Request) { - code := http.StatusOK - data := []byte("ok") if server.Runtime.Core == nil { - code = http.StatusInternalServerError - data = []byte("runtime core not ready") + util.SendError(util.ErrCoreNotReady, resp, req, http.StatusServiceUnavailable) + return } - resp.WriteHeader(code) + data := []byte("ok") + resp.WriteHeader(http.StatusOK) resp.Header().Set("Content-Type", "text/plain") - resp.Header().Set("Content-length", strconv.Itoa(len(data))) + resp.Header().Set("Content-Length", strconv.Itoa(len(data))) resp.Write(data) }) } @@ -379,6 +357,7 @@ func readyzHandler(server *config.Control) http.Handler { func ping() http.Handler { return http.HandlerFunc(func(resp http.ResponseWriter, req *http.Request) { data := []byte("pong") + resp.WriteHeader(http.StatusOK) resp.Header().Set("Content-Type", "text/plain") resp.Header().Set("Content-Length", strconv.Itoa(len(data))) resp.Write(data) @@ -432,7 +411,7 @@ func passwordBootstrap(ctx context.Context, config *Config) nodePassBootstrapper return verifyRemotePassword(ctx, config, &mu, deferredNodes, node) } else { // Otherwise, reject the request until the core is ready. - return "", http.StatusServiceUnavailable, errors.New("runtime core not ready") + return "", http.StatusServiceUnavailable, util.ErrCoreNotReady } } diff --git a/pkg/server/secrets-encrypt.go b/pkg/server/secrets-encrypt.go index ebf1a4c1d0..3172ae8970 100644 --- a/pkg/server/secrets-encrypt.go +++ b/pkg/server/secrets-encrypt.go @@ -56,10 +56,6 @@ func getEncryptionRequest(req *http.Request) (*EncryptionRequest, error) { func encryptionStatusHandler(server *config.Control) http.Handler { return http.HandlerFunc(func(resp http.ResponseWriter, req *http.Request) { - if req.TLS == nil { - resp.WriteHeader(http.StatusNotFound) - return - } status, err := encryptionStatus(server) if err != nil { util.SendErrorWithID(err, "secret-encrypt", resp, req, http.StatusInternalServerError) @@ -160,18 +156,13 @@ func encryptionEnable(ctx context.Context, server *config.Control, enable bool) func encryptionConfigHandler(ctx context.Context, server *config.Control) http.Handler { return http.HandlerFunc(func(resp http.ResponseWriter, req *http.Request) { - if req.TLS == nil { - resp.WriteHeader(http.StatusNotFound) - return - } if req.Method != http.MethodPut { - resp.WriteHeader(http.StatusBadRequest) + util.SendError(fmt.Errorf("method not allowed"), resp, req, http.StatusMethodNotAllowed) return } encryptReq, err := getEncryptionRequest(req) if err != nil { - resp.WriteHeader(http.StatusBadRequest) - resp.Write([]byte(err.Error())) + util.SendError(err, resp, req, http.StatusBadRequest) return } if encryptReq.Stage != nil { diff --git a/pkg/server/token.go b/pkg/server/token.go index c5da332fa6..efd095013f 100644 --- a/pkg/server/token.go +++ b/pkg/server/token.go @@ -32,16 +32,15 @@ func getServerTokenRequest(req *http.Request) (TokenRotateRequest, error) { func tokenRequestHandler(ctx context.Context, server *config.Control) http.Handler { return http.HandlerFunc(func(resp http.ResponseWriter, req *http.Request) { - if req.TLS == nil || req.Method != http.MethodPut { - resp.WriteHeader(http.StatusBadRequest) + if req.Method != http.MethodPut { + util.SendError(fmt.Errorf("method not allowed"), resp, req, http.StatusMethodNotAllowed) return } var err error sTokenReq, err := getServerTokenRequest(req) logrus.Debug("Received token request") if err != nil { - resp.WriteHeader(http.StatusBadRequest) - resp.Write([]byte(err.Error())) + util.SendError(err, resp, req, http.StatusBadRequest) return } if err = tokenRotate(ctx, server, *sTokenReq.NewToken); err != nil { diff --git a/pkg/spegel/bootstrap.go b/pkg/spegel/bootstrap.go index e50dd25551..3263bc65c6 100644 --- a/pkg/spegel/bootstrap.go +++ b/pkg/spegel/bootstrap.go @@ -10,6 +10,7 @@ import ( "github.com/k3s-io/k3s/pkg/clientaccess" "github.com/k3s-io/k3s/pkg/daemons/config" + "github.com/k3s-io/k3s/pkg/util" "github.com/k3s-io/k3s/pkg/version" "github.com/libp2p/go-libp2p/core/peer" "github.com/pkg/errors" @@ -133,7 +134,7 @@ func (s *serverBootstrapper) Run(_ context.Context, id string) error { func (s *serverBootstrapper) Get() (addrInfo *peer.AddrInfo, err error) { if s.controlConfig.Runtime.Core == nil { - return nil, errors.New("runtime core not ready") + return nil, util.ErrCoreNotReady } nodeName := os.Getenv("NODE_NAME") if nodeName == "" { diff --git a/pkg/util/apierrors.go b/pkg/util/apierrors.go index 2edca11139..ec61ecea54 100644 --- a/pkg/util/apierrors.go +++ b/pkg/util/apierrors.go @@ -17,6 +17,7 @@ import ( var ErrAPINotReady = errors.New("apiserver not ready") var ErrAPIDisabled = errors.New("apiserver disabled") +var ErrCoreNotReady = errors.New("runtime core not ready") // SendErrorWithID sends and logs a random error ID so that logs can be correlated // between the REST API (which does not provide any detailed error output, to avoid From 2eca3f1e2c0d5330e00b03ae197402604f1e0348 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Mon, 13 May 2024 21:23:49 +0000 Subject: [PATCH 07/20] Update golangci-lint to stop using deprecated skip files/dirs Signed-off-by: Brad Davidson --- .golangci.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.golangci.json b/.golangci.json index 88ab8ef6d9..28a4b5daf0 100644 --- a/.golangci.json +++ b/.golangci.json @@ -10,7 +10,10 @@ ] }, "run": { - "skip-dirs": [ + "deadline": "5m" + }, + "issues": { + "exclude-dirs": [ "build", "contrib", "manifests", @@ -18,12 +21,9 @@ "scripts", "vendor" ], - "skip-files": [ + "exclude-files": [ "/zz_generated_" ], - "deadline": "5m" - }, - "issues": { "exclude-rules": [ { "linters": "typecheck", @@ -43,4 +43,4 @@ } ] } -} \ No newline at end of file +} From de4cda57e655136930d3feb3e5cab38e95ce68ef Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 May 2024 09:16:10 +0000 Subject: [PATCH 08/20] Bump alpine from 3.18 to 3.20 in /conformance Bumps alpine from 3.18 to 3.20. --- updated-dependencies: - dependency-name: alpine dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- conformance/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conformance/Dockerfile b/conformance/Dockerfile index 4401984c01..e3bf6b65f0 100644 --- a/conformance/Dockerfile +++ b/conformance/Dockerfile @@ -1,4 +1,4 @@ -FROM alpine:3.18 +FROM alpine:3.20 ENV SONOBUOY_VERSION 0.57.1 RUN apk add curl tar gzip RUN curl -sfL https://github.com/vmware-tanzu/sonobuoy/releases/download/v${SONOBUOY_VERSION}/sonobuoy_${SONOBUOY_VERSION}_linux_amd64.tar.gz | tar xvzf - -C /usr/bin From 86875c97bb1b9dd35e51e774bc9f226ccccec4e1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 May 2024 09:46:18 +0000 Subject: [PATCH 09/20] Bump alpine from 3.18 to 3.20 in /package Bumps alpine from 3.18 to 3.20. --- updated-dependencies: - dependency-name: alpine dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- package/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/Dockerfile b/package/Dockerfile index 60c03619ef..7fd7a9875d 100644 --- a/package/Dockerfile +++ b/package/Dockerfile @@ -1,4 +1,4 @@ -FROM alpine:3.18 as base +FROM alpine:3.20 as base RUN apk add -U ca-certificates tar zstd tzdata COPY build/out/data.tar.zst / RUN mkdir -p /image/etc/ssl/certs /image/run /image/var/run /image/tmp /image/lib/modules /image/lib/firmware && \ From 84b578ec74dbdf62d27171bb4ef1e60ec057def9 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Wed, 29 May 2024 00:33:57 +0000 Subject: [PATCH 10/20] Use busybox tar to avoid issues with fchmodat2 on arm Signed-off-by: Brad Davidson --- package/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package/Dockerfile b/package/Dockerfile index 7fd7a9875d..193d6ce32c 100644 --- a/package/Dockerfile +++ b/package/Dockerfile @@ -1,8 +1,8 @@ FROM alpine:3.20 as base -RUN apk add -U ca-certificates tar zstd tzdata +RUN apk add -U ca-certificates zstd tzdata COPY build/out/data.tar.zst / RUN mkdir -p /image/etc/ssl/certs /image/run /image/var/run /image/tmp /image/lib/modules /image/lib/firmware && \ - tar -xa -C /image -f /data.tar.zst && \ + zstdcat -d /data.tar.zst | tar -xa -C /image && \ echo "root:x:0:0:root:/:/bin/sh" > /image/etc/passwd && \ echo "root:x:0:" > /image/etc/group && \ cp /etc/ssl/certs/ca-certificates.crt /image/etc/ssl/certs/ca-certificates.crt From 4cb4542c3a7002fc254c71bac92fd1d6d26f776f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 24 May 2024 16:15:38 +0000 Subject: [PATCH 11/20] Bump ubuntu from 22.04 to 24.04 in /tests/e2e/scripts Bumps ubuntu from 22.04 to 24.04. --- updated-dependencies: - dependency-name: ubuntu dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- tests/e2e/scripts/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e/scripts/Dockerfile b/tests/e2e/scripts/Dockerfile index f6beedbdae..acb0abe528 100644 --- a/tests/e2e/scripts/Dockerfile +++ b/tests/e2e/scripts/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:22.04 +FROM ubuntu:24.04 ARG EXTERNAL_ENCODED_VPN ARG VPN_ENCODED_LOGIN From f2e7c01acfdc5f51bfd007c44bfe6605e8864975 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sun, 26 May 2024 18:10:03 +0000 Subject: [PATCH 12/20] chore: Bump Trivy version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Made with ❤️️ by updatecli --- Dockerfile.dapper | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile.dapper b/Dockerfile.dapper index 2384cac535..39dda48855 100644 --- a/Dockerfile.dapper +++ b/Dockerfile.dapper @@ -22,7 +22,7 @@ RUN apk -U --no-cache add \ RUN python3 -m pip install awscli # Install Trivy -ENV TRIVY_VERSION="0.50.1" +ENV TRIVY_VERSION="0.51.4" RUN case "$(go env GOARCH)" in \ arm64) TRIVY_ARCH="ARM64" ;; \ amd64) TRIVY_ARCH="64bit" ;; \ From ed23a2bb48d4c02321fe0e56890aef90e8299746 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Mon, 6 May 2024 19:43:37 +0000 Subject: [PATCH 13/20] Fix netpol crash when node remains tained unintialized It is concievable that users might take more than 60 seconds to deploy their own cloud-provider. Instead of exiting, we should wait forever, but with more logging to indicate what's being waited on. Signed-off-by: Brad Davidson --- pkg/agent/netpol/netpol.go | 15 +++++++-------- pkg/etcd/metadata_controller.go | 4 ++-- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/pkg/agent/netpol/netpol.go b/pkg/agent/netpol/netpol.go index 60d2c1f07f..f09d47d11e 100644 --- a/pkg/agent/netpol/netpol.go +++ b/pkg/agent/netpol/netpol.go @@ -67,27 +67,26 @@ func Run(ctx context.Context, nodeConfig *config.Node) error { return err } - // As kube-router netpol requires addresses to be available in the node object - // Wait until the node has ready addresses to avoid race conditions (max 1 minute). + // kube-router netpol requires addresses to be available in the node object. + // Wait until the uninitialized taint has been removed, at which point the addresses should be set. // TODO: Replace with non-deprecated PollUntilContextTimeout when our and Kubernetes code migrate to it - if err := wait.PollImmediateWithContext(ctx, 2*time.Second, 60*time.Second, func(ctx context.Context) (bool, error) { + if err := wait.PollImmediateInfiniteWithContext(ctx, 2*time.Second, func(ctx context.Context) (bool, error) { // Get the node object node, err := client.CoreV1().Nodes().Get(ctx, nodeConfig.AgentConfig.NodeName, metav1.GetOptions{}) if err != nil { - logrus.Debugf("Network policy controller waiting to get Node %s: %v", nodeConfig.AgentConfig.NodeName, err) + logrus.Infof("Network policy controller waiting to get Node %s: %v", nodeConfig.AgentConfig.NodeName, err) return false, nil } - // Check for the uninitialized taint that should be removed by cloud-provider - // If there is no cloud-provider, the taint will not be there + // Check for the taint that should be removed by cloud-provider when the node has been initialized. for _, taint := range node.Spec.Taints { if taint.Key == cloudproviderapi.TaintExternalCloudProvider { - logrus.Debugf("Network policy controller waiting for removal of %s taint", cloudproviderapi.TaintExternalCloudProvider) + logrus.Infof("Network policy controller waiting for removal of %s taint", cloudproviderapi.TaintExternalCloudProvider) return false, nil } } return true, nil }); err != nil { - return errors.Wrapf(err, "network policy controller timed out waiting for %s taint to be removed from Node %s", cloudproviderapi.TaintExternalCloudProvider, nodeConfig.AgentConfig.NodeName) + return errors.Wrapf(err, "network policy controller failed to wait for %s taint to be removed from Node %s", cloudproviderapi.TaintExternalCloudProvider, nodeConfig.AgentConfig.NodeName) } krConfig := options.NewKubeRouterConfig() diff --git a/pkg/etcd/metadata_controller.go b/pkg/etcd/metadata_controller.go index 0f4599a78f..71ec165feb 100644 --- a/pkg/etcd/metadata_controller.go +++ b/pkg/etcd/metadata_controller.go @@ -13,7 +13,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/client-go/util/retry" - nodeUtil "k8s.io/kubernetes/pkg/controller/util/node" + nodeutil "k8s.io/kubernetes/pkg/controller/util/node" ) func registerMetadataHandlers(ctx context.Context, etcd *ETCD) { @@ -109,7 +109,7 @@ func (m *metadataHandler) handleSelf(node *v1.Node) (*v1.Node, error) { node.Labels = map[string]string{} } - if find, _ := nodeUtil.GetNodeCondition(&node.Status, etcdStatusType); find >= 0 { + if find, _ := nodeutil.GetNodeCondition(&node.Status, etcdStatusType); find >= 0 { node.Status.Conditions = append(node.Status.Conditions[:find], node.Status.Conditions[find+1:]...) } From 307f07bd61f42c6191216585099d5ef849bde33a Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Wed, 29 May 2024 18:17:29 +0000 Subject: [PATCH 14/20] Fix issue caused by sole server marked as failed under load If health checks are failing for all servers, make a second pass through the server list with health-checks ignored before returning failure Signed-off-by: Brad Davidson --- pkg/agent/loadbalancer/loadbalancer.go | 9 +++++++-- pkg/agent/loadbalancer/servers.go | 10 ++++++++-- pkg/etcd/etcdproxy.go | 2 +- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/pkg/agent/loadbalancer/loadbalancer.go b/pkg/agent/loadbalancer/loadbalancer.go index 2348fcb087..feddb4d872 100644 --- a/pkg/agent/loadbalancer/loadbalancer.go +++ b/pkg/agent/loadbalancer/loadbalancer.go @@ -158,6 +158,7 @@ func (lb *LoadBalancer) dialContext(ctx context.Context, network, _ string) (net lb.mutex.RLock() defer lb.mutex.RUnlock() + var allChecksFailed bool startIndex := lb.nextServerIndex for { targetServer := lb.currentServerAddress @@ -165,7 +166,7 @@ func (lb *LoadBalancer) dialContext(ctx context.Context, network, _ string) (net server := lb.servers[targetServer] if server == nil || targetServer == "" { logrus.Debugf("Nil server for load balancer %s: %s", lb.serviceName, targetServer) - } else if server.healthCheck() { + } else if allChecksFailed || server.healthCheck() { conn, err := server.dialContext(ctx, network, targetServer) if err == nil { return conn, nil @@ -189,7 +190,11 @@ func (lb *LoadBalancer) dialContext(ctx context.Context, network, _ string) (net startIndex = maxIndex } if lb.nextServerIndex == startIndex { - return nil, errors.New("all servers failed") + if allChecksFailed { + return nil, errors.New("all servers failed") + } + logrus.Debugf("Health checks for all servers in load balancer %s have failed: retrying with health checks ignored", lb.serviceName) + allChecksFailed = true } } } diff --git a/pkg/agent/loadbalancer/servers.go b/pkg/agent/loadbalancer/servers.go index 78ee88d74f..7dc80e4932 100644 --- a/pkg/agent/loadbalancer/servers.go +++ b/pkg/agent/loadbalancer/servers.go @@ -227,13 +227,19 @@ func (lb *LoadBalancer) SetHealthCheck(address string, healthCheck func() bool) // runHealthChecks periodically health-checks all servers. Any servers that fail the health-check will have their // connections closed, to force clients to switch over to a healthy server. func (lb *LoadBalancer) runHealthChecks(ctx context.Context) { + previousStatus := map[string]bool{} wait.Until(func() { lb.mutex.RLock() defer lb.mutex.RUnlock() - for _, server := range lb.servers { - if !server.healthCheck() { + for address, server := range lb.servers { + status := server.healthCheck() + if status == false && previousStatus[address] == true { + // Only close connections when the server transitions from healthy to unhealthy; + // we don't want to re-close all the connections every time as we might be ignoring + // health checks due to all servers being marked unhealthy. defer server.closeAll() } + previousStatus[address] = status } }, time.Second, ctx.Done()) logrus.Debugf("Stopped health checking for load balancer %s", lb.serviceName) diff --git a/pkg/etcd/etcdproxy.go b/pkg/etcd/etcdproxy.go index 40bee876b1..55918850b3 100644 --- a/pkg/etcd/etcdproxy.go +++ b/pkg/etcd/etcdproxy.go @@ -130,7 +130,7 @@ func (e etcdproxy) createHealthCheck(ctx context.Context, address string) func() statusCode = resp.StatusCode } if err != nil || statusCode != http.StatusOK { - logrus.Debugf("Health check %s failed: %v (StatusCode: %d)", url, err, statusCode) + logrus.Debugf("Health check %s failed: %v (StatusCode: %d)", address, err, statusCode) connected = false } else { connected = true From 7a0ea3c9539c9ac8efb560de32395c9611db7969 Mon Sep 17 00:00:00 2001 From: Katherine Door Date: Fri, 31 May 2024 08:45:34 +0200 Subject: [PATCH 15/20] Add write-kubeconfig-group flag to server (#9233) * Add write-kubeconfig-group flag to server * update kubectl unable to read config message for kubeconfig mode/group Signed-off-by: Katherine Pata --- pkg/cli/cmds/server.go | 7 +++++++ pkg/cli/server/server.go | 1 + pkg/daemons/config/types.go | 1 + pkg/kubectl/main.go | 3 ++- pkg/server/server.go | 7 +++++++ pkg/util/file.go | 23 +++++++++++++++++++++++ 6 files changed, 41 insertions(+), 1 deletion(-) diff --git a/pkg/cli/cmds/server.go b/pkg/cli/cmds/server.go index cfb9349fbb..e179f5237d 100644 --- a/pkg/cli/cmds/server.go +++ b/pkg/cli/cmds/server.go @@ -45,6 +45,7 @@ type Server struct { DisableAgent bool KubeConfigOutput string KubeConfigMode string + KubeConfigGroup string HelmJobImage string TLSSan cli.StringSlice TLSSanSecurity bool @@ -250,6 +251,12 @@ var ServerFlags = []cli.Flag{ Destination: &ServerConfig.KubeConfigMode, EnvVar: version.ProgramUpper + "_KUBECONFIG_MODE", }, + &cli.StringFlag{ + Name: "write-kubeconfig-group", + Usage: "(client) Write kubeconfig with this group", + Destination: &ServerConfig.KubeConfigGroup, + EnvVar: version.ProgramUpper + "_KUBECONFIG_GROUP", + }, &cli.StringFlag{ Name: "helm-job-image", Usage: "(helm) Default image to use for helm jobs", diff --git a/pkg/cli/server/server.go b/pkg/cli/server/server.go index 0093ee6cf9..b78b122695 100644 --- a/pkg/cli/server/server.go +++ b/pkg/cli/server/server.go @@ -133,6 +133,7 @@ func run(app *cli.Context, cfg *cmds.Server, leaderControllers server.CustomCont serverConfig.ControlConfig.DataDir = cfg.DataDir serverConfig.ControlConfig.KubeConfigOutput = cfg.KubeConfigOutput serverConfig.ControlConfig.KubeConfigMode = cfg.KubeConfigMode + serverConfig.ControlConfig.KubeConfigGroup = cfg.KubeConfigGroup serverConfig.ControlConfig.HelmJobImage = cfg.HelmJobImage serverConfig.ControlConfig.Rootless = cfg.Rootless serverConfig.ControlConfig.ServiceLBNamespace = cfg.ServiceLBNamespace diff --git a/pkg/daemons/config/types.go b/pkg/daemons/config/types.go index 273eb40060..e65ac7fde2 100644 --- a/pkg/daemons/config/types.go +++ b/pkg/daemons/config/types.go @@ -182,6 +182,7 @@ type Control struct { ServiceNodePortRange *utilnet.PortRange KubeConfigOutput string KubeConfigMode string + KubeConfigGroup string HelmJobImage string DataDir string KineTLS bool diff --git a/pkg/kubectl/main.go b/pkg/kubectl/main.go index f3d77f24a1..dfcab9292d 100644 --- a/pkg/kubectl/main.go +++ b/pkg/kubectl/main.go @@ -54,7 +54,8 @@ func checkReadConfigPermissions(configFile string) error { if err != nil { if os.IsPermission(err) { return fmt.Errorf("Unable to read %s, please start server "+ - "with --write-kubeconfig-mode to modify kube config permissions", configFile) + "with --write-kubeconfig-mode or --write-kubeconfig-group "+ + "to modify kube config permissions", configFile) } } file.Close() diff --git a/pkg/server/server.go b/pkg/server/server.go index 848a5f1b82..8c6f40c433 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -443,6 +443,13 @@ func writeKubeConfig(certs string, config *Config) error { util.SetFileModeForPath(kubeConfig, os.FileMode(0600)) } + if config.ControlConfig.KubeConfigGroup != "" { + err := util.SetFileGroupForPath(kubeConfig, config.ControlConfig.KubeConfigGroup) + if err != nil { + logrus.Errorf("Failed to set %s to group %s: %v", kubeConfig, config.ControlConfig.KubeConfigGroup, err) + } + } + if kubeConfigSymlink != kubeConfig { if err := writeConfigSymlink(kubeConfig, kubeConfigSymlink); err != nil { logrus.Errorf("Failed to write kubeconfig symlink: %v", err) diff --git a/pkg/util/file.go b/pkg/util/file.go index d584ec8105..6d1a05ca84 100644 --- a/pkg/util/file.go +++ b/pkg/util/file.go @@ -2,7 +2,9 @@ package util import ( "os" + "os/user" "path/filepath" + "strconv" "strings" "time" @@ -14,6 +16,27 @@ func SetFileModeForPath(name string, mode os.FileMode) error { return os.Chmod(name, mode) } +func SetFileGroupForPath(name string, group string) error { + // Try to use as group id + gid, err := strconv.Atoi(group) + if err == nil { + return os.Chown(name, -1, gid) + } + + // Otherwise, it must be a group name + g, err := user.LookupGroup(group) + if err != nil { + return err + } + + gid, err = strconv.Atoi(g.Gid) + if err != nil { + return err + } + + return os.Chown(name, -1, gid) +} + func SetFileModeForFile(file *os.File, mode os.FileMode) error { return file.Chmod(mode) } From f9130d537df6862a63120aa4ddb803f1317e0ef9 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Thu, 30 May 2024 19:00:47 +0000 Subject: [PATCH 16/20] Fix embedded mirror blocked by SAR RBAC and re-enable test Signed-off-by: Brad Davidson --- .github/workflows/e2e.yaml | 5 ++--- pkg/agent/https/https.go | 6 +++++- tests/e2e/embeddedmirror/Vagrantfile | 6 ++++++ 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 5479bfd3c6..65f7a61e27 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -36,8 +36,7 @@ jobs: strategy: fail-fast: false matrix: - # TODO fix embeddedmirror and add it to the matrix - etest: [startup, s3, btrfs, externalip, privateregistry, wasm] + etest: [startup, s3, btrfs, externalip, privateregistry, embeddedmirror, wasm] max-parallel: 3 steps: - name: "Checkout" @@ -116,4 +115,4 @@ jobs: chmod +x ./dist/artifacts/k3s . ./tests/docker/test-helpers . ./tests/docker/test-run-${{ matrix.dtest }} - echo "Did test-run-${{ matrix.dtest }} pass $?" \ No newline at end of file + echo "Did test-run-${{ matrix.dtest }} pass $?" diff --git a/pkg/agent/https/https.go b/pkg/agent/https/https.go index 2b5927107f..da453742b8 100644 --- a/pkg/agent/https/https.go +++ b/pkg/agent/https/https.go @@ -75,7 +75,11 @@ func Start(ctx context.Context, nodeConfig *config.Node, runtime *config.Control } authz := options.NewDelegatingAuthorizationOptions() - authz.AlwaysAllowPaths = []string{"/v2", "/debug/pprof", "/v1-" + version.Program + "/p2p"} + authz.AlwaysAllowPaths = []string{ // skip authz for paths that should not use SubjectAccessReview; basically everything that will use this router other than metrics + "/v1-" + version.Program + "/p2p", // spegel libp2p peer discovery + "/v2/*", // spegel registry mirror + "/debug/pprof/*", // profiling + } authz.RemoteKubeConfigFile = nodeConfig.AgentConfig.KubeConfigKubelet if applyErr := authz.ApplyTo(&config.Authorization); applyErr != nil { err = applyErr diff --git a/tests/e2e/embeddedmirror/Vagrantfile b/tests/e2e/embeddedmirror/Vagrantfile index f510051361..67bc1709f1 100644 --- a/tests/e2e/embeddedmirror/Vagrantfile +++ b/tests/e2e/embeddedmirror/Vagrantfile @@ -38,6 +38,9 @@ def provision(vm, role, role_num, node_num) if role.include?("server") && role_num == 0 vm.provision "private-registry", type: "shell", inline: writePrivateRegistry + vm.provision "create-images-dir", type: "shell", inline: "mkdir -p -m 777 /tmp/images /var/lib/rancher/k3s/agent/images" + vm.provision "copy-images-file", type: "file", source: "../../../scripts/airgap/image-list.txt", destination: "/tmp/images/image-list.txt" + vm.provision "move-images-file", type: "shell", inline: "mv /tmp/images/image-list.txt /var/lib/rancher/k3s/agent/images/image-list.txt" vm.provision 'k3s-primary-server', type: 'k3s', run: 'once' do |k3s| k3s.args = "server " @@ -54,6 +57,9 @@ def provision(vm, role, role_num, node_num) elsif role.include?("server") && role_num != 0 vm.provision "shell", inline: writePrivateRegistry + vm.provision "create-images-dir", type: "shell", inline: "mkdir -p -m 777 /tmp/images /var/lib/rancher/k3s/agent/images" + vm.provision "copy-images-file", type: "file", source: "../../../scripts/airgap/image-list.txt", destination: "/tmp/images/image-list.txt" + vm.provision "move-images-file", type: "shell", inline: "mv /tmp/images/image-list.txt /var/lib/rancher/k3s/agent/images/image-list.txt" vm.provision 'k3s-secondary-server', type: 'k3s', run: 'once' do |k3s| k3s.args = "server" From 1268779ea055e9d6153b4ec763369a0121c7303a Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 3 Jun 2024 11:19:23 -0700 Subject: [PATCH 17/20] Bump Local Path Provisioner version (#10268) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: Bump Local Path Provisioner version Made with ❤️️ by updatecli --- manifests/local-storage.yaml | 2 +- pkg/deploy/zz_generated_bindata.go | 2 +- scripts/airgap/image-list.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/manifests/local-storage.yaml b/manifests/local-storage.yaml index 35f85af42b..f6fcc0d7c9 100644 --- a/manifests/local-storage.yaml +++ b/manifests/local-storage.yaml @@ -67,7 +67,7 @@ spec: effect: "NoSchedule" containers: - name: local-path-provisioner - image: "%{SYSTEM_DEFAULT_REGISTRY}%rancher/local-path-provisioner:v0.0.26" + image: "%{SYSTEM_DEFAULT_REGISTRY}%rancher/local-path-provisioner:v0.0.27" imagePullPolicy: IfNotPresent command: - local-path-provisioner diff --git a/pkg/deploy/zz_generated_bindata.go b/pkg/deploy/zz_generated_bindata.go index b85b2b1081..dc7984c584 100644 --- a/pkg/deploy/zz_generated_bindata.go +++ b/pkg/deploy/zz_generated_bindata.go @@ -132,7 +132,7 @@ func corednsYaml() (*asset, error) { return a, nil } -var _localStorageYaml = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\xb4\x56\xdf\x6f\xdb\xb6\x13\x7f\xd7\x5f\x71\x5f\x7d\x97\x97\xa1\x94\x93\x0d\x68\x06\xbe\x79\xb1\xd3\x06\x70\x6c\xc3\x76\x3b\x14\x45\x61\xd0\xd4\xd9\x66\x43\x91\x04\x49\xb9\xf5\xb2\xfc\xef\x03\x49\xd9\x91\x93\x34\x71\xb0\x4d\x2f\x82\x8e\x77\x9f\x3b\xde\xe7\x7e\x88\x19\xf1\x11\xad\x13\x5a\x51\xd8\x9c\x65\x37\x42\x95\x14\xa6\x68\x37\x82\x63\x97\x73\x5d\x2b\x9f\x55\xe8\x59\xc9\x3c\xa3\x19\x80\x62\x15\x52\x90\x9a\x33\x49\x0c\xf3\x6b\x62\xac\xde\x88\x60\x8f\x96\xb8\x64\x47\x58\x63\x98\xd4\x9d\x61\x1c\x29\xdc\xd4\x0b\x24\x6e\xeb\x3c\x56\x19\x21\x24\x6b\x7b\xb6\x0b\xc6\x0b\x56\xfb\xb5\xb6\xe2\x4f\xe6\x85\x56\xc5\xcd\x6f\xae\x10\xba\xb3\x8f\xe9\x42\xd6\xce\xa3\x9d\x68\x89\xc7\x07\x64\x83\xb6\xad\x25\x3a\x9a\x11\x60\x46\xbc\xb3\xba\x36\x8e\xc2\xe7\x3c\xff\x92\x01\x58\x74\xba\xb6\x1c\xa3\x44\xe9\x12\x5d\xfe\x06\x72\x13\xc2\x72\x1e\x95\xdf\x68\x59\x57\xc8\x25\x13\x55\x3c\xe1\x5a\x2d\xc5\xaa\x62\x26\xe9\xe9\xd2\x75\xa4\x5e\x45\xa8\x0d\xda\x45\x84\x59\xa1\x0f\x87\x52\xb8\xf8\xfe\xc6\x3c\x5f\xe7\x5f\x5e\x76\x8f\xaa\x34\x5a\x28\xff\x64\x08\x7b\x7f\x87\xbe\x7e\x3e\x0a\x78\x83\x01\xf5\xc0\x90\x5b\x64\x1e\x23\xe8\xd3\xf1\x39\xaf\x2d\x5b\x61\x43\xc3\x63\xd0\xe6\x9c\x4b\xe6\x1c\xba\xe3\x32\xf0\x8f\x48\xff\x5d\xa8\x52\xa8\xd5\xf1\xdc\x2f\x84\x2a\xb3\x50\x00\x13\x5c\x06\xe5\xdd\xf5\x9e\x71\x9c\x01\x3c\x2e\xb6\x63\x4a\xcc\xd5\x8b\xaf\xc8\x7d\xac\xb2\x27\x5b\xe8\xbf\x6a\x1c\x66\x8c\xbb\x4f\x57\x0f\x8d\xd4\xdb\x0a\x5f\xd1\xb3\x3f\x76\xe5\x0c\x72\x1a\x69\x4f\xba\xef\x45\xe0\x7c\x3b\x10\x95\xf0\x14\x4e\x33\x00\xe7\x2d\xf3\xb8\xda\x06\x2d\x00\xbf\x35\x48\x61\xa2\xa5\x14\x6a\xf5\xc1\x94\xcc\x63\x94\xdb\xb6\x24\xa9\x02\x54\xec\xfb\x07\xc5\x36\x4c\x48\xb6\x90\x48\xe1\x2c\xc0\xa1\x44\xee\xb5\x4d\x3a\x55\xa8\x9a\x01\x5b\xa0\x74\x3b\x23\x66\xcc\x33\xd7\xf0\x58\x19\xb9\x77\xd1\xbe\x7f\x78\xe4\x01\xd2\x4b\x58\x00\xbb\xdb\x87\xc7\x58\xa1\xad\xf0\xdb\x8b\x50\xec\xc3\x98\xcc\x3c\x25\x89\x84\x99\x41\xb8\x15\x5e\x70\x26\xf3\x46\xdf\x1d\x70\x3f\x7c\x1d\xf1\x31\x95\x5a\xa2\x8d\x85\xd9\x8a\x18\x80\xc0\x0d\x6e\x29\xe4\x17\x8d\xbf\x6e\x59\x6a\xe5\x46\x4a\x6e\xf3\x96\x16\x80\x36\xc1\x5a\x5b\x0a\x79\xff\xbb\x70\xde\xe5\x4f\x80\xc4\xc8\x43\xf1\x16\x81\x74\xab\xd0\x63\xec\x3d\xae\x95\xb7\x5a\x12\x23\x99\xc2\x57\xe0\x02\xe0\x72\x89\xdc\x53\xc8\x87\x7a\xca\xd7\x58\xd6\x12\x5f\xe3\xb8\x62\xa1\xe5\xfe\x2d\x8f\xe1\x1a\x4c\x28\xb4\xfb\x0c\x92\x97\xfa\x20\x3d\xa2\x62\xab\x40\xf0\xc9\xed\xf4\xd3\x74\xd6\xbf\x9e\xf7\xfa\x97\xdd\x0f\x83\xd9\x7c\xd2\x7f\x77\x35\x9d\x4d\x3e\xdd\x9d\x58\xa6\xf8\x1a\x6d\xe7\x69\x24\xba\x39\x2d\x4e\x8b\x5f\xde\xe6\x87\x90\xe3\x5a\xca\xb1\x96\x82\x6f\x29\x5c\x2d\x87\xda\x8f\x2d\x3a\xdc\x53\x1e\x22\xae\x2a\xa6\xca\x7b\xc2\xc9\x4b\xa1\x12\x70\x9e\x59\xdf\xfa\x26\x24\x6d\xa8\x96\xa8\x83\x9e\x77\x92\xb4\x79\x15\x5f\x9d\x56\x7b\x8d\xb4\x5f\xae\x43\xf5\xb9\xb6\xef\x94\xac\x64\x41\x92\x52\x2b\xf7\x55\xd0\x1f\x33\xbf\xa6\x07\x0e\xf6\x1a\xa8\x36\x8f\xc1\xc6\xa3\xde\x7c\xd8\xbd\xee\x4f\xc7\xdd\x8b\x7e\x0b\x6c\xc3\x64\x8d\x97\x56\x57\xf4\x80\xdd\xa5\x40\x59\x36\xc3\xfb\x91\x3c\xf9\xde\x75\x79\xb1\x9f\x61\x59\xfb\x56\xaf\xb8\x50\x92\x5f\x33\x73\xe8\xed\x51\xc9\x34\xf9\x7d\x38\x87\x0f\xd7\xe5\xfd\x44\x9e\x26\x79\x9c\x1c\xcf\xce\xe4\xb0\xa0\x94\xd2\xbe\xdd\xf5\x25\x2e\x59\x2d\xfd\xc7\x18\xeb\x2c\x8e\xd7\x3c\x5a\xa4\xd2\x6a\xaf\xe0\x07\xbd\x24\x1c\x69\x8c\x49\x3c\xa6\x90\x7b\x5b\x63\x9e\xb5\xeb\x14\x9a\x3a\x0e\x06\xad\x40\x52\x6a\x9a\x75\x7b\xad\x4b\xa4\xf0\x07\x13\xfe\x52\xdb\x4b\x61\x9d\xbf\xd0\xca\xd5\x15\xda\xcc\xa6\xff\xa2\x5d\x4d\xf7\x50\xa2\xc7\x98\x98\x66\x87\xee\x32\x9a\x3d\xf8\xc7\x7c\x76\x35\xed\xeb\xf7\x07\x5b\x69\x67\xd8\x2a\x65\x0a\x7f\x91\x98\x90\xdb\x86\xba\x38\x62\x42\x81\x5c\x33\x93\xd3\xcf\x8d\xf4\x76\x4f\x6c\x3c\xcf\x69\xbe\xeb\xec\x71\x77\xf6\x7e\x7e\x39\x9a\xcc\x87\xa3\xe1\x7c\x70\x35\x9d\xf5\x7b\xf3\xe1\xa8\xd7\x9f\xe6\x6f\xee\x6d\x42\x74\x2e\xa7\x9f\xf3\x93\xdb\x9d\xdd\x60\x74\xd1\x1d\xcc\xa7\xb3\xd1\xa4\xfb\xae\x1f\x51\xee\x4e\xe2\x9f\x50\x78\xee\x9a\x77\xfa\xbe\x8b\xfb\xcd\x87\xbf\x8f\x26\xd8\xff\xff\xaf\xb3\x10\xaa\xe3\xd6\x89\x4b\xf4\x40\xb0\x4e\xab\xeb\xa6\x14\x16\x48\x05\xa7\xe7\xe7\xe7\x40\x0c\xe4\x3f\xdd\x7e\x1c\x0d\xe6\xbd\xab\xc9\x5d\x62\x9e\xaf\x2b\x5d\xc2\xf9\xe9\x69\xfb\xa8\x53\x14\x79\x5c\x83\xcc\x96\xfa\x9b\x3a\xc2\x91\xad\x80\xd8\xe5\x43\xf8\x35\x4a\x83\x76\xac\xcb\x62\xcb\x2a\xb9\x87\x79\x40\x62\x10\x25\x9e\xc7\xba\x7c\x72\xe3\x26\x6a\x13\x1a\x31\x8d\x52\x7b\xad\xfe\x78\x44\x3f\x30\x82\xd7\x8d\xe5\x4a\x58\xab\x2d\x96\x44\x8a\x85\x65\x76\x4b\x16\xb5\xdb\x2e\xf4\x77\x7a\x56\xfc\xfa\xb6\x38\x3b\x76\x2e\xff\x1d\x00\x00\xff\xff\x33\x50\x2d\x30\x1a\x0d\x00\x00") +var _localStorageYaml = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\xb4\x56\x5f\x6f\xdb\x36\x10\x7f\xd7\xa7\xb8\x69\xcb\xcb\x50\xca\xc9\x06\x2c\x03\xdf\xbc\xd8\x69\x03\x38\xb6\x61\xbb\x1d\x8a\xa2\x30\x68\xea\x6c\xb3\xa1\x48\x82\xa4\xdc\x7a\x59\xbe\xfb\x40\x52\x76\xe4\x24\x4d\x1c\x6c\xd3\x8b\xa0\xe3\xdd\xef\x8e\xf7\xbb\x3f\x62\x46\x7c\x40\xeb\x84\x56\x14\x36\x67\xd9\x8d\x50\x25\x85\x29\xda\x8d\xe0\xd8\xe5\x5c\xd7\xca\x67\x15\x7a\x56\x32\xcf\x68\x06\xa0\x58\x85\x14\xa4\xe6\x4c\x12\xc3\xfc\x9a\x18\xab\x37\x22\xd8\xa3\x25\x2e\xd9\x11\xd6\x18\x26\x75\x67\x18\x47\x0a\x37\xf5\x02\x89\xdb\x3a\x8f\x55\x46\x08\xc9\xda\x9e\xed\x82\xf1\x82\xd5\x7e\xad\xad\xf8\x8b\x79\xa1\x55\x71\xf3\xbb\x2b\x84\xee\xec\x63\xba\x90\xb5\xf3\x68\x27\x5a\xe2\xf1\x01\xd9\xa0\x6d\x6b\x89\x8e\x66\x04\x98\x11\x6f\xad\xae\x8d\xa3\xf0\x29\xcf\x3f\x67\x00\x16\x9d\xae\x2d\xc7\x28\x51\xba\x44\x97\xbf\x81\xdc\x84\xb0\x9c\x47\xe5\x37\x5a\xd6\x15\x72\xc9\x44\x15\x4f\xb8\x56\x4b\xb1\xaa\x98\x49\x7a\xba\x74\x1d\xa9\x57\x11\x6a\x83\x76\x11\x61\x56\xe8\xc3\xa1\x14\x2e\xbe\xbf\x32\xcf\xd7\xf9\xe7\x97\xdd\xa3\x2a\x8d\x16\xca\x3f\x19\xc2\xde\xdf\xa1\xaf\x9f\x8f\x02\xde\x60\x40\x3d\x30\xe4\x16\x99\xc7\x08\xfa\x74\x7c\xce\x6b\xcb\x56\xd8\xd0\xf0\x18\xb4\x39\xe7\x92\x39\x87\xee\xb8\x0c\xfc\x2b\xd2\xff\x10\xaa\x14\x6a\x75\x3c\xf7\x0b\xa1\xca\x2c\x14\xc0\x04\x97\x41\x79\x77\xbd\x67\x1c\x67\x00\x8f\x8b\xed\x98\x12\x73\xf5\xe2\x0b\x72\x1f\xab\xec\xc9\x16\xfa\xbf\x1a\x87\x19\xe3\xee\xd3\xd5\x43\x23\xf5\xb6\xc2\x57\xf4\xec\xf7\x5d\x39\x83\x9c\x46\xda\x93\xee\x3b\x11\x38\xdf\x0e\x44\x25\x3c\x85\xd3\x0c\xc0\x79\xcb\x3c\xae\xb6\x41\x0b\xc0\x6f\x0d\x52\x98\x68\x29\x85\x5a\xbd\x37\x25\xf3\x18\xe5\xb6\x2d\x49\xaa\x00\x15\xfb\xf6\x5e\xb1\x0d\x13\x92\x2d\x24\x52\x38\x0b\x70\x28\x91\x7b\x6d\x93\x4e\x15\xaa\x66\xc0\x16\x28\xdd\xce\x88\x19\xf3\xcc\x35\x3c\x56\x46\xee\x5d\xb4\xef\x1f\x1e\x79\x80\xf4\x12\x16\xc0\xee\xf6\xe1\x31\x56\x68\x2b\xfc\xf6\x22\x14\xfb\x30\x26\x33\x4f\x49\x22\x61\x66\x10\x6e\x85\x17\x9c\xc9\xbc\xd1\x77\x07\xdc\x0f\x5f\x47\x7c\x4c\xa5\x96\x68\x63\x61\xb6\x22\x06\x20\x70\x83\x5b\x0a\xf9\x45\xe3\xaf\x5b\x96\x5a\xb9\x91\x92\xdb\xbc\xa5\x05\xa0\x4d\xb0\xd6\x96\x42\xde\xff\x26\x9c\x77\xf9\x13\x20\x31\xf2\x50\xbc\x45\x20\xdd\x2a\xf4\x18\x7b\x8f\x6b\xe5\xad\x96\xc4\x48\xa6\xf0\x15\xb8\x00\xb8\x5c\x22\xf7\x14\xf2\xa1\x9e\xf2\x35\x96\xb5\xc4\xd7\x38\xae\x58\x68\xb9\xff\xca\x63\xb8\x06\x13\x0a\xed\x3e\x83\xe4\xa5\x3e\x48\x8f\xa8\xd8\x2a\x10\x7c\x72\x3b\xfd\x38\x9d\xf5\xaf\xe7\xbd\xfe\x65\xf7\xfd\x60\x36\x9f\xf4\xdf\x5e\x4d\x67\x93\x8f\x77\x27\x96\x29\xbe\x46\xdb\x79\x1a\x89\x6e\x4e\x8b\xd3\xe2\x97\xf3\xfc\x10\x72\x5c\x4b\x39\xd6\x52\xf0\x2d\x85\xab\xe5\x50\xfb\xb1\x45\x87\x7b\xca\x43\xc4\x55\xc5\x54\x79\x4f\x38\x79\x29\x54\x02\xce\x33\xeb\x5b\xdf\x84\xa4\x0d\xd5\x12\x75\xd0\xf3\x4e\x92\x36\xaf\xe2\x8b\xd3\x6a\xaf\x91\xf6\xcb\x75\xa8\x3e\xd7\xf6\x9d\x92\x95\x2c\x48\x52\x6a\xe5\xbe\x0a\xfa\x63\xe6\xd7\xf4\xc0\xc1\x5e\x03\xd5\xe6\x31\xd8\x78\xd4\x9b\x0f\xbb\xd7\xfd\xe9\xb8\x7b\xd1\x6f\x81\x6d\x98\xac\xf1\xd2\xea\x8a\x1e\xb0\xbb\x14\x28\xcb\x66\x78\x3f\x92\x27\xdf\xbb\x2e\x2f\xf6\x33\x2c\x6b\xdf\xea\x15\x17\x4a\xf2\x6b\x66\x0e\xbd\x3d\x2a\x99\x26\xbf\x0f\xe7\xf0\xe1\xba\xbc\x9f\xc8\xd3\x24\x8f\x93\xe3\xd9\x99\x1c\x16\x94\x52\xda\xb7\xbb\xbe\xc4\x25\xab\xa5\xff\x10\x63\x9d\xc5\xf1\x9a\x47\x8b\x54\x5a\xed\x15\xfc\xa0\x97\x84\x23\x8d\x31\x89\xc7\x14\x72\x6f\x6b\xcc\xb3\x76\x9d\x42\x53\xc7\xc1\xa0\x15\x48\x4a\x4d\xb3\x6e\xaf\x75\x89\x14\xfe\x64\xc2\x5f\x6a\x7b\x29\xac\xf3\x17\x5a\xb9\xba\x42\x9b\xd9\xf4\x5f\xb4\xab\xe9\x1e\x4a\xf4\x18\x13\xd3\xec\xd0\x5d\x46\xb3\x07\xff\x98\xcf\xae\xa6\x7d\xfd\x7e\x67\x2b\xed\x0c\x5b\xa5\x4c\xe1\x6f\x12\x13\x72\xdb\x50\x17\x47\x4c\x28\x90\x6b\x66\x72\xfa\xa9\x91\xde\xee\x89\x8d\xe7\x39\xcd\x77\x9d\x3d\xee\xce\xde\xcd\x2f\x47\x93\xf9\x70\x34\x9c\x0f\xae\xa6\xb3\x7e\x6f\x3e\x1c\xf5\xfa\xd3\xfc\xcd\xbd\x4d\x88\xce\xe5\xf4\x53\x7e\x72\xbb\xb3\x1b\x8c\x2e\xba\x83\xf9\x74\x36\x9a\x74\xdf\xf6\x23\xca\xdd\x49\xfc\x13\x0a\xcf\x5d\xf3\x4e\xdf\x77\x71\xbf\xf9\xf0\xf7\xd1\x04\xfb\xe3\x0f\x9d\x85\x50\x1d\xb7\x4e\x5c\xa2\x07\x82\x75\x5a\x5d\x37\xa5\xb0\x40\x2a\x38\x3d\x3f\x3f\x07\x62\x20\xff\xe9\xf6\xc3\x68\x30\xef\x5d\x4d\xee\x12\xf3\x7c\x5d\xe9\x12\xce\x4f\x4f\xdb\x47\x9d\xa2\xc8\xe3\x1a\x64\xb6\xd4\x5f\xd5\x11\x8e\x6c\x05\xc4\x2e\x1f\xc2\xaf\x51\x1a\xb4\x63\x5d\x16\x5b\x56\xc9\x3d\xcc\x03\x12\x83\x28\xf1\x3c\xd6\xe5\x93\x1b\x37\x51\x9b\xd0\x88\x69\x94\xda\x6b\xf5\xfb\x23\xfa\x81\x11\xbc\x6e\x2c\x57\xc2\x5a\x6d\xb1\x24\x52\x2c\x2c\xb3\x5b\xb2\xa8\xdd\x76\xa1\xbf\xd1\xb3\xe2\xd7\xdf\x8a\xb3\x63\xe7\xf2\x3f\x01\x00\x00\xff\xff\x68\x8a\xdf\xd2\x1a\x0d\x00\x00") func localStorageYamlBytes() ([]byte, error) { return bindataRead( diff --git a/scripts/airgap/image-list.txt b/scripts/airgap/image-list.txt index 932f47ab2f..e0f3fd5941 100644 --- a/scripts/airgap/image-list.txt +++ b/scripts/airgap/image-list.txt @@ -1,6 +1,6 @@ docker.io/rancher/klipper-helm:v0.8.4-build20240523 docker.io/rancher/klipper-lb:v0.4.7 -docker.io/rancher/local-path-provisioner:v0.0.26 +docker.io/rancher/local-path-provisioner:v0.0.27 docker.io/rancher/mirrored-coredns-coredns:1.10.1 docker.io/rancher/mirrored-library-busybox:1.36.1 docker.io/rancher/mirrored-library-traefik:2.10.7 From 79ba10f5ec88a2458b44f67d009b4ee071918e4d Mon Sep 17 00:00:00 2001 From: Koen de Laat Date: Mon, 3 Jun 2024 11:03:53 +0000 Subject: [PATCH 18/20] fix: Use actual warningPeriod in certmonitor Signed-off-by: Koen de Laat --- pkg/certmonitor/certmonitor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/certmonitor/certmonitor.go b/pkg/certmonitor/certmonitor.go index 6aedbcdf63..7615e7dc30 100644 --- a/pkg/certmonitor/certmonitor.go +++ b/pkg/certmonitor/certmonitor.go @@ -126,7 +126,7 @@ func checkCerts(fileMap map[string][]string, warningPeriod time.Duration) error } else if now.After(cert.NotAfter) { errs = append(errs, fmt.Errorf("%s/%s: certificate %s expired at %s", service, basename, cert.Subject, cert.NotAfter.Format(time.RFC3339))) } else if warn.After(cert.NotAfter) { - errs = append(errs, fmt.Errorf("%s/%s: certificate %s will expire within %d days at %s", service, basename, cert.Subject, daemonconfig.CertificateRenewDays, cert.NotAfter.Format(time.RFC3339))) + errs = append(errs, fmt.Errorf("%s/%s: certificate %s will expire within %d days at %s", service, basename, cert.Subject, int(warningPeriod.Hours()/24), cert.NotAfter.Format(time.RFC3339))) } } } From 1661f1024a68245bf768925a326ca558f2ea527e Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Mon, 3 Jun 2024 20:40:09 +0000 Subject: [PATCH 19/20] Fix bug that caused agents to bypass local loadbalancer If proxy.SetAPIServerPort was called multiple times, all calls after the first one would cause the apiserver address to be set to the default server address, bypassing the local load-balancer. This was most likely to occur on RKE2, where the supervisor may be up for a period of time before it is ready to manage node password secrets, causing the agent to retry. Signed-off-by: Brad Davidson --- pkg/agent/config/config.go | 30 +++++++++++++++++++------- pkg/agent/loadbalancer/loadbalancer.go | 7 +++++- pkg/agent/proxy/apiproxy.go | 17 ++++++++++----- pkg/agent/tunnel/tunnel.go | 2 +- 4 files changed, 41 insertions(+), 15 deletions(-) diff --git a/pkg/agent/config/config.go b/pkg/agent/config/config.go index dfab68d023..fc0e724154 100644 --- a/pkg/agent/config/config.go +++ b/pkg/agent/config/config.go @@ -375,9 +375,10 @@ func get(ctx context.Context, envInfo *cmds.Agent, proxy proxy.Proxy) (*config.N if controlConfig.SupervisorPort != controlConfig.HTTPSPort { isIPv6 := utilsnet.IsIPv6(net.ParseIP([]string{envInfo.NodeIP.String()}[0])) if err := proxy.SetAPIServerPort(controlConfig.HTTPSPort, isIPv6); err != nil { - return nil, errors.Wrapf(err, "failed to setup access to API Server port %d on at %s", controlConfig.HTTPSPort, proxy.SupervisorURL()) + return nil, errors.Wrapf(err, "failed to set apiserver port to %d", controlConfig.HTTPSPort) } } + apiServerURL := proxy.APIServerURL() var flannelIface *net.Interface if controlConfig.FlannelBackend != config.FlannelBackendNone && len(envInfo.FlannelIface) > 0 { @@ -482,40 +483,53 @@ func get(ctx context.Context, envInfo *cmds.Agent, proxy proxy.Proxy) (*config.N os.Setenv("NODE_NAME", nodeName) + // Ensure that the kubelet's server certificate is valid for all configured node IPs. Note + // that in the case of an external CCM, additional IPs may be added by the infra provider + // that the cert will not be valid for, as they are not present in the list collected here. nodeExternalAndInternalIPs := append(nodeIPs, nodeExternalIPs...) + + // Ask the server to generate a kubelet server cert+key. These files are unique to this node. servingCert, err := getServingCert(nodeName, nodeExternalAndInternalIPs, servingKubeletCert, servingKubeletKey, newNodePasswordFile, info) if err != nil { - return nil, err + return nil, errors.Wrap(err, servingKubeletCert) } + // Ask the server to genrate a kubelet client cert+key. These files are unique to this node. if err := getNodeNamedHostFile(clientKubeletCert, clientKubeletKey, nodeName, nodeIPs, newNodePasswordFile, info); err != nil { - return nil, err + return nil, errors.Wrap(err, clientKubeletCert) } + // Generate a kubeconfig for the kubelet. kubeconfigKubelet := filepath.Join(envInfo.DataDir, "agent", "kubelet.kubeconfig") - if err := deps.KubeConfig(kubeconfigKubelet, proxy.APIServerURL(), serverCAFile, clientKubeletCert, clientKubeletKey); err != nil { + if err := deps.KubeConfig(kubeconfigKubelet, apiServerURL, serverCAFile, clientKubeletCert, clientKubeletKey); err != nil { return nil, err } clientKubeProxyCert := filepath.Join(envInfo.DataDir, "agent", "client-kube-proxy.crt") clientKubeProxyKey := filepath.Join(envInfo.DataDir, "agent", "client-kube-proxy.key") + + // Ask the server to send us its kube-proxy client cert+key. These files are not unique to this node. if err := getHostFile(clientKubeProxyCert, clientKubeProxyKey, info); err != nil { - return nil, err + return nil, errors.Wrap(err, clientKubeProxyCert) } + // Generate a kubeconfig for kube-proxy. kubeconfigKubeproxy := filepath.Join(envInfo.DataDir, "agent", "kubeproxy.kubeconfig") - if err := deps.KubeConfig(kubeconfigKubeproxy, proxy.APIServerURL(), serverCAFile, clientKubeProxyCert, clientKubeProxyKey); err != nil { + if err := deps.KubeConfig(kubeconfigKubeproxy, apiServerURL, serverCAFile, clientKubeProxyCert, clientKubeProxyKey); err != nil { return nil, err } clientK3sControllerCert := filepath.Join(envInfo.DataDir, "agent", "client-"+version.Program+"-controller.crt") clientK3sControllerKey := filepath.Join(envInfo.DataDir, "agent", "client-"+version.Program+"-controller.key") + + // Ask the server to send us its agent controller client cert+key. These files are not unique to this node. if err := getHostFile(clientK3sControllerCert, clientK3sControllerKey, info); err != nil { - return nil, err + return nil, errors.Wrap(err, clientK3sControllerCert) } + // Generate a kubeconfig for the agent controller. kubeconfigK3sController := filepath.Join(envInfo.DataDir, "agent", version.Program+"controller.kubeconfig") - if err := deps.KubeConfig(kubeconfigK3sController, proxy.APIServerURL(), serverCAFile, clientK3sControllerCert, clientK3sControllerKey); err != nil { + if err := deps.KubeConfig(kubeconfigK3sController, apiServerURL, serverCAFile, clientK3sControllerCert, clientK3sControllerKey); err != nil { return nil, err } diff --git a/pkg/agent/loadbalancer/loadbalancer.go b/pkg/agent/loadbalancer/loadbalancer.go index feddb4d872..36019470c8 100644 --- a/pkg/agent/loadbalancer/loadbalancer.go +++ b/pkg/agent/loadbalancer/loadbalancer.go @@ -172,6 +172,11 @@ func (lb *LoadBalancer) dialContext(ctx context.Context, network, _ string) (net return conn, nil } logrus.Debugf("Dial error from load balancer %s: %s", lb.serviceName, err) + // Don't close connections to the failed server if we're retrying with health checks ignored. + // We don't want to disrupt active connections if it is unlikely they will have anywhere to go. + if !allChecksFailed { + defer server.closeAll() + } } newServer, err := lb.nextServer(targetServer) @@ -179,7 +184,7 @@ func (lb *LoadBalancer) dialContext(ctx context.Context, network, _ string) (net return nil, err } if targetServer != newServer { - logrus.Debugf("Failed over to new server for load balancer %s: %s", lb.serviceName, newServer) + logrus.Debugf("Failed over to new server for load balancer %s: %s -> %s", lb.serviceName, targetServer, newServer) } if ctx.Err() != nil { return nil, ctx.Err() diff --git a/pkg/agent/proxy/apiproxy.go b/pkg/agent/proxy/apiproxy.go index becc2a0def..e711623e46 100644 --- a/pkg/agent/proxy/apiproxy.go +++ b/pkg/agent/proxy/apiproxy.go @@ -63,6 +63,7 @@ func NewSupervisorProxy(ctx context.Context, lbEnabled bool, dataDir, supervisor p.fallbackSupervisorAddress = u.Host p.supervisorPort = u.Port() + logrus.Debugf("Supervisor proxy using supervisor=%s apiserver=%s lb=%v", p.supervisorURL, p.apiServerURL, p.lbEnabled) return &p, nil } @@ -132,6 +133,11 @@ func (p *proxy) setSupervisorPort(addresses []string) []string { // load-balancer, and the address of this load-balancer is returned instead of the actual apiserver // addresses. func (p *proxy) SetAPIServerPort(port int, isIPv6 bool) error { + if p.apiServerEnabled { + logrus.Debugf("Supervisor proxy apiserver port already set") + return nil + } + u, err := url.Parse(p.initialSupervisorURL) if err != nil { return errors.Wrapf(err, "failed to parse server URL %s", p.initialSupervisorURL) @@ -139,22 +145,23 @@ func (p *proxy) SetAPIServerPort(port int, isIPv6 bool) error { p.apiServerPort = strconv.Itoa(port) u.Host = sysnet.JoinHostPort(u.Hostname(), p.apiServerPort) - p.apiServerURL = u.String() - p.apiServerEnabled = true - if p.lbEnabled && p.apiServerLB == nil { lbServerPort := p.lbServerPort if lbServerPort != 0 { lbServerPort = lbServerPort - 1 } - lb, err := loadbalancer.New(p.context, p.dataDir, loadbalancer.APIServerServiceName, p.apiServerURL, lbServerPort, isIPv6) + lb, err := loadbalancer.New(p.context, p.dataDir, loadbalancer.APIServerServiceName, u.String(), lbServerPort, isIPv6) if err != nil { return err } - p.apiServerURL = lb.LoadBalancerServerURL() p.apiServerLB = lb + p.apiServerURL = lb.LoadBalancerServerURL() + } else { + p.apiServerURL = u.String() } + logrus.Debugf("Supervisor proxy apiserver port changed; apiserver=%s lb=%v", p.apiServerURL, p.lbEnabled) + p.apiServerEnabled = true return nil } diff --git a/pkg/agent/tunnel/tunnel.go b/pkg/agent/tunnel/tunnel.go index 6245d52910..23c6dac404 100644 --- a/pkg/agent/tunnel/tunnel.go +++ b/pkg/agent/tunnel/tunnel.go @@ -451,7 +451,7 @@ func (a *agentTunnel) connect(rootCtx context.Context, waitGroup *sync.WaitGroup err := remotedialer.ConnectToProxy(ctx, wsURL, nil, auth, ws, onConnect) connected = false if err != nil && !errors.Is(err, context.Canceled) { - logrus.WithField("url", wsURL).WithError(err).Error("Remotedialer proxy error; reconecting...") + logrus.WithField("url", wsURL).WithError(err).Error("Remotedialer proxy error; reconnecting...") // wait between reconnection attempts to avoid hammering the server time.Sleep(endpointDebounceDelay) } From df5db28a687f5fb6fd0f5cb68873d2184974e953 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Tue, 4 Jun 2024 14:15:43 -0700 Subject: [PATCH 20/20] Add ADR for support for etcd s3 config secret (#9364) * Add ADR for etcd s3 config secret * Update ADR with changes from initial review Signed-off-by: Brad Davidson --- docs/adrs/etcd-s3-secret.md | 75 +++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 docs/adrs/etcd-s3-secret.md diff --git a/docs/adrs/etcd-s3-secret.md b/docs/adrs/etcd-s3-secret.md new file mode 100644 index 0000000000..5cf67424fe --- /dev/null +++ b/docs/adrs/etcd-s3-secret.md @@ -0,0 +1,75 @@ +# Support etcd Snapshot Configuration via Kubernetes Secret + +Date: 2024-02-06 + +## Status + +Accepted + +## Context + +### Current State + +K3s currently reads configuration for S3 storage of etcd snapshots from CLI flags and/or configuration files. + +Security-conscious users have raised issue with the current state. They want to store snapshots on S3, but do not want +to have credentials visible in config files or systemd units. Users operating in highly secure environments have also +asked for the ability to configure a proxy server to be used when creating/restoring snapshots stored on S3, without +managing complicated `NO_PROXY` settings or affecting the rest of the K3s process environment. + +### Security Considerations + +Storing credentials on-disk is generally considered a bad idea, and is not allowed by security practices in many +organizations. Use of static credentials in the config file also makes them difficult to rotate, as K3s only reloads the +configuration on startup. + +### Existing Work + +Cloud-providers and other tools that need to auth to external systems frequently can be configured to retrieve secrets +from an existing credential secret that is provisioned via an external process, such as a secrets management tool. This +avoids embedding the credentials directly in the system configuration, chart values, and so on. + +## Decision + +* We will add a `--etcd-s3-proxy` flag that can be used to set the proxy used by the S3 client. This will override the + settings that golang's default HTTP client reads from the `HTTP_PROXY/HTTPS_PROXY/NO_PROXY` environment varibles. +* We will add support for reading etcd snapshot S3 configuration from a Secret. The secret name will be specified via a new + `--etcd-s3-secret` flag, which accepts the name of the Secret in the `kube-system` namespace. +* Presence of the `--etcd-s3-secret` flag does not imply `--etcd-s3`. If S3 is not enabled by use of the `--etcd-s3` flag, + the Secret will not be used. +* The Secret does not need to exist when K3s starts; it will be checked for every time a snapshot operation is performed. +* Secret and CLI/config values will NOT be merged. The Secret will provide values to be used in absence of other + configuration; if S3 configuration is passed via CLI flags or configuration file, ALL fields set by the Secret + will be ignored. +* The Secret will ONLY be used for on-demand and scheduled snapshot save operations; it will not be used by snapshot restore. + Snapshot restore operations that want to retrieve a snapshot from S3 will need to pass the appropriate configuration + via environment variables or CLI flags, as the Secret is not available during the restore process. + +Fields within the Secret will match `k3s server` CLI flags / config file keys. For the `etcd-s3-endpoint-ca`, which +normally contains the path of a file on disk, the `etcd-s3-endpoint-ca` field can specify an inline PEM-encoded CA +bundle, or the `etcd-s3-endpoint-ca-name` can be used to specify the name of a ConfigMap in the `kube-system` namespace +containing one or more CA bundles. All valid CA bundles found in either field are loaded. + +```yaml +apiVersion: v1 +kind: Secret +metadata: + name: k3s-etcd-snapshot-s3-config + namespace: kube-system +stringData: + etcd-s3-endpoint: "" + etcd-s3-endpoint-ca: "" + etcd-s3-endpoint-ca-name: "" + etcd-s3-skip-ssl-verify: "false" + etcd-s3-access-key: "AWS_ACCESS_KEY_ID" + etcd-s3-secret-key: "AWS_SECRET_ACCESS_KEY" + etcd-s3-bucket: "bucket" + etcd-s3-region: "us-east-1" + etcd-s3-insecure: "false" + etcd-s3-timeout: "5m" + etcd-s3-proxy: "" +``` + +## Consequences + +This will require additional documentation, tests, and QA work to validate use of secrets for s3 snapshot configuration.