Increase agent's apiserver ready timeout (#4454)

Since we now start the server's agent sooner and in the background, we
may need to wait longer than 30 seconds for the apiserver to become
ready on downstream projects such as RKE2.

Since this wait essentially just serves as an analogue for the server's
apiReady channel, there's little danger in setting the timeout to
something relatively high.

Signed-off-by: Brad Davidson <brad.davidson@rancher.com>
pull/4484/head
Brad Davidson 2021-11-11 13:01:49 -08:00 committed by GitHub
parent bc7cdc78ca
commit 5ab6d21a7d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 18 additions and 7 deletions

View File

@ -107,7 +107,9 @@ func run(ctx context.Context, cfg cmds.Agent, proxy proxy.Proxy) error {
return err
}
util.WaitForAPIServerReady(coreClient, 30*time.Second)
if err := util.WaitForAPIServerReady(ctx, coreClient, util.DefaultAPIServerReadyTimeout); err != nil {
return errors.Wrap(err, "failed to wait for apiserver ready")
}
if err := configureNode(ctx, &nodeConfig.AgentConfig, coreClient.CoreV1().Nodes()); err != nil {
return err

View File

@ -78,7 +78,9 @@ func Setup(ctx context.Context, config *config.Node, proxy proxy.Proxy) error {
// and go from the cluster. We go into a faster but noisier connect loop if the watch fails
// following a successful connection.
go func() {
util.WaitForAPIServerReady(client, 30*time.Second)
if err := util.WaitForAPIServerReady(ctx, client, util.DefaultAPIServerReadyTimeout); err != nil {
logrus.Warnf("Tunnel endpoint watch failed to wait for apiserver ready: %v", err)
}
connect:
for {
time.Sleep(5 * time.Second)

View File

@ -419,7 +419,7 @@ func waitForAPIServerInBackground(ctx context.Context, runtime *config.ControlRu
select {
case <-ctx.Done():
return
case err := <-promise(func() error { return util.WaitForAPIServerReady(k8sClient, 30*time.Second) }):
case err := <-promise(func() error { return util.WaitForAPIServerReady(ctx, k8sClient, 30*time.Second) }):
if err != nil {
logrus.Infof("Waiting for API server to become available")
continue

View File

@ -16,6 +16,12 @@ import (
clientset "k8s.io/client-go/kubernetes"
)
// DefaultAPIServerReadyTimeout is the default duration to wait for the apiserver to become ready.
// It is primarily used to block startup of agent supervisor controllers until the apiserver is
// ready to serve requests, in the same way that the apiReady channel is used in the server
// packages, so it can be fairly long. It must be at least long enough for downstream projects
// like RKE2 to start the apiserver in the background.
const DefaultAPIServerReadyTimeout = 15 * time.Minute
func GetAddresses(endpoint *v1.Endpoints) []string {
serverAddresses := []string{}
if endpoint == nil {
@ -37,14 +43,15 @@ func GetAddresses(endpoint *v1.Endpoints) []string {
}
// WaitForAPIServerReady waits for the API Server's /readyz endpoint to report "ok" with timeout.
// This is cribbed from the Kubernetes controller-manager app, but checks the readyz endpoint instead of the deprecated healthz endpoint.
func WaitForAPIServerReady(client clientset.Interface, timeout time.Duration) error {
// This is modified from WaitForAPIServer from the Kubernetes controller-manager app, but checks the
// readyz endpoint instead of the deprecated healthz endpoint, and supports context.
func WaitForAPIServerReady(ctx context.Context, client clientset.Interface, timeout time.Duration) error {
var lastErr error
restClient := client.Discovery().RESTClient()
err := wait.PollImmediate(time.Second, timeout, func() (bool, error) {
err := wait.PollImmediateWithContext(ctx, time.Second, timeout, func(ctx context.Context) (bool, error) {
healthStatus := 0
result := restClient.Get().AbsPath("/readyz").Do(context.TODO()).StatusCode(&healthStatus)
result := restClient.Get().AbsPath("/readyz").Do(ctx).StatusCode(&healthStatus)
if rerr := result.Error(); rerr != nil {
lastErr = errors.Wrap(rerr, "failed to get apiserver /readyz status")
return false, nil