From 82e3c32c9f837065acce2d86e01ba47447f14cb8 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Tue, 30 Jan 2024 23:47:18 +0000 Subject: [PATCH] Retry startup snapshot reconcile The reconcile may run before the kubelet has created the node object; retry until it succeeds Signed-off-by: Brad Davidson --- pkg/cluster/cluster.go | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index 7e3ecf2686..d87c2d9fa6 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -5,6 +5,7 @@ import ( "net/url" "runtime" "strings" + "time" "github.com/k3s-io/k3s/pkg/clientaccess" "github.com/k3s-io/k3s/pkg/cluster/managed" @@ -13,6 +14,7 @@ import ( "github.com/k3s-io/kine/pkg/endpoint" "github.com/pkg/errors" "github.com/sirupsen/logrus" + "k8s.io/apimachinery/pkg/util/wait" utilsnet "k8s.io/utils/net" ) @@ -107,11 +109,14 @@ func (c *Cluster) Start(ctx context.Context) (<-chan struct{}, error) { } if !c.config.EtcdDisableSnapshots { - if err := c.managedDB.ReconcileSnapshotData(ctx); err != nil { - logrus.Errorf("Failed to record snapshots for cluster: %v", err) - } + wait.PollImmediateUntilWithContext(ctx, time.Second, func(ctx context.Context) (bool, error) { + err := c.managedDB.ReconcileSnapshotData(ctx) + if err != nil { + logrus.Errorf("Failed to record snapshots for cluster: %v", err) + } + return err == nil, nil + }) } - return default: runtime.Gosched()