mirror of https://github.com/k3s-io/k3s
Fix netpol crash when node remains tainted uninitialized
It is conceivable that users might take more than 60 seconds to deploy their own cloud-provider. Instead of exiting, we should wait forever, but with more logging to indicate what's being waited on.
Signed-off-by: Brad Davidson <brad.davidson@rancher.com>
(cherry picked from commit ed23a2bb48)
Signed-off-by: Brad Davidson <brad.davidson@rancher.com>
pull/10290/head
parent
6c7e503bea
commit
da89ab5052
|
@ -67,27 +67,26 @@ func Run(ctx context.Context, nodeConfig *config.Node) error {
|
|||
return err
|
||||
}
|
||||
|
||||
// As kube-router netpol requires addresses to be available in the node object
|
||||
// Wait until the node has ready addresses to avoid race conditions (max 1 minute).
|
||||
// kube-router netpol requires addresses to be available in the node object.
|
||||
// Wait until the uninitialized taint has been removed, at which point the addresses should be set.
|
||||
// TODO: Replace with non-deprecated PollUntilContextTimeout when our and Kubernetes code migrate to it
|
||||
if err := wait.PollImmediateWithContext(ctx, 2*time.Second, 60*time.Second, func(ctx context.Context) (bool, error) {
|
||||
if err := wait.PollImmediateInfiniteWithContext(ctx, 2*time.Second, func(ctx context.Context) (bool, error) {
|
||||
// Get the node object
|
||||
node, err := client.CoreV1().Nodes().Get(ctx, nodeConfig.AgentConfig.NodeName, metav1.GetOptions{})
|
||||
if err != nil {
|
||||
logrus.Debugf("Network policy controller waiting to get Node %s: %v", nodeConfig.AgentConfig.NodeName, err)
|
||||
logrus.Infof("Network policy controller waiting to get Node %s: %v", nodeConfig.AgentConfig.NodeName, err)
|
||||
return false, nil
|
||||
}
|
||||
// Check for the uninitialized taint that should be removed by cloud-provider
|
||||
// If there is no cloud-provider, the taint will not be there
|
||||
// Check for the taint that should be removed by cloud-provider when the node has been initialized.
|
||||
for _, taint := range node.Spec.Taints {
|
||||
if taint.Key == cloudproviderapi.TaintExternalCloudProvider {
|
||||
logrus.Debugf("Network policy controller waiting for removal of %s taint", cloudproviderapi.TaintExternalCloudProvider)
|
||||
logrus.Infof("Network policy controller waiting for removal of %s taint", cloudproviderapi.TaintExternalCloudProvider)
|
||||
return false, nil
|
||||
}
|
||||
}
|
||||
return true, nil
|
||||
}); err != nil {
|
||||
return errors.Wrapf(err, "network policy controller timed out waiting for %s taint to be removed from Node %s", cloudproviderapi.TaintExternalCloudProvider, nodeConfig.AgentConfig.NodeName)
|
||||
return errors.Wrapf(err, "network policy controller failed to wait for %s taint to be removed from Node %s", cloudproviderapi.TaintExternalCloudProvider, nodeConfig.AgentConfig.NodeName)
|
||||
}
|
||||
|
||||
krConfig := options.NewKubeRouterConfig()
|
||||
|
|
|
@ -13,7 +13,7 @@ import (
|
|||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/labels"
|
||||
"k8s.io/client-go/util/retry"
|
||||
nodeUtil "k8s.io/kubernetes/pkg/controller/util/node"
|
||||
nodeutil "k8s.io/kubernetes/pkg/controller/util/node"
|
||||
)
|
||||
|
||||
func registerMetadataHandlers(ctx context.Context, etcd *ETCD) {
|
||||
|
@ -109,7 +109,7 @@ func (m *metadataHandler) handleSelf(node *v1.Node) (*v1.Node, error) {
|
|||
node.Labels = map[string]string{}
|
||||
}
|
||||
|
||||
if find, _ := nodeUtil.GetNodeCondition(&node.Status, etcdStatusType); find >= 0 {
|
||||
if find, _ := nodeutil.GetNodeCondition(&node.Status, etcdStatusType); find >= 0 {
|
||||
node.Status.Conditions = append(node.Status.Conditions[:find], node.Status.Conditions[find+1:]...)
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue