Augmenting API call retry in nodeinfomanager

pull/58/head
Cheng Xing 2018-11-14 18:07:40 -08:00
parent 7ff49c99d3
commit ca18690ceb
2 changed files with 141 additions and 93 deletions

View File

@ -15,10 +15,12 @@ go_library(
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/errors:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//staging/src/k8s.io/client-go/util/retry:go_default_library",
"//staging/src/k8s.io/csi-api/pkg/apis/csi/v1alpha1:go_default_library",
"//staging/src/k8s.io/csi-api/pkg/client/clientset/versioned:go_default_library",
"//vendor/github.com/container-storage-interface/spec/lib/go/csi:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
],

View File

@ -29,15 +29,18 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/util/retry"
csiv1alpha1 "k8s.io/csi-api/pkg/apis/csi/v1alpha1"
csiclientset "k8s.io/csi-api/pkg/client/clientset/versioned"
"k8s.io/klog"
"k8s.io/kubernetes/pkg/features"
nodeutil "k8s.io/kubernetes/pkg/util/node"
"k8s.io/kubernetes/pkg/volume"
"k8s.io/kubernetes/pkg/volume/util"
"time"
)
const (
@ -45,7 +48,15 @@ const (
annotationKeyNodeID = "csi.volume.kubernetes.io/nodeid"
)
var nodeKind = v1.SchemeGroupVersion.WithKind("Node")
var (
nodeKind = v1.SchemeGroupVersion.WithKind("Node")
updateBackoff = wait.Backoff{
Steps: 4,
Duration: 10 * time.Millisecond,
Factor: 5.0,
Jitter: 0.1,
}
)
// nodeInfoManager contains necessary common dependencies to update node info on both
// the Node and CSINodeInfo objects.
@ -137,16 +148,29 @@ func (nim *nodeInfoManager) UninstallCSIDriver(driverName string) error {
return nil
}
func (nim *nodeInfoManager) updateNode(updateFuncs ...nodeUpdateFunc) error {
var updateErrs []error
err := wait.ExponentialBackoff(updateBackoff, func() (bool, error) {
if err := nim.tryUpdateNode(updateFuncs...); err != nil {
updateErrs = append(updateErrs, err)
return false, nil
}
return true, nil
})
if err != nil {
return fmt.Errorf("error updating node: %v; caused by: %v", err, utilerrors.NewAggregate(updateErrs))
}
return nil
}
// updateNode repeatedly attempts to update the corresponding node object
// which is modified by applying the given update functions sequentially.
// Because updateFuncs are applied sequentially, later updateFuncs should take into account
// the effects of previous updateFuncs to avoid potential conflicts. For example, if multiple
// functions update the same field, updates in the last function are persisted.
func (nim *nodeInfoManager) updateNode(updateFuncs ...nodeUpdateFunc) error {
retryErr := retry.RetryOnConflict(retry.DefaultRetry, func() error {
func (nim *nodeInfoManager) tryUpdateNode(updateFuncs ...nodeUpdateFunc) error {
// Retrieve the latest version of Node before attempting update, so that
// existing changes are not overwritten. RetryOnConflict uses
// exponential backoff to avoid exhausting the apiserver.
// existing changes are not overwritten.
kubeClient := nim.volumeHost.GetKubeClient()
if kubeClient == nil {
@ -155,10 +179,10 @@ func (nim *nodeInfoManager) updateNode(updateFuncs ...nodeUpdateFunc) error {
nodeClient := kubeClient.CoreV1().Nodes()
originalNode, err := nodeClient.Get(string(nim.nodeName), metav1.GetOptions{})
node := originalNode.DeepCopy()
if err != nil {
return err // do not wrap error
return err
}
node := originalNode.DeepCopy()
needUpdate := false
for _, update := range updateFuncs {
@ -174,14 +198,9 @@ func (nim *nodeInfoManager) updateNode(updateFuncs ...nodeUpdateFunc) error {
// PatchNodeStatus can update both node's status and labels or annotations
// Updating status by directly updating node does not work
_, _, updateErr := nodeutil.PatchNodeStatus(kubeClient.CoreV1(), types.NodeName(node.Name), originalNode, node)
return updateErr // do not wrap error
return updateErr
}
return nil
})
if retryErr != nil {
return fmt.Errorf("node update failed: %v", retryErr)
}
return nil
}
@ -333,21 +352,35 @@ func (nim *nodeInfoManager) updateCSINodeInfo(
return fmt.Errorf("error getting CSI client")
}
retryErr := retry.RetryOnConflict(retry.DefaultRetry, func() error {
var updateErrs []error
err := wait.ExponentialBackoff(updateBackoff, func() (bool, error) {
if err := nim.tryUpdateCSINodeInfo(csiKubeClient, driverName, driverNodeID, topology); err != nil {
updateErrs = append(updateErrs, err)
return false, nil
}
return true, nil
})
if err != nil {
return fmt.Errorf("error updating CSINodeInfo: %v; caused by: %v", err, utilerrors.NewAggregate(updateErrs))
}
return nil
}
func (nim *nodeInfoManager) tryUpdateCSINodeInfo(
csiKubeClient csiclientset.Interface,
driverName string,
driverNodeID string,
topology *csipb.Topology) error {
nodeInfo, err := csiKubeClient.CsiV1alpha1().CSINodeInfos().Get(string(nim.nodeName), metav1.GetOptions{})
if nodeInfo == nil || errors.IsNotFound(err) {
nodeInfo, err = nim.CreateCSINodeInfo()
}
if err != nil {
return err // do not wrap error
return err
}
return nim.installDriverToCSINodeInfo(nodeInfo, driverName, driverNodeID, topology)
})
if retryErr != nil {
return fmt.Errorf("CSINodeInfo update failed: %v", retryErr)
}
return nil
}
func (nim *nodeInfoManager) CreateCSINodeInfo() (*csiv1alpha1.CSINodeInfo, error) {
@ -364,7 +397,7 @@ func (nim *nodeInfoManager) CreateCSINodeInfo() (*csiv1alpha1.CSINodeInfo, error
node, err := kubeClient.CoreV1().Nodes().Get(string(nim.nodeName), metav1.GetOptions{})
if err != nil {
return nil, err // do not wrap error
return nil, err
}
nodeInfo := &csiv1alpha1.CSINodeInfo{
@ -464,17 +497,35 @@ func (nim *nodeInfoManager) installDriverToCSINodeInfo(
return err
}
_, err = csiKubeClient.CsiV1alpha1().CSINodeInfos().Update(nodeInfo)
return err // do not wrap error
return err
}
func (nim *nodeInfoManager) uninstallDriverFromCSINodeInfo(csiDriverName string) error {
retryErr := retry.RetryOnConflict(retry.DefaultRetry, func() error {
func (nim *nodeInfoManager) uninstallDriverFromCSINodeInfo(
csiDriverName string) error {
csiKubeClient := nim.volumeHost.GetCSIClient()
if csiKubeClient == nil {
return fmt.Errorf("error getting CSI client")
}
var updateErrs []error
err := wait.ExponentialBackoff(updateBackoff, func() (bool, error) {
if err := nim.tryUninstallDriverFromCSINodeInfo(csiKubeClient, csiDriverName); err != nil {
updateErrs = append(updateErrs, err)
return false, nil
}
return true, nil
})
if err != nil {
return fmt.Errorf("error updating CSINodeInfo: %v; caused by: %v", err, utilerrors.NewAggregate(updateErrs))
}
return nil
}
func (nim *nodeInfoManager) tryUninstallDriverFromCSINodeInfo(
csiKubeClient csiclientset.Interface,
csiDriverName string) error {
nodeInfoClient := csiKubeClient.CsiV1alpha1().CSINodeInfos()
nodeInfo, err := nodeInfoClient.Get(string(nim.nodeName), metav1.GetOptions{})
if err != nil {
@ -506,11 +557,6 @@ func (nim *nodeInfoManager) uninstallDriverFromCSINodeInfo(csiDriverName string)
_, updateErr := nodeInfoClient.Update(nodeInfo)
return updateErr // do not wrap error
})
if retryErr != nil {
return fmt.Errorf("CSINodeInfo update failed: %v", retryErr)
}
return nil
}
func updateMaxAttachLimit(driverName string, maxLimit int64) nodeUpdateFunc {