WIP: Handle failed attach operation leave uncertain volume attach state

This PR fixes issue #32727.

When an attach operation fails, it is still possible that the volume
will be attached to the node later. This PR adds the logic to record the
volume to node with attached state no matter whether the operation
succedded or not. If the operation fails, mark the attached state to
false. If the operation succeeded, mark the attached state to true. The
reconciler will still issue attach operation until it returns
successfully. If the pod is removed in the mean time, the reconciler
will issue detach operations for all the volumes no matter what is the
attached state.
pull/564/head
Jing Xu 2018-10-02 13:24:28 -07:00
parent 50e02fd0cc
commit 47331cf0a2
3 changed files with 67 additions and 49 deletions

View File

@ -56,7 +56,7 @@ type ActualStateOfWorld interface {
// added. // added.
// If no node with the name nodeName exists in list of attached nodes for // If no node with the name nodeName exists in list of attached nodes for
// the specified volume, the node is added. // the specified volume, the node is added.
AddVolumeNode(uniqueName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName, devicePath string) (v1.UniqueVolumeName, error) AddVolumeNode(uniqueName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName, devicePath string, attached bool) (v1.UniqueVolumeName, error)
// SetVolumeMountedByNode sets the MountedByNode value for the given volume // SetVolumeMountedByNode sets the MountedByNode value for the given volume
// and node. When set to true the mounted parameter indicates the volume // and node. When set to true the mounted parameter indicates the volume
@ -97,21 +97,27 @@ type ActualStateOfWorld interface {
// VolumeNodeExists returns true if the specified volume/node combo exists // VolumeNodeExists returns true if the specified volume/node combo exists
// in the underlying store indicating the specified volume is attached to // in the underlying store indicating the specified volume is attached to
// the specified node. // the specified node.
VolumeNodeExists(volumeName v1.UniqueVolumeName, nodeName types.NodeName) bool IsVolumeAttachedToNode(volumeName v1.UniqueVolumeName, nodeName types.NodeName) bool
// GetAttachedVolumes generates and returns a list of volumes/node pairs // GetAttachedVolumes generates and returns a list of volumes/node pairs
// reflecting which volumes are attached to which nodes based on the // reflecting which volumes might attached to which nodes based on the
// current actual state of the world. // current actual state of the world.
GetAttachedVolumes() []AttachedVolume GetAllVolumes() []AttachedVolume
// GetAttachedVolumes generates and returns a list of volumes attached to // GetAttachedVolumes generates and returns a list of volumes that added to
// the specified node reflecting which volumes are/or might be attached to that node
// based on the current actual state of the world. This function is currently used by
// attach_detach_controller to process VolumeInUse
GetAllVolumesForNode(nodeName types.NodeName) []AttachedVolume
// GetAttachedVolumesPerNode generates and returns a map of nodes and volumes that added to
// the specified node reflecting which volumes are attached to that node // the specified node reflecting which volumes are attached to that node
// based on the current actual state of the world. // based on the current actual state of the world. This function is currently used by
GetAttachedVolumesForNode(nodeName types.NodeName) []AttachedVolume // reconciler to verify whether the volume is still attached to the node.
GetAttachedVolumesPerNode() map[types.NodeName][]operationexecutor.AttachedVolume GetAttachedVolumesPerNode() map[types.NodeName][]operationexecutor.AttachedVolume
// GetNodesForVolume returns the nodes on which the volume is attached // GetNodesForVolume returns the nodes on which the volume is attached.
// This function is used by reconciler for mutli-attach check.
GetNodesForVolume(volumeName v1.UniqueVolumeName) []types.NodeName GetNodesForVolume(volumeName v1.UniqueVolumeName) []types.NodeName
// GetVolumesToReportAttached returns a map containing the set of nodes for // GetVolumesToReportAttached returns a map containing the set of nodes for
@ -185,7 +191,7 @@ type attachedVolume struct {
spec *volume.Spec spec *volume.Spec
// nodesAttachedTo is a map containing the set of nodes this volume has // nodesAttachedTo is a map containing the set of nodes this volume has
// successfully been attached to. The key in this map is the name of the // been trying to be attached to. The key in this map is the name of the
// node and the value is a node object containing more information about // node and the value is a node object containing more information about
// the node. // the node.
nodesAttachedTo map[types.NodeName]nodeAttachedTo nodesAttachedTo map[types.NodeName]nodeAttachedTo
@ -194,7 +200,8 @@ type attachedVolume struct {
devicePath string devicePath string
} }
// The nodeAttachedTo object represents a node that has volumes attached to it. // The nodeAttachedTo object represents a node that has volumes attached to it
// or trying to attach to it.
type nodeAttachedTo struct { type nodeAttachedTo struct {
// nodeName contains the name of this node. // nodeName contains the name of this node.
nodeName types.NodeName nodeName types.NodeName
@ -203,11 +210,8 @@ type nodeAttachedTo struct {
// node and is unsafe to detach // node and is unsafe to detach
mountedByNode bool mountedByNode bool
// number of times SetVolumeMountedByNode has been called to set the value // attached indicates that the volume is confirmed to be attached to this node
// of mountedByNode to true. This is used to prevent mountedByNode from attached bool
// being reset during the period between attach and mount when volumesInUse
// status for the node may not be set.
mountedByNodeSetCount uint
// detachRequestedTime used to capture the desire to detach this volume // detachRequestedTime used to capture the desire to detach this volume
detachRequestedTime time.Time detachRequestedTime time.Time
@ -235,9 +239,16 @@ type nodeToUpdateStatusFor struct {
volumesToReportAsAttached map[v1.UniqueVolumeName]v1.UniqueVolumeName volumesToReportAsAttached map[v1.UniqueVolumeName]v1.UniqueVolumeName
} }
func (asw *actualStateOfWorld) MarkVolumeAsUncertain(
uniqueName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName) error {
_, err := asw.AddVolumeNode(uniqueName, volumeSpec, nodeName, "", false)
return err
}
func (asw *actualStateOfWorld) MarkVolumeAsAttached( func (asw *actualStateOfWorld) MarkVolumeAsAttached(
uniqueName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName, devicePath string) error { uniqueName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName, devicePath string) error {
_, err := asw.AddVolumeNode(uniqueName, volumeSpec, nodeName, devicePath) _, err := asw.AddVolumeNode(uniqueName, volumeSpec, nodeName, devicePath, true)
return err return err
} }
@ -261,12 +272,12 @@ func (asw *actualStateOfWorld) AddVolumeToReportAsAttached(
} }
func (asw *actualStateOfWorld) AddVolumeNode( func (asw *actualStateOfWorld) AddVolumeNode(
uniqueName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName, devicePath string) (v1.UniqueVolumeName, error) { uniqueName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName, devicePath string, isAttached bool) (v1.UniqueVolumeName, error) {
asw.Lock() asw.Lock()
defer asw.Unlock() defer asw.Unlock()
var volumeName v1.UniqueVolumeName volumeName := uniqueName
if volumeSpec != nil { if volumeName == "" {
attachableVolumePlugin, err := asw.volumePluginMgr.FindAttachablePluginBySpec(volumeSpec) attachableVolumePlugin, err := asw.volumePluginMgr.FindAttachablePluginBySpec(volumeSpec)
if err != nil || attachableVolumePlugin == nil { if err != nil || attachableVolumePlugin == nil {
return "", fmt.Errorf( return "", fmt.Errorf(
@ -283,12 +294,6 @@ func (asw *actualStateOfWorld) AddVolumeNode(
volumeSpec.Name(), volumeSpec.Name(),
err) err)
} }
} else {
// volumeSpec is nil
// This happens only on controller startup when reading the volumes from node
// status; if the pods using the volume have been removed and are unreachable
// the volumes should be detached immediately and the spec is not needed
volumeName = uniqueName
} }
volumeObj, volumeExists := asw.attachedVolumes[volumeName] volumeObj, volumeExists := asw.attachedVolumes[volumeName]
@ -311,22 +316,26 @@ func (asw *actualStateOfWorld) AddVolumeNode(
} }
asw.attachedVolumes[volumeName] = volumeObj asw.attachedVolumes[volumeName] = volumeObj
_, nodeExists := volumeObj.nodesAttachedTo[nodeName] node, nodeExists := volumeObj.nodesAttachedTo[nodeName]
if !nodeExists { if !nodeExists {
// Create object if it doesn't exist. // Create object if it doesn't exist.
volumeObj.nodesAttachedTo[nodeName] = nodeAttachedTo{ node = nodeAttachedTo{
nodeName: nodeName, nodeName: nodeName,
mountedByNode: true, // Assume mounted, until proven otherwise mountedByNode: true, // Assume mounted, until proven otherwise
mountedByNodeSetCount: 0, attached: isAttached,
detachRequestedTime: time.Time{}, detachRequestedTime: time.Time{},
} }
} else { } else {
node.attached = isAttached
klog.V(5).Infof("Volume %q is already added to attachedVolume list to the node %q", klog.V(5).Infof("Volume %q is already added to attachedVolume list to the node %q",
volumeName, volumeName,
nodeName) nodeName)
} }
volumeObj.nodesAttachedTo[nodeName] = node
if isAttached {
asw.addVolumeToReportAsAttached(volumeName, nodeName) asw.addVolumeToReportAsAttached(volumeName, nodeName)
}
return volumeName, nil return volumeName, nil
} }
@ -340,11 +349,6 @@ func (asw *actualStateOfWorld) SetVolumeMountedByNode(
return fmt.Errorf("Failed to SetVolumeMountedByNode with error: %v", err) return fmt.Errorf("Failed to SetVolumeMountedByNode with error: %v", err)
} }
if mounted {
// Increment set count
nodeObj.mountedByNodeSetCount = nodeObj.mountedByNodeSetCount + 1
}
nodeObj.mountedByNode = mounted nodeObj.mountedByNode = mounted
volumeObj.nodesAttachedTo[nodeName] = nodeObj volumeObj.nodesAttachedTo[nodeName] = nodeObj
klog.V(4).Infof("SetVolumeMountedByNode volume %v to the node %q mounted %t", klog.V(4).Infof("SetVolumeMountedByNode volume %v to the node %q mounted %t",
@ -515,22 +519,24 @@ func (asw *actualStateOfWorld) DeleteVolumeNode(
asw.removeVolumeFromReportAsAttached(volumeName, nodeName) asw.removeVolumeFromReportAsAttached(volumeName, nodeName)
} }
func (asw *actualStateOfWorld) VolumeNodeExists( func (asw *actualStateOfWorld) IsVolumeAttachedToNode(
volumeName v1.UniqueVolumeName, nodeName types.NodeName) bool { volumeName v1.UniqueVolumeName, nodeName types.NodeName) bool {
asw.RLock() asw.RLock()
defer asw.RUnlock() defer asw.RUnlock()
volumeObj, volumeExists := asw.attachedVolumes[volumeName] volumeObj, volumeExists := asw.attachedVolumes[volumeName]
if volumeExists { if volumeExists {
if _, nodeExists := volumeObj.nodesAttachedTo[nodeName]; nodeExists { if node, nodeExists := volumeObj.nodesAttachedTo[nodeName]; nodeExists {
if node.attached == true {
return true return true
} }
} }
}
return false return false
} }
func (asw *actualStateOfWorld) GetAttachedVolumes() []AttachedVolume { func (asw *actualStateOfWorld) GetAllVolumes() []AttachedVolume {
asw.RLock() asw.RLock()
defer asw.RUnlock() defer asw.RUnlock()
@ -546,7 +552,7 @@ func (asw *actualStateOfWorld) GetAttachedVolumes() []AttachedVolume {
return attachedVolumes return attachedVolumes
} }
func (asw *actualStateOfWorld) GetAttachedVolumesForNode( func (asw *actualStateOfWorld) GetAllVolumesForNode(
nodeName types.NodeName) []AttachedVolume { nodeName types.NodeName) []AttachedVolume {
asw.RLock() asw.RLock()
defer asw.RUnlock() defer asw.RUnlock()
@ -574,11 +580,13 @@ func (asw *actualStateOfWorld) GetAttachedVolumesPerNode() map[types.NodeName][]
attachedVolumesPerNode := make(map[types.NodeName][]operationexecutor.AttachedVolume) attachedVolumesPerNode := make(map[types.NodeName][]operationexecutor.AttachedVolume)
for _, volumeObj := range asw.attachedVolumes { for _, volumeObj := range asw.attachedVolumes {
for nodeName, nodeObj := range volumeObj.nodesAttachedTo { for nodeName, nodeObj := range volumeObj.nodesAttachedTo {
if nodeObj.attached {
volumes := attachedVolumesPerNode[nodeName] volumes := attachedVolumesPerNode[nodeName]
volumes = append(volumes, getAttachedVolume(&volumeObj, &nodeObj).AttachedVolume) volumes = append(volumes, getAttachedVolume(&volumeObj, &nodeObj).AttachedVolume)
attachedVolumesPerNode[nodeName] = volumes attachedVolumesPerNode[nodeName] = volumes
} }
} }
}
return attachedVolumesPerNode return attachedVolumesPerNode
} }
@ -593,9 +601,11 @@ func (asw *actualStateOfWorld) GetNodesForVolume(volumeName v1.UniqueVolumeName)
} }
nodes := []types.NodeName{} nodes := []types.NodeName{}
for k := range volumeObj.nodesAttachedTo { for k, nodesAttached := range volumeObj.nodesAttachedTo {
if nodesAttached.attached {
nodes = append(nodes, k) nodes = append(nodes, k)
} }
}
return nodes return nodes
} }

View File

@ -192,6 +192,8 @@ type ActualStateOfWorldAttacherUpdater interface {
// volumes. See issue 29695. // volumes. See issue 29695.
MarkVolumeAsAttached(volumeName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName, devicePath string) error MarkVolumeAsAttached(volumeName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName, devicePath string) error
MarkVolumeAsUncertain(volumeName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName) error
// Marks the specified volume as detached from the specified node // Marks the specified volume as detached from the specified node
MarkVolumeAsDetached(volumeName v1.UniqueVolumeName, nodeName types.NodeName) MarkVolumeAsDetached(volumeName v1.UniqueVolumeName, nodeName types.NodeName)

View File

@ -323,6 +323,12 @@ func (og *operationGenerator) GenerateAttachVolumeFunc(
} }
} }
addVolumeNodeErr := actualStateOfWorld.MarkVolumeAsUncertain(
v1.UniqueVolumeName(""), volumeToAttach.VolumeSpec, volumeToAttach.NodeName)
if addVolumeNodeErr != nil {
// On failure, return error. Caller will log and retry.
return volumeToAttach.GenerateError("AttachVolume.MarkVolumeAsUncertain failed", addVolumeNodeErr)
}
// On failure, return error. Caller will log and retry. // On failure, return error. Caller will log and retry.
return volumeToAttach.GenerateError("AttachVolume.Attach failed", attachErr) return volumeToAttach.GenerateError("AttachVolume.Attach failed", attachErr)
} }