WIP: Handle failed attach operation leave uncertain volume attach state

This PR fixes issue #32727.

When an attach operation fails, it is still possible that the volume
will be attached to the node later. This PR adds the logic to record the
volume to node with attached state no matter whether the operation
succedded or not. If the operation fails, mark the attached state to
false. If the operation succeeded, mark the attached state to true. The
reconciler will still issue attach operation until it returns
successfully. If the pod is removed in the mean time, the reconciler
will issue detach operations for all the volumes no matter what is the
attached state.
pull/564/head
Jing Xu 2018-10-02 13:24:28 -07:00
parent 50e02fd0cc
commit 47331cf0a2
3 changed files with 67 additions and 49 deletions

View File

@ -56,7 +56,7 @@ type ActualStateOfWorld interface {
// added.
// If no node with the name nodeName exists in list of attached nodes for
// the specified volume, the node is added.
AddVolumeNode(uniqueName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName, devicePath string) (v1.UniqueVolumeName, error)
AddVolumeNode(uniqueName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName, devicePath string, attached bool) (v1.UniqueVolumeName, error)
// SetVolumeMountedByNode sets the MountedByNode value for the given volume
// and node. When set to true the mounted parameter indicates the volume
@ -97,21 +97,27 @@ type ActualStateOfWorld interface {
// VolumeNodeExists returns true if the specified volume/node combo exists
// in the underlying store indicating the specified volume is attached to
// the specified node.
VolumeNodeExists(volumeName v1.UniqueVolumeName, nodeName types.NodeName) bool
IsVolumeAttachedToNode(volumeName v1.UniqueVolumeName, nodeName types.NodeName) bool
// GetAttachedVolumes generates and returns a list of volumes/node pairs
// reflecting which volumes are attached to which nodes based on the
// reflecting which volumes might attached to which nodes based on the
// current actual state of the world.
GetAttachedVolumes() []AttachedVolume
GetAllVolumes() []AttachedVolume
// GetAttachedVolumes generates and returns a list of volumes attached to
// GetAttachedVolumes generates and returns a list of volumes that added to
// the specified node reflecting which volumes are/or might be attached to that node
// based on the current actual state of the world. This function is currently used by
// attach_detach_controller to process VolumeInUse
GetAllVolumesForNode(nodeName types.NodeName) []AttachedVolume
// GetAttachedVolumesPerNode generates and returns a map of nodes and volumes that added to
// the specified node reflecting which volumes are attached to that node
// based on the current actual state of the world.
GetAttachedVolumesForNode(nodeName types.NodeName) []AttachedVolume
// based on the current actual state of the world. This function is currently used by
// reconciler to verify whether the volume is still attached to the node.
GetAttachedVolumesPerNode() map[types.NodeName][]operationexecutor.AttachedVolume
// GetNodesForVolume returns the nodes on which the volume is attached
// GetNodesForVolume returns the nodes on which the volume is attached.
// This function is used by reconciler for mutli-attach check.
GetNodesForVolume(volumeName v1.UniqueVolumeName) []types.NodeName
// GetVolumesToReportAttached returns a map containing the set of nodes for
@ -185,7 +191,7 @@ type attachedVolume struct {
spec *volume.Spec
// nodesAttachedTo is a map containing the set of nodes this volume has
// successfully been attached to. The key in this map is the name of the
// been trying to be attached to. The key in this map is the name of the
// node and the value is a node object containing more information about
// the node.
nodesAttachedTo map[types.NodeName]nodeAttachedTo
@ -194,7 +200,8 @@ type attachedVolume struct {
devicePath string
}
// The nodeAttachedTo object represents a node that has volumes attached to it.
// The nodeAttachedTo object represents a node that has volumes attached to it
// or trying to attach to it.
type nodeAttachedTo struct {
// nodeName contains the name of this node.
nodeName types.NodeName
@ -203,11 +210,8 @@ type nodeAttachedTo struct {
// node and is unsafe to detach
mountedByNode bool
// number of times SetVolumeMountedByNode has been called to set the value
// of mountedByNode to true. This is used to prevent mountedByNode from
// being reset during the period between attach and mount when volumesInUse
// status for the node may not be set.
mountedByNodeSetCount uint
// attached indicates that the volume is confirmed to be attached to this node
attached bool
// detachRequestedTime used to capture the desire to detach this volume
detachRequestedTime time.Time
@ -235,9 +239,16 @@ type nodeToUpdateStatusFor struct {
volumesToReportAsAttached map[v1.UniqueVolumeName]v1.UniqueVolumeName
}
func (asw *actualStateOfWorld) MarkVolumeAsUncertain(
uniqueName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName) error {
_, err := asw.AddVolumeNode(uniqueName, volumeSpec, nodeName, "", false)
return err
}
func (asw *actualStateOfWorld) MarkVolumeAsAttached(
uniqueName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName, devicePath string) error {
_, err := asw.AddVolumeNode(uniqueName, volumeSpec, nodeName, devicePath)
_, err := asw.AddVolumeNode(uniqueName, volumeSpec, nodeName, devicePath, true)
return err
}
@ -261,12 +272,12 @@ func (asw *actualStateOfWorld) AddVolumeToReportAsAttached(
}
func (asw *actualStateOfWorld) AddVolumeNode(
uniqueName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName, devicePath string) (v1.UniqueVolumeName, error) {
uniqueName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName, devicePath string, isAttached bool) (v1.UniqueVolumeName, error) {
asw.Lock()
defer asw.Unlock()
var volumeName v1.UniqueVolumeName
if volumeSpec != nil {
volumeName := uniqueName
if volumeName == "" {
attachableVolumePlugin, err := asw.volumePluginMgr.FindAttachablePluginBySpec(volumeSpec)
if err != nil || attachableVolumePlugin == nil {
return "", fmt.Errorf(
@ -283,12 +294,6 @@ func (asw *actualStateOfWorld) AddVolumeNode(
volumeSpec.Name(),
err)
}
} else {
// volumeSpec is nil
// This happens only on controller startup when reading the volumes from node
// status; if the pods using the volume have been removed and are unreachable
// the volumes should be detached immediately and the spec is not needed
volumeName = uniqueName
}
volumeObj, volumeExists := asw.attachedVolumes[volumeName]
@ -311,22 +316,26 @@ func (asw *actualStateOfWorld) AddVolumeNode(
}
asw.attachedVolumes[volumeName] = volumeObj
_, nodeExists := volumeObj.nodesAttachedTo[nodeName]
node, nodeExists := volumeObj.nodesAttachedTo[nodeName]
if !nodeExists {
// Create object if it doesn't exist.
volumeObj.nodesAttachedTo[nodeName] = nodeAttachedTo{
nodeName: nodeName,
mountedByNode: true, // Assume mounted, until proven otherwise
mountedByNodeSetCount: 0,
detachRequestedTime: time.Time{},
node = nodeAttachedTo{
nodeName: nodeName,
mountedByNode: true, // Assume mounted, until proven otherwise
attached: isAttached,
detachRequestedTime: time.Time{},
}
} else {
node.attached = isAttached
klog.V(5).Infof("Volume %q is already added to attachedVolume list to the node %q",
volumeName,
nodeName)
}
asw.addVolumeToReportAsAttached(volumeName, nodeName)
volumeObj.nodesAttachedTo[nodeName] = node
if isAttached {
asw.addVolumeToReportAsAttached(volumeName, nodeName)
}
return volumeName, nil
}
@ -340,11 +349,6 @@ func (asw *actualStateOfWorld) SetVolumeMountedByNode(
return fmt.Errorf("Failed to SetVolumeMountedByNode with error: %v", err)
}
if mounted {
// Increment set count
nodeObj.mountedByNodeSetCount = nodeObj.mountedByNodeSetCount + 1
}
nodeObj.mountedByNode = mounted
volumeObj.nodesAttachedTo[nodeName] = nodeObj
klog.V(4).Infof("SetVolumeMountedByNode volume %v to the node %q mounted %t",
@ -515,22 +519,24 @@ func (asw *actualStateOfWorld) DeleteVolumeNode(
asw.removeVolumeFromReportAsAttached(volumeName, nodeName)
}
func (asw *actualStateOfWorld) VolumeNodeExists(
func (asw *actualStateOfWorld) IsVolumeAttachedToNode(
volumeName v1.UniqueVolumeName, nodeName types.NodeName) bool {
asw.RLock()
defer asw.RUnlock()
volumeObj, volumeExists := asw.attachedVolumes[volumeName]
if volumeExists {
if _, nodeExists := volumeObj.nodesAttachedTo[nodeName]; nodeExists {
return true
if node, nodeExists := volumeObj.nodesAttachedTo[nodeName]; nodeExists {
if node.attached == true {
return true
}
}
}
return false
}
func (asw *actualStateOfWorld) GetAttachedVolumes() []AttachedVolume {
func (asw *actualStateOfWorld) GetAllVolumes() []AttachedVolume {
asw.RLock()
defer asw.RUnlock()
@ -546,7 +552,7 @@ func (asw *actualStateOfWorld) GetAttachedVolumes() []AttachedVolume {
return attachedVolumes
}
func (asw *actualStateOfWorld) GetAttachedVolumesForNode(
func (asw *actualStateOfWorld) GetAllVolumesForNode(
nodeName types.NodeName) []AttachedVolume {
asw.RLock()
defer asw.RUnlock()
@ -574,9 +580,11 @@ func (asw *actualStateOfWorld) GetAttachedVolumesPerNode() map[types.NodeName][]
attachedVolumesPerNode := make(map[types.NodeName][]operationexecutor.AttachedVolume)
for _, volumeObj := range asw.attachedVolumes {
for nodeName, nodeObj := range volumeObj.nodesAttachedTo {
volumes := attachedVolumesPerNode[nodeName]
volumes = append(volumes, getAttachedVolume(&volumeObj, &nodeObj).AttachedVolume)
attachedVolumesPerNode[nodeName] = volumes
if nodeObj.attached {
volumes := attachedVolumesPerNode[nodeName]
volumes = append(volumes, getAttachedVolume(&volumeObj, &nodeObj).AttachedVolume)
attachedVolumesPerNode[nodeName] = volumes
}
}
}
@ -593,8 +601,10 @@ func (asw *actualStateOfWorld) GetNodesForVolume(volumeName v1.UniqueVolumeName)
}
nodes := []types.NodeName{}
for k := range volumeObj.nodesAttachedTo {
nodes = append(nodes, k)
for k, nodesAttached := range volumeObj.nodesAttachedTo {
if nodesAttached.attached {
nodes = append(nodes, k)
}
}
return nodes
}

View File

@ -192,6 +192,8 @@ type ActualStateOfWorldAttacherUpdater interface {
// volumes. See issue 29695.
MarkVolumeAsAttached(volumeName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName, devicePath string) error
MarkVolumeAsUncertain(volumeName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName) error
// Marks the specified volume as detached from the specified node
MarkVolumeAsDetached(volumeName v1.UniqueVolumeName, nodeName types.NodeName)

View File

@ -323,6 +323,12 @@ func (og *operationGenerator) GenerateAttachVolumeFunc(
}
}
addVolumeNodeErr := actualStateOfWorld.MarkVolumeAsUncertain(
v1.UniqueVolumeName(""), volumeToAttach.VolumeSpec, volumeToAttach.NodeName)
if addVolumeNodeErr != nil {
// On failure, return error. Caller will log and retry.
return volumeToAttach.GenerateError("AttachVolume.MarkVolumeAsUncertain failed", addVolumeNodeErr)
}
// On failure, return error. Caller will log and retry.
return volumeToAttach.GenerateError("AttachVolume.Attach failed", attachErr)
}