mirror of https://github.com/k3s-io/k3s
WIP: Handle failed attach operations that leave the volume attach state uncertain
This PR fixes issue #32727. When an attach operation fails, it is still possible that the volume will be attached to the node later. This PR adds the logic to record the volume-to-node pair together with an attached state, regardless of whether the operation succeeded or not. If the operation fails, the attached state is marked false. If the operation succeeds, the attached state is marked true. The reconciler will still issue attach operations until one returns successfully. If the pod is removed in the meantime, the reconciler will issue detach operations for all the volumes, no matter what the attached state is.
pull/564/head
parent
50e02fd0cc
commit
47331cf0a2
|
@ -56,7 +56,7 @@ type ActualStateOfWorld interface {
|
|||
// added.
|
||||
// If no node with the name nodeName exists in list of attached nodes for
|
||||
// the specified volume, the node is added.
|
||||
AddVolumeNode(uniqueName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName, devicePath string) (v1.UniqueVolumeName, error)
|
||||
AddVolumeNode(uniqueName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName, devicePath string, attached bool) (v1.UniqueVolumeName, error)
|
||||
|
||||
// SetVolumeMountedByNode sets the MountedByNode value for the given volume
|
||||
// and node. When set to true the mounted parameter indicates the volume
|
||||
|
@ -97,21 +97,27 @@ type ActualStateOfWorld interface {
|
|||
// VolumeNodeExists returns true if the specified volume/node combo exists
|
||||
// in the underlying store indicating the specified volume is attached to
|
||||
// the specified node.
|
||||
VolumeNodeExists(volumeName v1.UniqueVolumeName, nodeName types.NodeName) bool
|
||||
IsVolumeAttachedToNode(volumeName v1.UniqueVolumeName, nodeName types.NodeName) bool
|
||||
|
||||
// GetAttachedVolumes generates and returns a list of volumes/node pairs
|
||||
// reflecting which volumes are attached to which nodes based on the
|
||||
// reflecting which volumes might be attached to which nodes based on the
|
||||
// current actual state of the world.
|
||||
GetAttachedVolumes() []AttachedVolume
|
||||
GetAllVolumes() []AttachedVolume
|
||||
|
||||
// GetAttachedVolumes generates and returns a list of volumes attached to
|
||||
// GetAllVolumesForNode generates and returns a list of volumes that are added to
|
||||
// the specified node reflecting which volumes are or might be attached to that node
|
||||
// based on the current actual state of the world. This function is currently used by
|
||||
// attach_detach_controller to process VolumeInUse
|
||||
GetAllVolumesForNode(nodeName types.NodeName) []AttachedVolume
|
||||
|
||||
// GetAttachedVolumesPerNode generates and returns a map of nodes and volumes that are added to
|
||||
// the specified node reflecting which volumes are attached to that node
|
||||
// based on the current actual state of the world.
|
||||
GetAttachedVolumesForNode(nodeName types.NodeName) []AttachedVolume
|
||||
|
||||
// based on the current actual state of the world. This function is currently used by
|
||||
// reconciler to verify whether the volume is still attached to the node.
|
||||
GetAttachedVolumesPerNode() map[types.NodeName][]operationexecutor.AttachedVolume
|
||||
|
||||
// GetNodesForVolume returns the nodes on which the volume is attached
|
||||
// GetNodesForVolume returns the nodes on which the volume is attached.
|
||||
// This function is used by reconciler for multi-attach check.
|
||||
GetNodesForVolume(volumeName v1.UniqueVolumeName) []types.NodeName
|
||||
|
||||
// GetVolumesToReportAttached returns a map containing the set of nodes for
|
||||
|
@ -185,7 +191,7 @@ type attachedVolume struct {
|
|||
spec *volume.Spec
|
||||
|
||||
// nodesAttachedTo is a map containing the set of nodes this volume has
|
||||
// successfully been attached to. The key in this map is the name of the
|
||||
// been trying to be attached to. The key in this map is the name of the
|
||||
// node and the value is a node object containing more information about
|
||||
// the node.
|
||||
nodesAttachedTo map[types.NodeName]nodeAttachedTo
|
||||
|
@ -194,7 +200,8 @@ type attachedVolume struct {
|
|||
devicePath string
|
||||
}
|
||||
|
||||
// The nodeAttachedTo object represents a node that has volumes attached to it.
|
||||
// The nodeAttachedTo object represents a node that has volumes attached to it
|
||||
// or trying to attach to it.
|
||||
type nodeAttachedTo struct {
|
||||
// nodeName contains the name of this node.
|
||||
nodeName types.NodeName
|
||||
|
@ -203,11 +210,8 @@ type nodeAttachedTo struct {
|
|||
// node and is unsafe to detach
|
||||
mountedByNode bool
|
||||
|
||||
// number of times SetVolumeMountedByNode has been called to set the value
|
||||
// of mountedByNode to true. This is used to prevent mountedByNode from
|
||||
// being reset during the period between attach and mount when volumesInUse
|
||||
// status for the node may not be set.
|
||||
mountedByNodeSetCount uint
|
||||
// attached indicates that the volume is confirmed to be attached to this node
|
||||
attached bool
|
||||
|
||||
// detachRequestedTime used to capture the desire to detach this volume
|
||||
detachRequestedTime time.Time
|
||||
|
@ -235,9 +239,16 @@ type nodeToUpdateStatusFor struct {
|
|||
volumesToReportAsAttached map[v1.UniqueVolumeName]v1.UniqueVolumeName
|
||||
}
|
||||
|
||||
func (asw *actualStateOfWorld) MarkVolumeAsUncertain(
|
||||
uniqueName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName) error {
|
||||
|
||||
_, err := asw.AddVolumeNode(uniqueName, volumeSpec, nodeName, "", false)
|
||||
return err
|
||||
}
|
||||
|
||||
func (asw *actualStateOfWorld) MarkVolumeAsAttached(
|
||||
uniqueName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName, devicePath string) error {
|
||||
_, err := asw.AddVolumeNode(uniqueName, volumeSpec, nodeName, devicePath)
|
||||
_, err := asw.AddVolumeNode(uniqueName, volumeSpec, nodeName, devicePath, true)
|
||||
return err
|
||||
}
|
||||
|
||||
|
@ -261,12 +272,12 @@ func (asw *actualStateOfWorld) AddVolumeToReportAsAttached(
|
|||
}
|
||||
|
||||
func (asw *actualStateOfWorld) AddVolumeNode(
|
||||
uniqueName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName, devicePath string) (v1.UniqueVolumeName, error) {
|
||||
uniqueName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName, devicePath string, isAttached bool) (v1.UniqueVolumeName, error) {
|
||||
asw.Lock()
|
||||
defer asw.Unlock()
|
||||
|
||||
var volumeName v1.UniqueVolumeName
|
||||
if volumeSpec != nil {
|
||||
volumeName := uniqueName
|
||||
if volumeName == "" {
|
||||
attachableVolumePlugin, err := asw.volumePluginMgr.FindAttachablePluginBySpec(volumeSpec)
|
||||
if err != nil || attachableVolumePlugin == nil {
|
||||
return "", fmt.Errorf(
|
||||
|
@ -283,12 +294,6 @@ func (asw *actualStateOfWorld) AddVolumeNode(
|
|||
volumeSpec.Name(),
|
||||
err)
|
||||
}
|
||||
} else {
|
||||
// volumeSpec is nil
|
||||
// This happens only on controller startup when reading the volumes from node
|
||||
// status; if the pods using the volume have been removed and are unreachable
|
||||
// the volumes should be detached immediately and the spec is not needed
|
||||
volumeName = uniqueName
|
||||
}
|
||||
|
||||
volumeObj, volumeExists := asw.attachedVolumes[volumeName]
|
||||
|
@ -311,22 +316,26 @@ func (asw *actualStateOfWorld) AddVolumeNode(
|
|||
}
|
||||
asw.attachedVolumes[volumeName] = volumeObj
|
||||
|
||||
_, nodeExists := volumeObj.nodesAttachedTo[nodeName]
|
||||
node, nodeExists := volumeObj.nodesAttachedTo[nodeName]
|
||||
if !nodeExists {
|
||||
// Create object if it doesn't exist.
|
||||
volumeObj.nodesAttachedTo[nodeName] = nodeAttachedTo{
|
||||
nodeName: nodeName,
|
||||
mountedByNode: true, // Assume mounted, until proven otherwise
|
||||
mountedByNodeSetCount: 0,
|
||||
detachRequestedTime: time.Time{},
|
||||
node = nodeAttachedTo{
|
||||
nodeName: nodeName,
|
||||
mountedByNode: true, // Assume mounted, until proven otherwise
|
||||
attached: isAttached,
|
||||
detachRequestedTime: time.Time{},
|
||||
}
|
||||
} else {
|
||||
node.attached = isAttached
|
||||
klog.V(5).Infof("Volume %q is already added to attachedVolume list to the node %q",
|
||||
volumeName,
|
||||
nodeName)
|
||||
}
|
||||
|
||||
asw.addVolumeToReportAsAttached(volumeName, nodeName)
|
||||
volumeObj.nodesAttachedTo[nodeName] = node
|
||||
|
||||
if isAttached {
|
||||
asw.addVolumeToReportAsAttached(volumeName, nodeName)
|
||||
}
|
||||
return volumeName, nil
|
||||
}
|
||||
|
||||
|
@ -340,11 +349,6 @@ func (asw *actualStateOfWorld) SetVolumeMountedByNode(
|
|||
return fmt.Errorf("Failed to SetVolumeMountedByNode with error: %v", err)
|
||||
}
|
||||
|
||||
if mounted {
|
||||
// Increment set count
|
||||
nodeObj.mountedByNodeSetCount = nodeObj.mountedByNodeSetCount + 1
|
||||
}
|
||||
|
||||
nodeObj.mountedByNode = mounted
|
||||
volumeObj.nodesAttachedTo[nodeName] = nodeObj
|
||||
klog.V(4).Infof("SetVolumeMountedByNode volume %v to the node %q mounted %t",
|
||||
|
@ -515,22 +519,24 @@ func (asw *actualStateOfWorld) DeleteVolumeNode(
|
|||
asw.removeVolumeFromReportAsAttached(volumeName, nodeName)
|
||||
}
|
||||
|
||||
func (asw *actualStateOfWorld) VolumeNodeExists(
|
||||
func (asw *actualStateOfWorld) IsVolumeAttachedToNode(
|
||||
volumeName v1.UniqueVolumeName, nodeName types.NodeName) bool {
|
||||
asw.RLock()
|
||||
defer asw.RUnlock()
|
||||
|
||||
volumeObj, volumeExists := asw.attachedVolumes[volumeName]
|
||||
if volumeExists {
|
||||
if _, nodeExists := volumeObj.nodesAttachedTo[nodeName]; nodeExists {
|
||||
return true
|
||||
if node, nodeExists := volumeObj.nodesAttachedTo[nodeName]; nodeExists {
|
||||
if node.attached == true {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func (asw *actualStateOfWorld) GetAttachedVolumes() []AttachedVolume {
|
||||
func (asw *actualStateOfWorld) GetAllVolumes() []AttachedVolume {
|
||||
asw.RLock()
|
||||
defer asw.RUnlock()
|
||||
|
||||
|
@ -546,7 +552,7 @@ func (asw *actualStateOfWorld) GetAttachedVolumes() []AttachedVolume {
|
|||
return attachedVolumes
|
||||
}
|
||||
|
||||
func (asw *actualStateOfWorld) GetAttachedVolumesForNode(
|
||||
func (asw *actualStateOfWorld) GetAllVolumesForNode(
|
||||
nodeName types.NodeName) []AttachedVolume {
|
||||
asw.RLock()
|
||||
defer asw.RUnlock()
|
||||
|
@ -574,9 +580,11 @@ func (asw *actualStateOfWorld) GetAttachedVolumesPerNode() map[types.NodeName][]
|
|||
attachedVolumesPerNode := make(map[types.NodeName][]operationexecutor.AttachedVolume)
|
||||
for _, volumeObj := range asw.attachedVolumes {
|
||||
for nodeName, nodeObj := range volumeObj.nodesAttachedTo {
|
||||
volumes := attachedVolumesPerNode[nodeName]
|
||||
volumes = append(volumes, getAttachedVolume(&volumeObj, &nodeObj).AttachedVolume)
|
||||
attachedVolumesPerNode[nodeName] = volumes
|
||||
if nodeObj.attached {
|
||||
volumes := attachedVolumesPerNode[nodeName]
|
||||
volumes = append(volumes, getAttachedVolume(&volumeObj, &nodeObj).AttachedVolume)
|
||||
attachedVolumesPerNode[nodeName] = volumes
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -593,8 +601,10 @@ func (asw *actualStateOfWorld) GetNodesForVolume(volumeName v1.UniqueVolumeName)
|
|||
}
|
||||
|
||||
nodes := []types.NodeName{}
|
||||
for k := range volumeObj.nodesAttachedTo {
|
||||
nodes = append(nodes, k)
|
||||
for k, nodesAttached := range volumeObj.nodesAttachedTo {
|
||||
if nodesAttached.attached {
|
||||
nodes = append(nodes, k)
|
||||
}
|
||||
}
|
||||
return nodes
|
||||
}
|
||||
|
|
|
@ -192,6 +192,8 @@ type ActualStateOfWorldAttacherUpdater interface {
|
|||
// volumes. See issue 29695.
|
||||
MarkVolumeAsAttached(volumeName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName, devicePath string) error
|
||||
|
||||
MarkVolumeAsUncertain(volumeName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName) error
|
||||
|
||||
// Marks the specified volume as detached from the specified node
|
||||
MarkVolumeAsDetached(volumeName v1.UniqueVolumeName, nodeName types.NodeName)
|
||||
|
||||
|
|
|
@ -323,6 +323,12 @@ func (og *operationGenerator) GenerateAttachVolumeFunc(
|
|||
}
|
||||
|
||||
}
|
||||
addVolumeNodeErr := actualStateOfWorld.MarkVolumeAsUncertain(
|
||||
v1.UniqueVolumeName(""), volumeToAttach.VolumeSpec, volumeToAttach.NodeName)
|
||||
if addVolumeNodeErr != nil {
|
||||
// On failure, return error. Caller will log and retry.
|
||||
return volumeToAttach.GenerateError("AttachVolume.MarkVolumeAsUncertain failed", addVolumeNodeErr)
|
||||
}
|
||||
// On failure, return error. Caller will log and retry.
|
||||
return volumeToAttach.GenerateError("AttachVolume.Attach failed", attachErr)
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue