mirror of https://github.com/k3s-io/k3s
WIP: Handle failed attach operation leave uncertain volume attach state
This PR fixes issue #32727. When an attach operation fails, it is still possible that the volume will be attached to the node later. This PR adds the logic to record the volume to node with attached state no matter whether the operation succedded or not. If the operation fails, mark the attached state to false. If the operation succeeded, mark the attached state to true. The reconciler will still issue attach operation until it returns successfully. If the pod is removed in the mean time, the reconciler will issue detach operations for all the volumes no matter what is the attached state.pull/564/head
parent
50e02fd0cc
commit
47331cf0a2
|
@ -56,7 +56,7 @@ type ActualStateOfWorld interface {
|
||||||
// added.
|
// added.
|
||||||
// If no node with the name nodeName exists in list of attached nodes for
|
// If no node with the name nodeName exists in list of attached nodes for
|
||||||
// the specified volume, the node is added.
|
// the specified volume, the node is added.
|
||||||
AddVolumeNode(uniqueName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName, devicePath string) (v1.UniqueVolumeName, error)
|
AddVolumeNode(uniqueName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName, devicePath string, attached bool) (v1.UniqueVolumeName, error)
|
||||||
|
|
||||||
// SetVolumeMountedByNode sets the MountedByNode value for the given volume
|
// SetVolumeMountedByNode sets the MountedByNode value for the given volume
|
||||||
// and node. When set to true the mounted parameter indicates the volume
|
// and node. When set to true the mounted parameter indicates the volume
|
||||||
|
@ -97,21 +97,27 @@ type ActualStateOfWorld interface {
|
||||||
// VolumeNodeExists returns true if the specified volume/node combo exists
|
// VolumeNodeExists returns true if the specified volume/node combo exists
|
||||||
// in the underlying store indicating the specified volume is attached to
|
// in the underlying store indicating the specified volume is attached to
|
||||||
// the specified node.
|
// the specified node.
|
||||||
VolumeNodeExists(volumeName v1.UniqueVolumeName, nodeName types.NodeName) bool
|
IsVolumeAttachedToNode(volumeName v1.UniqueVolumeName, nodeName types.NodeName) bool
|
||||||
|
|
||||||
// GetAttachedVolumes generates and returns a list of volumes/node pairs
|
// GetAttachedVolumes generates and returns a list of volumes/node pairs
|
||||||
// reflecting which volumes are attached to which nodes based on the
|
// reflecting which volumes might attached to which nodes based on the
|
||||||
// current actual state of the world.
|
// current actual state of the world.
|
||||||
GetAttachedVolumes() []AttachedVolume
|
GetAllVolumes() []AttachedVolume
|
||||||
|
|
||||||
// GetAttachedVolumes generates and returns a list of volumes attached to
|
// GetAttachedVolumes generates and returns a list of volumes that added to
|
||||||
|
// the specified node reflecting which volumes are/or might be attached to that node
|
||||||
|
// based on the current actual state of the world. This function is currently used by
|
||||||
|
// attach_detach_controller to process VolumeInUse
|
||||||
|
GetAllVolumesForNode(nodeName types.NodeName) []AttachedVolume
|
||||||
|
|
||||||
|
// GetAttachedVolumesPerNode generates and returns a map of nodes and volumes that added to
|
||||||
// the specified node reflecting which volumes are attached to that node
|
// the specified node reflecting which volumes are attached to that node
|
||||||
// based on the current actual state of the world.
|
// based on the current actual state of the world. This function is currently used by
|
||||||
GetAttachedVolumesForNode(nodeName types.NodeName) []AttachedVolume
|
// reconciler to verify whether the volume is still attached to the node.
|
||||||
|
|
||||||
GetAttachedVolumesPerNode() map[types.NodeName][]operationexecutor.AttachedVolume
|
GetAttachedVolumesPerNode() map[types.NodeName][]operationexecutor.AttachedVolume
|
||||||
|
|
||||||
// GetNodesForVolume returns the nodes on which the volume is attached
|
// GetNodesForVolume returns the nodes on which the volume is attached.
|
||||||
|
// This function is used by reconciler for mutli-attach check.
|
||||||
GetNodesForVolume(volumeName v1.UniqueVolumeName) []types.NodeName
|
GetNodesForVolume(volumeName v1.UniqueVolumeName) []types.NodeName
|
||||||
|
|
||||||
// GetVolumesToReportAttached returns a map containing the set of nodes for
|
// GetVolumesToReportAttached returns a map containing the set of nodes for
|
||||||
|
@ -185,7 +191,7 @@ type attachedVolume struct {
|
||||||
spec *volume.Spec
|
spec *volume.Spec
|
||||||
|
|
||||||
// nodesAttachedTo is a map containing the set of nodes this volume has
|
// nodesAttachedTo is a map containing the set of nodes this volume has
|
||||||
// successfully been attached to. The key in this map is the name of the
|
// been trying to be attached to. The key in this map is the name of the
|
||||||
// node and the value is a node object containing more information about
|
// node and the value is a node object containing more information about
|
||||||
// the node.
|
// the node.
|
||||||
nodesAttachedTo map[types.NodeName]nodeAttachedTo
|
nodesAttachedTo map[types.NodeName]nodeAttachedTo
|
||||||
|
@ -194,7 +200,8 @@ type attachedVolume struct {
|
||||||
devicePath string
|
devicePath string
|
||||||
}
|
}
|
||||||
|
|
||||||
// The nodeAttachedTo object represents a node that has volumes attached to it.
|
// The nodeAttachedTo object represents a node that has volumes attached to it
|
||||||
|
// or trying to attach to it.
|
||||||
type nodeAttachedTo struct {
|
type nodeAttachedTo struct {
|
||||||
// nodeName contains the name of this node.
|
// nodeName contains the name of this node.
|
||||||
nodeName types.NodeName
|
nodeName types.NodeName
|
||||||
|
@ -203,11 +210,8 @@ type nodeAttachedTo struct {
|
||||||
// node and is unsafe to detach
|
// node and is unsafe to detach
|
||||||
mountedByNode bool
|
mountedByNode bool
|
||||||
|
|
||||||
// number of times SetVolumeMountedByNode has been called to set the value
|
// attached indicates that the volume is confirmed to be attached to this node
|
||||||
// of mountedByNode to true. This is used to prevent mountedByNode from
|
attached bool
|
||||||
// being reset during the period between attach and mount when volumesInUse
|
|
||||||
// status for the node may not be set.
|
|
||||||
mountedByNodeSetCount uint
|
|
||||||
|
|
||||||
// detachRequestedTime used to capture the desire to detach this volume
|
// detachRequestedTime used to capture the desire to detach this volume
|
||||||
detachRequestedTime time.Time
|
detachRequestedTime time.Time
|
||||||
|
@ -235,9 +239,16 @@ type nodeToUpdateStatusFor struct {
|
||||||
volumesToReportAsAttached map[v1.UniqueVolumeName]v1.UniqueVolumeName
|
volumesToReportAsAttached map[v1.UniqueVolumeName]v1.UniqueVolumeName
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (asw *actualStateOfWorld) MarkVolumeAsUncertain(
|
||||||
|
uniqueName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName) error {
|
||||||
|
|
||||||
|
_, err := asw.AddVolumeNode(uniqueName, volumeSpec, nodeName, "", false)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
func (asw *actualStateOfWorld) MarkVolumeAsAttached(
|
func (asw *actualStateOfWorld) MarkVolumeAsAttached(
|
||||||
uniqueName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName, devicePath string) error {
|
uniqueName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName, devicePath string) error {
|
||||||
_, err := asw.AddVolumeNode(uniqueName, volumeSpec, nodeName, devicePath)
|
_, err := asw.AddVolumeNode(uniqueName, volumeSpec, nodeName, devicePath, true)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -261,12 +272,12 @@ func (asw *actualStateOfWorld) AddVolumeToReportAsAttached(
|
||||||
}
|
}
|
||||||
|
|
||||||
func (asw *actualStateOfWorld) AddVolumeNode(
|
func (asw *actualStateOfWorld) AddVolumeNode(
|
||||||
uniqueName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName, devicePath string) (v1.UniqueVolumeName, error) {
|
uniqueName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName, devicePath string, isAttached bool) (v1.UniqueVolumeName, error) {
|
||||||
asw.Lock()
|
asw.Lock()
|
||||||
defer asw.Unlock()
|
defer asw.Unlock()
|
||||||
|
|
||||||
var volumeName v1.UniqueVolumeName
|
volumeName := uniqueName
|
||||||
if volumeSpec != nil {
|
if volumeName == "" {
|
||||||
attachableVolumePlugin, err := asw.volumePluginMgr.FindAttachablePluginBySpec(volumeSpec)
|
attachableVolumePlugin, err := asw.volumePluginMgr.FindAttachablePluginBySpec(volumeSpec)
|
||||||
if err != nil || attachableVolumePlugin == nil {
|
if err != nil || attachableVolumePlugin == nil {
|
||||||
return "", fmt.Errorf(
|
return "", fmt.Errorf(
|
||||||
|
@ -283,12 +294,6 @@ func (asw *actualStateOfWorld) AddVolumeNode(
|
||||||
volumeSpec.Name(),
|
volumeSpec.Name(),
|
||||||
err)
|
err)
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
// volumeSpec is nil
|
|
||||||
// This happens only on controller startup when reading the volumes from node
|
|
||||||
// status; if the pods using the volume have been removed and are unreachable
|
|
||||||
// the volumes should be detached immediately and the spec is not needed
|
|
||||||
volumeName = uniqueName
|
|
||||||
}
|
}
|
||||||
|
|
||||||
volumeObj, volumeExists := asw.attachedVolumes[volumeName]
|
volumeObj, volumeExists := asw.attachedVolumes[volumeName]
|
||||||
|
@ -311,22 +316,26 @@ func (asw *actualStateOfWorld) AddVolumeNode(
|
||||||
}
|
}
|
||||||
asw.attachedVolumes[volumeName] = volumeObj
|
asw.attachedVolumes[volumeName] = volumeObj
|
||||||
|
|
||||||
_, nodeExists := volumeObj.nodesAttachedTo[nodeName]
|
node, nodeExists := volumeObj.nodesAttachedTo[nodeName]
|
||||||
if !nodeExists {
|
if !nodeExists {
|
||||||
// Create object if it doesn't exist.
|
// Create object if it doesn't exist.
|
||||||
volumeObj.nodesAttachedTo[nodeName] = nodeAttachedTo{
|
node = nodeAttachedTo{
|
||||||
nodeName: nodeName,
|
nodeName: nodeName,
|
||||||
mountedByNode: true, // Assume mounted, until proven otherwise
|
mountedByNode: true, // Assume mounted, until proven otherwise
|
||||||
mountedByNodeSetCount: 0,
|
attached: isAttached,
|
||||||
detachRequestedTime: time.Time{},
|
detachRequestedTime: time.Time{},
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
node.attached = isAttached
|
||||||
klog.V(5).Infof("Volume %q is already added to attachedVolume list to the node %q",
|
klog.V(5).Infof("Volume %q is already added to attachedVolume list to the node %q",
|
||||||
volumeName,
|
volumeName,
|
||||||
nodeName)
|
nodeName)
|
||||||
}
|
}
|
||||||
|
volumeObj.nodesAttachedTo[nodeName] = node
|
||||||
|
|
||||||
|
if isAttached {
|
||||||
asw.addVolumeToReportAsAttached(volumeName, nodeName)
|
asw.addVolumeToReportAsAttached(volumeName, nodeName)
|
||||||
|
}
|
||||||
return volumeName, nil
|
return volumeName, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -340,11 +349,6 @@ func (asw *actualStateOfWorld) SetVolumeMountedByNode(
|
||||||
return fmt.Errorf("Failed to SetVolumeMountedByNode with error: %v", err)
|
return fmt.Errorf("Failed to SetVolumeMountedByNode with error: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if mounted {
|
|
||||||
// Increment set count
|
|
||||||
nodeObj.mountedByNodeSetCount = nodeObj.mountedByNodeSetCount + 1
|
|
||||||
}
|
|
||||||
|
|
||||||
nodeObj.mountedByNode = mounted
|
nodeObj.mountedByNode = mounted
|
||||||
volumeObj.nodesAttachedTo[nodeName] = nodeObj
|
volumeObj.nodesAttachedTo[nodeName] = nodeObj
|
||||||
klog.V(4).Infof("SetVolumeMountedByNode volume %v to the node %q mounted %t",
|
klog.V(4).Infof("SetVolumeMountedByNode volume %v to the node %q mounted %t",
|
||||||
|
@ -515,22 +519,24 @@ func (asw *actualStateOfWorld) DeleteVolumeNode(
|
||||||
asw.removeVolumeFromReportAsAttached(volumeName, nodeName)
|
asw.removeVolumeFromReportAsAttached(volumeName, nodeName)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (asw *actualStateOfWorld) VolumeNodeExists(
|
func (asw *actualStateOfWorld) IsVolumeAttachedToNode(
|
||||||
volumeName v1.UniqueVolumeName, nodeName types.NodeName) bool {
|
volumeName v1.UniqueVolumeName, nodeName types.NodeName) bool {
|
||||||
asw.RLock()
|
asw.RLock()
|
||||||
defer asw.RUnlock()
|
defer asw.RUnlock()
|
||||||
|
|
||||||
volumeObj, volumeExists := asw.attachedVolumes[volumeName]
|
volumeObj, volumeExists := asw.attachedVolumes[volumeName]
|
||||||
if volumeExists {
|
if volumeExists {
|
||||||
if _, nodeExists := volumeObj.nodesAttachedTo[nodeName]; nodeExists {
|
if node, nodeExists := volumeObj.nodesAttachedTo[nodeName]; nodeExists {
|
||||||
|
if node.attached == true {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
func (asw *actualStateOfWorld) GetAttachedVolumes() []AttachedVolume {
|
func (asw *actualStateOfWorld) GetAllVolumes() []AttachedVolume {
|
||||||
asw.RLock()
|
asw.RLock()
|
||||||
defer asw.RUnlock()
|
defer asw.RUnlock()
|
||||||
|
|
||||||
|
@ -546,7 +552,7 @@ func (asw *actualStateOfWorld) GetAttachedVolumes() []AttachedVolume {
|
||||||
return attachedVolumes
|
return attachedVolumes
|
||||||
}
|
}
|
||||||
|
|
||||||
func (asw *actualStateOfWorld) GetAttachedVolumesForNode(
|
func (asw *actualStateOfWorld) GetAllVolumesForNode(
|
||||||
nodeName types.NodeName) []AttachedVolume {
|
nodeName types.NodeName) []AttachedVolume {
|
||||||
asw.RLock()
|
asw.RLock()
|
||||||
defer asw.RUnlock()
|
defer asw.RUnlock()
|
||||||
|
@ -574,11 +580,13 @@ func (asw *actualStateOfWorld) GetAttachedVolumesPerNode() map[types.NodeName][]
|
||||||
attachedVolumesPerNode := make(map[types.NodeName][]operationexecutor.AttachedVolume)
|
attachedVolumesPerNode := make(map[types.NodeName][]operationexecutor.AttachedVolume)
|
||||||
for _, volumeObj := range asw.attachedVolumes {
|
for _, volumeObj := range asw.attachedVolumes {
|
||||||
for nodeName, nodeObj := range volumeObj.nodesAttachedTo {
|
for nodeName, nodeObj := range volumeObj.nodesAttachedTo {
|
||||||
|
if nodeObj.attached {
|
||||||
volumes := attachedVolumesPerNode[nodeName]
|
volumes := attachedVolumesPerNode[nodeName]
|
||||||
volumes = append(volumes, getAttachedVolume(&volumeObj, &nodeObj).AttachedVolume)
|
volumes = append(volumes, getAttachedVolume(&volumeObj, &nodeObj).AttachedVolume)
|
||||||
attachedVolumesPerNode[nodeName] = volumes
|
attachedVolumesPerNode[nodeName] = volumes
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return attachedVolumesPerNode
|
return attachedVolumesPerNode
|
||||||
}
|
}
|
||||||
|
@ -593,9 +601,11 @@ func (asw *actualStateOfWorld) GetNodesForVolume(volumeName v1.UniqueVolumeName)
|
||||||
}
|
}
|
||||||
|
|
||||||
nodes := []types.NodeName{}
|
nodes := []types.NodeName{}
|
||||||
for k := range volumeObj.nodesAttachedTo {
|
for k, nodesAttached := range volumeObj.nodesAttachedTo {
|
||||||
|
if nodesAttached.attached {
|
||||||
nodes = append(nodes, k)
|
nodes = append(nodes, k)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return nodes
|
return nodes
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -192,6 +192,8 @@ type ActualStateOfWorldAttacherUpdater interface {
|
||||||
// volumes. See issue 29695.
|
// volumes. See issue 29695.
|
||||||
MarkVolumeAsAttached(volumeName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName, devicePath string) error
|
MarkVolumeAsAttached(volumeName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName, devicePath string) error
|
||||||
|
|
||||||
|
MarkVolumeAsUncertain(volumeName v1.UniqueVolumeName, volumeSpec *volume.Spec, nodeName types.NodeName) error
|
||||||
|
|
||||||
// Marks the specified volume as detached from the specified node
|
// Marks the specified volume as detached from the specified node
|
||||||
MarkVolumeAsDetached(volumeName v1.UniqueVolumeName, nodeName types.NodeName)
|
MarkVolumeAsDetached(volumeName v1.UniqueVolumeName, nodeName types.NodeName)
|
||||||
|
|
||||||
|
|
|
@ -323,6 +323,12 @@ func (og *operationGenerator) GenerateAttachVolumeFunc(
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
addVolumeNodeErr := actualStateOfWorld.MarkVolumeAsUncertain(
|
||||||
|
v1.UniqueVolumeName(""), volumeToAttach.VolumeSpec, volumeToAttach.NodeName)
|
||||||
|
if addVolumeNodeErr != nil {
|
||||||
|
// On failure, return error. Caller will log and retry.
|
||||||
|
return volumeToAttach.GenerateError("AttachVolume.MarkVolumeAsUncertain failed", addVolumeNodeErr)
|
||||||
|
}
|
||||||
// On failure, return error. Caller will log and retry.
|
// On failure, return error. Caller will log and retry.
|
||||||
return volumeToAttach.GenerateError("AttachVolume.Attach failed", attachErr)
|
return volumeToAttach.GenerateError("AttachVolume.Attach failed", attachErr)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue