2016-04-30 06:36:27 +00:00
/*
2016-06-03 00:25:58 +00:00
Copyright 2016 The Kubernetes Authors .
2016-04-30 06:36:27 +00:00
Licensed under the Apache License , Version 2.0 ( the "License" ) ;
you may not use this file except in compliance with the License .
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing , software
distributed under the License is distributed on an "AS IS" BASIS ,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND , either express or implied .
See the License for the specific language governing permissions and
limitations under the License .
*/
// Package reconciler implements interfaces that attempt to reconcile the
// desired state of the world with the actual state of the world by triggering
// actions.
package reconciler
import (
2017-05-07 02:16:19 +00:00
"fmt"
2016-04-30 06:36:27 +00:00
"time"
"github.com/golang/glog"
2017-01-11 14:09:48 +00:00
"k8s.io/apimachinery/pkg/util/wait"
2017-05-19 02:17:59 +00:00
"k8s.io/client-go/tools/record"
2017-01-25 17:40:41 +00:00
"k8s.io/kubernetes/pkg/api/v1"
2016-07-02 01:50:25 +00:00
"k8s.io/kubernetes/pkg/controller/volume/attachdetach/cache"
"k8s.io/kubernetes/pkg/controller/volume/attachdetach/statusupdater"
2017-05-19 02:17:59 +00:00
kevents "k8s.io/kubernetes/pkg/kubelet/events"
2016-07-14 05:38:54 +00:00
"k8s.io/kubernetes/pkg/util/goroutinemap/exponentialbackoff"
2017-01-25 17:40:41 +00:00
"k8s.io/kubernetes/pkg/volume"
2016-05-30 02:22:22 +00:00
"k8s.io/kubernetes/pkg/volume/util/operationexecutor"
2016-04-30 06:36:27 +00:00
)
// Reconciler runs a periodic loop to reconcile the desired state of the world
// with the actual state of the world by triggering attach detach operations.
// Note: This is distinct from the Reconciler implemented by the kubelet volume
// manager. This reconciles state for the attach/detach controller. That
// reconciles state for the kubelet volume manager.
type Reconciler interface {
	// Run starts the reconciliation loop, which executes periodically, checks
	// if volumes that should be attached are attached and volumes that should
	// be detached are detached. If not, it will trigger attach/detach
	// operations to rectify. It runs until stopCh is closed.
	Run(stopCh <-chan struct{})
}
// NewReconciler returns a new instance of Reconciler that waits loopPeriod
// between successive executions.
2016-05-23 20:37:30 +00:00
// loopPeriod is the amount of time the reconciler loop waits between
2016-04-30 06:36:27 +00:00
// successive executions.
2016-05-23 20:37:30 +00:00
// maxWaitForUnmountDuration is the max amount of time the reconciler will wait
// for the volume to be safely unmounted, after this it will detach the volume
// anyway (to handle crashed/unavailable nodes). If during this time the volume
// becomes used by a new pod, the detach request will be aborted and the timer
// cleared.
2016-04-30 06:36:27 +00:00
func NewReconciler (
loopPeriod time . Duration ,
2016-05-23 20:37:30 +00:00
maxWaitForUnmountDuration time . Duration ,
2016-10-14 21:21:58 +00:00
syncDuration time . Duration ,
2017-01-10 00:20:19 +00:00
disableReconciliationSync bool ,
2016-04-30 06:36:27 +00:00
desiredStateOfWorld cache . DesiredStateOfWorld ,
actualStateOfWorld cache . ActualStateOfWorld ,
2016-06-16 06:48:04 +00:00
attacherDetacher operationexecutor . OperationExecutor ,
2017-05-19 02:17:59 +00:00
nodeStatusUpdater statusupdater . NodeStatusUpdater ,
recorder record . EventRecorder ) Reconciler {
2016-04-30 06:36:27 +00:00
return & reconciler {
2016-05-23 20:37:30 +00:00
loopPeriod : loopPeriod ,
maxWaitForUnmountDuration : maxWaitForUnmountDuration ,
2016-10-14 21:21:58 +00:00
syncDuration : syncDuration ,
2017-01-10 00:20:19 +00:00
disableReconciliationSync : disableReconciliationSync ,
2016-05-23 20:37:30 +00:00
desiredStateOfWorld : desiredStateOfWorld ,
actualStateOfWorld : actualStateOfWorld ,
attacherDetacher : attacherDetacher ,
2016-06-16 06:48:04 +00:00
nodeStatusUpdater : nodeStatusUpdater ,
2016-10-14 21:21:58 +00:00
timeOfLastSync : time . Now ( ) ,
2017-05-19 02:17:59 +00:00
recorder : recorder ,
2016-04-30 06:36:27 +00:00
}
}
// reconciler implements the Reconciler interface for the attach/detach
// controller.
type reconciler struct {
	// loopPeriod is the wait between successive executions of the
	// reconciliation loop (see Run).
	loopPeriod time.Duration
	// maxWaitForUnmountDuration is the maximum time to wait for a volume to
	// be safely unmounted before force-detaching it anyway.
	maxWaitForUnmountDuration time.Duration
	// syncDuration is the minimum interval between resyncs of attached
	// volumes; values below one second skip the resync (see
	// reconciliationLoopFunc).
	syncDuration time.Duration
	// desiredStateOfWorld records which volumes should be attached to which
	// nodes.
	desiredStateOfWorld cache.DesiredStateOfWorld
	// actualStateOfWorld records which volumes are attached to which nodes.
	actualStateOfWorld cache.ActualStateOfWorld
	// attacherDetacher triggers the asynchronous attach/detach/verify
	// operations.
	attacherDetacher operationexecutor.OperationExecutor
	// nodeStatusUpdater pushes the attached-volume list into node status.
	nodeStatusUpdater statusupdater.NodeStatusUpdater
	// timeOfLastSync is when sync() last ran; used to rate-limit resyncs to
	// syncDuration.
	timeOfLastSync time.Time
	// disableReconciliationSync, when true, disables the resync of attached
	// volumes entirely.
	disableReconciliationSync bool
	// recorder emits events (e.g. multi-attach errors) on the affected pods.
	recorder record.EventRecorder
}
// Run executes the reconciliation loop every loopPeriod until stopCh is
// closed.
func (rc *reconciler) Run(stopCh <-chan struct{}) {
	loop := rc.reconciliationLoopFunc()
	wait.Until(loop, rc.loopPeriod, stopCh)
}
2017-01-06 22:24:51 +00:00
// reconciliationLoopFunc this can be disabled via cli option disableReconciliation.
// It periodically checks whether the attached volumes from actual state
2017-01-11 08:08:20 +00:00
// are still attached to the node and update the status if they are not.
2016-04-30 06:36:27 +00:00
func ( rc * reconciler ) reconciliationLoopFunc ( ) func ( ) {
return func ( ) {
2017-01-06 22:24:51 +00:00
2016-10-14 21:21:58 +00:00
rc . reconcile ( )
2017-01-06 22:24:51 +00:00
2017-01-10 00:20:19 +00:00
if rc . disableReconciliationSync {
2017-01-06 22:24:51 +00:00
glog . V ( 5 ) . Info ( "Skipping reconciling attached volumes still attached since it is disabled via the command line." )
} else if rc . syncDuration < time . Second {
glog . V ( 5 ) . Info ( "Skipping reconciling attached volumes still attached since it is set to less than one second via the command line." )
} else if time . Since ( rc . timeOfLastSync ) > rc . syncDuration {
glog . V ( 5 ) . Info ( "Starting reconciling attached volumes still attached" )
2016-10-14 21:21:58 +00:00
rc . sync ( )
}
}
}
2016-09-07 22:30:16 +00:00
2016-10-14 21:21:58 +00:00
// sync resyncs the attached-volume state and, regardless of outcome, records
// the time it ran so reconciliationLoopFunc can honor syncDuration.
func (rc *reconciler) sync() {
	// Deferred so the timestamp is updated even if syncStates panics.
	defer rc.updateSyncTime()
	rc.syncStates()
}
2016-06-22 04:47:52 +00:00
2016-10-14 21:21:58 +00:00
// updateSyncTime records the current time as the moment of the last resync.
func (rc *reconciler) updateSyncTime() {
	rc.timeOfLastSync = time.Now()
}
2016-06-22 04:47:52 +00:00
2016-10-14 21:21:58 +00:00
// syncStates asks the operation executor to verify that the volumes recorded
// in the actual state of the world are still attached to their nodes; the
// actual state is passed in so the executor can correct it.
func (rc *reconciler) syncStates() {
	volumesPerNode := rc.actualStateOfWorld.GetAttachedVolumesPerNode()
	rc.attacherDetacher.VerifyVolumesAreAttached(volumesPerNode, rc.actualStateOfWorld)
}
2017-01-25 17:40:41 +00:00
// isMultiAttachForbidden checks if attaching this volume to multiple nodes is definitely not allowed/possible.
// In its current form, this function can only reliably say for which volumes it's definitely forbidden. If it returns
// false, it is not guaranteed that multi-attach is actually supported by the volume type and we must rely on the
// attacher to fail fast in such cases.
// Please see https://github.com/kubernetes/kubernetes/issues/40669 and https://github.com/kubernetes/kubernetes/pull/40148#discussion_r98055047
func (rc *reconciler) isMultiAttachForbidden(volumeSpec *volume.Spec) bool {
	// Inline volume sources known to fail slow or cause trouble when
	// multi-attached are forbidden outright.
	if vol := volumeSpec.Volume; vol != nil {
		if vol.AzureDisk != nil || vol.Cinder != nil {
			return true
		}
	}

	// Only persistent volumes carry reliable access-mode information; for
	// anything else, let the attacher fail later if multi-attach is
	// unsupported.
	pv := volumeSpec.PersistentVolume
	if pv == nil {
		return false
	}

	modes := pv.Spec.AccessModes
	if len(modes) == 0 {
		// No access mode specified, so we don't know for sure; let the
		// attacher fail if needed.
		return false
	}

	// Any *Many mode means the volume may be attached to multiple pods/nodes.
	for _, mode := range modes {
		if mode == v1.ReadWriteMany || mode == v1.ReadOnlyMany {
			return false
		}
	}

	// Exclusively single-node access modes: multi-attach is forbidden.
	return true
}
2016-10-14 21:21:58 +00:00
func ( rc * reconciler ) reconcile ( ) {
// Detaches are triggered before attaches so that volumes referenced by
// pods that are rescheduled to a different node are detached first.
// Ensure volumes that should be detached are detached.
for _ , attachedVolume := range rc . actualStateOfWorld . GetAttachedVolumes ( ) {
if ! rc . desiredStateOfWorld . VolumeExists (
attachedVolume . VolumeName , attachedVolume . NodeName ) {
2017-01-25 17:40:41 +00:00
// Don't even try to start an operation if there is already one running
// This check must be done before we do any other checks, as otherwise the other checks
// may pass while at the same time the volume leaves the pending state, resulting in
// double detach attempts
if rc . attacherDetacher . IsOperationPending ( attachedVolume . VolumeName , "" ) {
glog . V ( 10 ) . Infof ( "Operation for volume %q is already running. Can't start detach for %q" , attachedVolume . VolumeName , attachedVolume . NodeName )
continue
}
2016-10-14 21:21:58 +00:00
// Set the detach request time
elapsedTime , err := rc . actualStateOfWorld . SetDetachRequestTime ( attachedVolume . VolumeName , attachedVolume . NodeName )
if err != nil {
glog . Errorf ( "Cannot trigger detach because it fails to set detach request time with error %v" , err )
continue
}
// Check whether timeout has reached the maximum waiting time
timeout := elapsedTime > rc . maxWaitForUnmountDuration
// Check whether volume is still mounted. Skip detach if it is still mounted unless timeout
if attachedVolume . MountedByNode && ! timeout {
2017-05-07 02:16:19 +00:00
glog . V ( 12 ) . Infof ( attachedVolume . GenerateMsgDetailed ( "Cannot detach volume because it is still mounted" , "" ) )
2016-10-14 21:21:58 +00:00
continue
}
// Before triggering volume detach, mark volume as detached and update the node status
// If it fails to update node status, skip detach volume
2017-03-01 07:29:44 +00:00
err = rc . actualStateOfWorld . RemoveVolumeFromReportAsAttached ( attachedVolume . VolumeName , attachedVolume . NodeName )
if err != nil {
glog . V ( 5 ) . Infof ( "RemoveVolumeFromReportAsAttached failed while removing volume %q from node %q with: %v" ,
attachedVolume . VolumeName ,
attachedVolume . NodeName ,
err )
}
2016-10-14 21:21:58 +00:00
// Update Node Status to indicate volume is no longer safe to mount.
err = rc . nodeStatusUpdater . UpdateNodeStatuses ( )
if err != nil {
// Skip detaching this volume if unable to update node status
2017-05-07 02:16:19 +00:00
glog . Errorf ( attachedVolume . GenerateErrorDetailed ( "UpdateNodeStatuses failed while attempting to report volume as attached" , err ) . Error ( ) )
2016-10-14 21:21:58 +00:00
continue
}
// Trigger detach volume which requires verifing safe to detach step
// If timeout is true, skip verifySafeToDetach check
2017-05-07 02:16:19 +00:00
glog . V ( 5 ) . Infof ( attachedVolume . GenerateMsgDetailed ( "Starting attacherDetacher.DetachVolume" , "" ) )
2016-10-14 21:21:58 +00:00
verifySafeToDetach := ! timeout
err = rc . attacherDetacher . DetachVolume ( attachedVolume . AttachedVolume , verifySafeToDetach , rc . actualStateOfWorld )
if err == nil {
if ! timeout {
2017-05-07 02:16:19 +00:00
glog . Infof ( attachedVolume . GenerateMsgDetailed ( "attacherDetacher.DetachVolume started" , "" ) )
2016-10-14 21:21:58 +00:00
} else {
2017-02-28 15:43:06 +00:00
glog . Warningf ( attachedVolume . GenerateMsgDetailed ( "attacherDetacher.DetachVolume started" , fmt . Sprintf ( "This volume is not safe to detach, but maxWaitForUnmountDuration %v expired, force detaching" , rc . maxWaitForUnmountDuration ) ) )
2016-09-07 22:30:16 +00:00
}
2016-04-30 06:36:27 +00:00
}
2017-01-25 17:40:41 +00:00
if err != nil && ! exponentialbackoff . IsExponentialBackoff ( err ) {
// Ignore exponentialbackoff.IsExponentialBackoff errors, they are expected.
2016-10-14 21:21:58 +00:00
// Log all other errors.
2017-05-07 02:16:19 +00:00
glog . Errorf ( attachedVolume . GenerateErrorDetailed ( "attacherDetacher.DetachVolume failed to start" , err ) . Error ( ) )
2016-10-14 21:21:58 +00:00
}
2016-04-30 06:36:27 +00:00
}
2016-10-14 21:21:58 +00:00
}
2016-05-23 20:37:30 +00:00
2016-10-14 21:21:58 +00:00
// Ensure volumes that should be attached are attached.
for _ , volumeToAttach := range rc . desiredStateOfWorld . GetVolumesToAttach ( ) {
if rc . actualStateOfWorld . VolumeNodeExists (
volumeToAttach . VolumeName , volumeToAttach . NodeName ) {
// Volume/Node exists, touch it to reset detachRequestedTime
2017-05-07 02:16:19 +00:00
glog . V ( 5 ) . Infof ( volumeToAttach . GenerateMsgDetailed ( "Volume attached--touching" , "" ) )
2016-10-14 21:21:58 +00:00
rc . actualStateOfWorld . ResetDetachRequestTime ( volumeToAttach . VolumeName , volumeToAttach . NodeName )
} else {
2017-01-25 17:40:41 +00:00
// Don't even try to start an operation if there is already one running
if rc . attacherDetacher . IsOperationPending ( volumeToAttach . VolumeName , "" ) {
glog . V ( 10 ) . Infof ( "Operation for volume %q is already running. Can't start attach for %q" , volumeToAttach . VolumeName , volumeToAttach . NodeName )
continue
}
if rc . isMultiAttachForbidden ( volumeToAttach . VolumeSpec ) {
nodes := rc . actualStateOfWorld . GetNodesForVolume ( volumeToAttach . VolumeName )
if len ( nodes ) > 0 {
2017-05-19 02:17:59 +00:00
if ! volumeToAttach . MultiAttachErrorReported {
simpleMsg , detailedMsg := volumeToAttach . GenerateMsg ( "Multi-Attach error" , "Volume is already exclusively attached to one node and can't be attached to another" )
for _ , pod := range volumeToAttach . ScheduledPods {
rc . recorder . Eventf ( pod , v1 . EventTypeWarning , kevents . FailedAttachVolume , simpleMsg )
}
volumeToAttach . MultiAttachErrorReported = true
glog . Warningf ( detailedMsg )
}
2017-01-25 17:40:41 +00:00
continue
}
}
2016-10-14 21:21:58 +00:00
// Volume/Node doesn't exist, spawn a goroutine to attach it
2017-05-07 02:16:19 +00:00
glog . V ( 5 ) . Infof ( volumeToAttach . GenerateMsgDetailed ( "Starting attacherDetacher.AttachVolume" , "" ) )
2016-10-14 21:21:58 +00:00
err := rc . attacherDetacher . AttachVolume ( volumeToAttach . VolumeToAttach , rc . actualStateOfWorld )
if err == nil {
2017-05-07 02:16:19 +00:00
glog . Infof ( volumeToAttach . GenerateMsgDetailed ( "attacherDetacher.AttachVolume started" , "" ) )
2016-10-14 21:21:58 +00:00
}
2017-01-25 17:40:41 +00:00
if err != nil && ! exponentialbackoff . IsExponentialBackoff ( err ) {
// Ignore exponentialbackoff.IsExponentialBackoff errors, they are expected.
2016-10-14 21:21:58 +00:00
// Log all other errors.
2017-05-07 02:16:19 +00:00
glog . Errorf ( volumeToAttach . GenerateErrorDetailed ( "attacherDetacher.AttachVolume failed to start" , err ) . Error ( ) )
2016-05-23 20:37:30 +00:00
}
}
2016-10-14 21:21:58 +00:00
}
2016-06-16 06:48:04 +00:00
2016-10-14 21:21:58 +00:00
// Update Node Status
err := rc . nodeStatusUpdater . UpdateNodeStatuses ( )
if err != nil {
2017-02-28 15:43:06 +00:00
glog . Warningf ( "UpdateNodeStatuses failed with: %v" , err )
2016-04-30 06:36:27 +00:00
}
}