2015-10-31 00:16:57 +00:00
/ *
2016-06-03 00:25:58 +00:00
Copyright 2015 The Kubernetes Authors .
2015-10-31 00:16:57 +00:00
Licensed under the Apache License , Version 2.0 ( the "License" ) ;
you may not use this file except in compliance with the License .
You may obtain a copy of the License at
http : //www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing , software
distributed under the License is distributed on an "AS IS" BASIS ,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND , either express or implied .
See the License for the specific language governing permissions and
limitations under the License .
* /
2018-10-05 11:06:12 +00:00
package drain
2015-10-31 00:16:57 +00:00
import (
	"errors"
	"fmt"
	"math"
	"sort"
	"strings"
	"time"

	"github.com/spf13/cobra"

	corev1 "k8s.io/api/core/v1"
	policyv1beta1 "k8s.io/api/policy/v1beta1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/fields"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/types"
	utilerrors "k8s.io/apimachinery/pkg/util/errors"
	"k8s.io/apimachinery/pkg/util/json"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/apimachinery/pkg/util/strategicpatch"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/kubernetes"
	restclient "k8s.io/client-go/rest"

	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/cli-runtime/pkg/genericclioptions"
	"k8s.io/cli-runtime/pkg/genericclioptions/printers"
	"k8s.io/cli-runtime/pkg/genericclioptions/resource"
	cmdutil "k8s.io/kubernetes/pkg/kubectl/cmd/util"
	"k8s.io/kubernetes/pkg/kubectl/scheme"
	"k8s.io/kubernetes/pkg/kubectl/util/i18n"
	"k8s.io/kubernetes/pkg/kubectl/util/templates"
)
type DrainOptions struct {
2018-05-02 19:15:47 +00:00
PrintFlags * genericclioptions . PrintFlags
2018-04-19 00:02:37 +00:00
ToPrinter func ( string ) ( printers . ResourcePrinterFunc , error )
Namespace string
2017-10-23 10:25:13 +00:00
client kubernetes . Interface
2016-09-07 20:29:57 +00:00
restClient * restclient . RESTClient
2015-10-31 00:16:57 +00:00
Force bool
2017-09-13 19:20:54 +00:00
DryRun bool
2015-10-31 00:16:57 +00:00
GracePeriodSeconds int
2016-01-27 18:27:14 +00:00
IgnoreDaemonsets bool
2016-10-13 21:34:51 +00:00
Timeout time . Duration
2016-06-01 21:50:13 +00:00
DeleteLocalData bool
2017-09-22 17:55:54 +00:00
Selector string
2017-12-05 21:36:58 +00:00
PodSelector string
2017-09-22 17:55:54 +00:00
nodeInfos [ ] * resource . Info
2018-04-19 00:02:37 +00:00
genericclioptions . IOStreams
2015-10-31 00:16:57 +00:00
}
2016-06-01 21:50:13 +00:00
// Takes a pod and returns a bool indicating whether or not to operate on the
// pod, an optional warning message, and an optional fatal error.
2017-10-23 10:25:13 +00:00
type podFilter func ( corev1 . Pod ) ( include bool , w * warning , f * fatal )
2016-06-01 21:50:13 +00:00
type warning struct {
string
}
type fatal struct {
string
}
2015-10-31 00:16:57 +00:00
const (
	// EvictionKind is the Kind used for Eviction objects.
	EvictionKind = "Eviction"
	// EvictionSubresource is the name of the pod eviction subresource as it
	// appears in API discovery.
	EvictionSubresource = "pods/eviction"

	// Messages reported by the pod filters. Each "Fatal" text blocks the
	// drain; the matching "Warning" text is reported when the corresponding
	// flag permits the drain to continue.
	daemonsetFatal      = "DaemonSet-managed Pods (use --ignore-daemonsets to ignore)"
	daemonsetWarning    = "ignoring DaemonSet-managed Pods"
	localStorageFatal   = "Pods with local storage (use --delete-local-data to override)"
	localStorageWarning = "deleting Pods with local storage"
	unmanagedFatal      = "Pods not managed by ReplicationController, ReplicaSet, Job, DaemonSet or StatefulSet (use --force to override)"
	unmanagedWarning    = "deleting Pods not managed by ReplicationController, ReplicaSet, Job, DaemonSet or StatefulSet"
)
var (
2018-10-30 10:35:24 +00:00
cordonLong = templates . LongDesc ( i18n . T ( `
2017-03-15 03:49:10 +00:00
Mark node as unschedulable . ` ) )
2016-10-07 22:24:42 +00:00
2018-10-30 10:35:24 +00:00
cordonExample = templates . Examples ( i18n . T ( `
2016-05-20 17:49:56 +00:00
# Mark node "foo" as unschedulable .
2017-03-15 03:49:10 +00:00
kubectl cordon foo ` ) )
2015-10-31 00:16:57 +00:00
)
2018-04-19 00:02:37 +00:00
func NewCmdCordon ( f cmdutil . Factory , ioStreams genericclioptions . IOStreams ) * cobra . Command {
2018-10-30 10:35:24 +00:00
o := NewDrainOptions ( f , ioStreams )
2015-10-31 00:16:57 +00:00
2016-03-10 01:27:19 +00:00
cmd := & cobra . Command {
2018-10-05 19:59:38 +00:00
Use : "cordon NODE" ,
2017-10-11 06:26:02 +00:00
DisableFlagsInUseLine : true ,
2018-10-05 19:59:38 +00:00
Short : i18n . T ( "Mark node as unschedulable" ) ,
2018-10-30 10:35:24 +00:00
Long : cordonLong ,
Example : cordonExample ,
2015-10-31 00:16:57 +00:00
Run : func ( cmd * cobra . Command , args [ ] string ) {
2018-10-30 10:35:24 +00:00
cmdutil . CheckErr ( o . Complete ( f , cmd , args ) )
cmdutil . CheckErr ( o . RunCordonOrUncordon ( true ) )
2015-10-31 00:16:57 +00:00
} ,
}
2018-10-30 10:35:24 +00:00
cmd . Flags ( ) . StringVarP ( & o . Selector , "selector" , "l" , o . Selector , "Selector (label query) to filter on" )
2017-09-13 19:20:54 +00:00
cmdutil . AddDryRunFlag ( cmd )
2016-03-10 01:27:19 +00:00
return cmd
2015-10-31 00:16:57 +00:00
}
2016-05-20 17:49:56 +00:00
var (
2018-10-30 10:35:24 +00:00
uncordonLong = templates . LongDesc ( i18n . T ( `
2017-03-15 03:49:10 +00:00
Mark node as schedulable . ` ) )
2016-10-07 22:24:42 +00:00
2018-10-30 10:35:24 +00:00
uncordonExample = templates . Examples ( i18n . T ( `
2016-05-20 17:49:56 +00:00
# Mark node "foo" as schedulable .
2017-03-15 03:49:10 +00:00
$ kubectl uncordon foo ` ) )
2015-10-31 00:16:57 +00:00
)
2018-04-19 00:02:37 +00:00
func NewCmdUncordon ( f cmdutil . Factory , ioStreams genericclioptions . IOStreams ) * cobra . Command {
2018-10-30 10:35:24 +00:00
o := NewDrainOptions ( f , ioStreams )
2015-10-31 00:16:57 +00:00
2016-03-10 01:27:19 +00:00
cmd := & cobra . Command {
2018-10-05 19:59:38 +00:00
Use : "uncordon NODE" ,
2017-10-11 06:26:02 +00:00
DisableFlagsInUseLine : true ,
2018-10-05 19:59:38 +00:00
Short : i18n . T ( "Mark node as schedulable" ) ,
2018-10-30 10:35:24 +00:00
Long : uncordonLong ,
Example : uncordonExample ,
2015-10-31 00:16:57 +00:00
Run : func ( cmd * cobra . Command , args [ ] string ) {
2018-10-30 10:35:24 +00:00
cmdutil . CheckErr ( o . Complete ( f , cmd , args ) )
cmdutil . CheckErr ( o . RunCordonOrUncordon ( false ) )
2015-10-31 00:16:57 +00:00
} ,
}
2018-10-30 10:35:24 +00:00
cmd . Flags ( ) . StringVarP ( & o . Selector , "selector" , "l" , o . Selector , "Selector (label query) to filter on" )
2017-09-13 19:20:54 +00:00
cmdutil . AddDryRunFlag ( cmd )
2016-03-10 01:27:19 +00:00
return cmd
2015-10-31 00:16:57 +00:00
}
2016-05-20 17:49:56 +00:00
var (
2018-10-30 10:35:24 +00:00
drainLong = templates . LongDesc ( i18n . T ( `
2016-05-20 17:49:56 +00:00
Drain node in preparation for maintenance .
The given node will be marked unschedulable to prevent new pods from arriving .
2019-02-22 01:43:51 +00:00
' drain ' evicts the pods if the APIServer supports
2018-10-26 10:32:36 +00:00
[ eviction ] ( http : //kubernetes.io/docs/admin/disruptions/). Otherwise, it will use normal
DELETE to delete the pods .
2016-11-22 22:51:03 +00:00
The ' drain ' evicts or deletes all pods except mirror pods ( which cannot be deleted through
2016-05-20 17:49:56 +00:00
the API server ) . If there are DaemonSet - managed pods , drain will not proceed
without -- ignore - daemonsets , and regardless it will not delete any
DaemonSet - managed pods , because those pods would be immediately replaced by the
DaemonSet controller , which ignores unschedulable markings . If there are any
2016-09-21 04:26:04 +00:00
pods that are neither mirror pods nor managed by ReplicationController ,
2016-11-22 22:51:03 +00:00
ReplicaSet , DaemonSet , StatefulSet or Job , then drain will not delete any pods unless you
2017-02-22 01:07:42 +00:00
use -- force . -- force will also allow deletion to proceed if the managing resource of one
or more pods is missing .
2016-05-20 17:49:56 +00:00
2016-11-22 22:51:03 +00:00
' drain ' waits for graceful termination . You should not operate on the machine until
the command completes .
2016-05-20 17:49:56 +00:00
When you are ready to put the node back into service , use kubectl uncordon , which
will make the node schedulable again .
2016-05-17 11:59:43 +00:00
2017-03-15 03:49:10 +00:00
! [ Workflow ] ( http : //kubernetes.io/images/docs/kubectl_drain.svg)`))
2016-05-20 17:49:56 +00:00
2018-10-30 10:35:24 +00:00
drainExample = templates . Examples ( i18n . T ( `
2016-11-22 22:51:03 +00:00
# Drain node "foo" , even if there are pods not managed by a ReplicationController , ReplicaSet , Job , DaemonSet or StatefulSet on it .
2016-05-20 17:49:56 +00:00
$ kubectl drain foo -- force
2016-11-22 22:51:03 +00:00
# As above , but abort if there are pods not managed by a ReplicationController , ReplicaSet , Job , DaemonSet or StatefulSet , and use a grace period of 15 minutes .
2017-03-15 03:49:10 +00:00
$ kubectl drain foo -- grace - period = 900 ` ) )
2015-10-31 00:16:57 +00:00
)
2018-04-19 00:02:37 +00:00
func NewDrainOptions ( f cmdutil . Factory , ioStreams genericclioptions . IOStreams ) * DrainOptions {
2018-02-24 07:55:55 +00:00
return & DrainOptions {
2018-05-02 19:15:47 +00:00
PrintFlags : genericclioptions . NewPrintFlags ( "drained" ) . WithTypeSetter ( scheme . Scheme ) ,
2018-04-19 00:02:37 +00:00
IOStreams : ioStreams ,
2018-02-24 07:55:55 +00:00
GracePeriodSeconds : - 1 ,
}
}
2018-04-19 00:02:37 +00:00
func NewCmdDrain ( f cmdutil . Factory , ioStreams genericclioptions . IOStreams ) * cobra . Command {
2018-10-30 10:35:24 +00:00
o := NewDrainOptions ( f , ioStreams )
2015-10-31 00:16:57 +00:00
cmd := & cobra . Command {
2018-10-05 19:59:38 +00:00
Use : "drain NODE" ,
2017-10-11 06:26:02 +00:00
DisableFlagsInUseLine : true ,
2018-10-05 19:59:38 +00:00
Short : i18n . T ( "Drain node in preparation for maintenance" ) ,
2018-10-30 10:35:24 +00:00
Long : drainLong ,
Example : drainExample ,
2015-10-31 00:16:57 +00:00
Run : func ( cmd * cobra . Command , args [ ] string ) {
2018-10-30 10:35:24 +00:00
cmdutil . CheckErr ( o . Complete ( f , cmd , args ) )
cmdutil . CheckErr ( o . RunDrain ( ) )
2015-10-31 00:16:57 +00:00
} ,
}
2018-10-30 10:35:24 +00:00
cmd . Flags ( ) . BoolVar ( & o . Force , "force" , o . Force , "Continue even if there are pods not managed by a ReplicationController, ReplicaSet, Job, DaemonSet or StatefulSet." )
cmd . Flags ( ) . BoolVar ( & o . IgnoreDaemonsets , "ignore-daemonsets" , o . IgnoreDaemonsets , "Ignore DaemonSet-managed pods." )
cmd . Flags ( ) . BoolVar ( & o . DeleteLocalData , "delete-local-data" , o . DeleteLocalData , "Continue even if there are pods using emptyDir (local data that will be deleted when the node is drained)." )
cmd . Flags ( ) . IntVar ( & o . GracePeriodSeconds , "grace-period" , o . GracePeriodSeconds , "Period of time in seconds given to each pod to terminate gracefully. If negative, the default value specified in the pod will be used." )
cmd . Flags ( ) . DurationVar ( & o . Timeout , "timeout" , o . Timeout , "The length of time to wait before giving up, zero means infinite" )
cmd . Flags ( ) . StringVarP ( & o . Selector , "selector" , "l" , o . Selector , "Selector (label query) to filter on" )
cmd . Flags ( ) . StringVarP ( & o . PodSelector , "pod-selector" , "" , o . PodSelector , "Label selector to filter pods on the node" )
2017-12-05 21:36:58 +00:00
2017-09-13 19:20:54 +00:00
cmdutil . AddDryRunFlag ( cmd )
2015-10-31 00:16:57 +00:00
return cmd
}
2018-04-19 00:02:37 +00:00
// Complete populates some fields from the factory, grabs command line
2015-10-31 00:16:57 +00:00
// arguments and looks up the node using Builder
2018-04-19 00:02:37 +00:00
func ( o * DrainOptions ) Complete ( f cmdutil . Factory , cmd * cobra . Command , args [ ] string ) error {
2015-10-31 00:16:57 +00:00
var err error
2017-09-22 17:55:54 +00:00
if len ( args ) == 0 && ! cmd . Flags ( ) . Changed ( "selector" ) {
return cmdutil . UsageErrorf ( cmd , fmt . Sprintf ( "USAGE: %s [flags]" , cmd . Use ) )
}
if len ( args ) > 0 && len ( o . Selector ) > 0 {
return cmdutil . UsageErrorf ( cmd , "error: cannot specify both a node name and a --selector option" )
}
2015-10-31 00:16:57 +00:00
2017-12-18 11:28:55 +00:00
o . DryRun = cmdutil . GetDryRunFlag ( cmd )
2017-09-13 19:20:54 +00:00
2018-04-19 00:02:37 +00:00
if o . client , err = f . KubernetesClientSet ( ) ; err != nil {
2016-09-07 20:29:57 +00:00
return err
}
2017-12-05 21:36:58 +00:00
if len ( o . PodSelector ) > 0 {
if _ , err := labels . Parse ( o . PodSelector ) ; err != nil {
return errors . New ( "--pod-selector=<pod_selector> must be a valid label selector" )
}
}
2018-04-19 00:02:37 +00:00
o . restClient , err = f . RESTClient ( )
2016-09-07 20:29:57 +00:00
if err != nil {
2015-10-31 00:16:57 +00:00
return err
}
2017-09-22 17:55:54 +00:00
o . nodeInfos = [ ] * resource . Info { }
2015-10-31 00:16:57 +00:00
2018-05-24 13:33:36 +00:00
o . Namespace , _ , err = f . ToRawKubeConfigLoader ( ) . Namespace ( )
2015-10-31 00:16:57 +00:00
if err != nil {
return err
}
2018-04-19 00:02:37 +00:00
o . ToPrinter = func ( operation string ) ( printers . ResourcePrinterFunc , error ) {
o . PrintFlags . NamePrintFlags . Operation = operation
if o . DryRun {
o . PrintFlags . Complete ( "%s (dry run)" )
}
printer , err := o . PrintFlags . ToPrinter ( )
if err != nil {
return nil , err
}
2018-05-02 19:15:47 +00:00
2018-04-19 00:02:37 +00:00
return printer . PrintObj , nil
}
builder := f . NewBuilder ( ) .
2018-09-18 21:24:20 +00:00
WithScheme ( scheme . Scheme , scheme . Scheme . PrioritizedVersionsAllGroups ( ) ... ) .
2018-04-19 00:02:37 +00:00
NamespaceParam ( o . Namespace ) . DefaultNamespace ( ) .
2017-10-17 17:24:50 +00:00
ResourceNames ( "nodes" , args ... ) .
SingleResourceType ( ) .
Flatten ( )
if len ( o . Selector ) > 0 {
2017-08-04 06:54:17 +00:00
builder = builder . LabelSelectorParam ( o . Selector ) .
2017-10-17 17:24:50 +00:00
ResourceTypes ( "nodes" )
2017-09-22 17:55:54 +00:00
}
2017-10-17 17:24:50 +00:00
r := builder . Do ( )
2015-10-31 00:16:57 +00:00
if err = r . Err ( ) ; err != nil {
return err
}
return r . Visit ( func ( info * resource . Info , err error ) error {
if err != nil {
return err
}
2018-05-01 17:02:44 +00:00
if info . Mapping . Resource . GroupResource ( ) != ( schema . GroupResource { Group : "" , Resource : "nodes" } ) {
2017-10-17 17:24:50 +00:00
return fmt . Errorf ( "error: expected resource of type node, got %q" , info . Mapping . Resource )
}
2017-09-22 17:55:54 +00:00
o . nodeInfos = append ( o . nodeInfos , info )
2015-10-31 00:16:57 +00:00
return nil
} )
}
// RunDrain runs the 'drain' command
func ( o * DrainOptions ) RunDrain ( ) error {
if err := o . RunCordonOrUncordon ( true ) ; err != nil {
return err
}
2018-05-02 19:15:47 +00:00
printObj , err := o . ToPrinter ( "drained" )
2018-04-19 00:02:37 +00:00
if err != nil {
return err
}
2017-09-22 17:55:54 +00:00
drainedNodes := sets . NewString ( )
var fatal error
for _ , info := range o . nodeInfos {
2017-09-13 19:20:54 +00:00
var err error
if ! o . DryRun {
err = o . deleteOrEvictPodsSimple ( info )
}
if err == nil || o . DryRun {
2017-09-22 17:55:54 +00:00
drainedNodes . Insert ( info . Name )
2018-05-02 19:15:47 +00:00
printObj ( info . Object , o . Out )
2017-09-22 17:55:54 +00:00
} else {
fmt . Fprintf ( o . ErrOut , "error: unable to drain node %q, aborting command...\n\n" , info . Name )
remainingNodes := [ ] string { }
fatal = err
for _ , remainingInfo := range o . nodeInfos {
if drainedNodes . Has ( remainingInfo . Name ) {
continue
}
remainingNodes = append ( remainingNodes , remainingInfo . Name )
}
if len ( remainingNodes ) > 0 {
fmt . Fprintf ( o . ErrOut , "There are pending nodes to be drained:\n" )
for _ , nodeName := range remainingNodes {
fmt . Fprintf ( o . ErrOut , " %s\n" , nodeName )
}
}
break
}
2016-10-20 16:43:48 +00:00
}
2017-09-22 17:55:54 +00:00
return fatal
2016-10-20 16:43:48 +00:00
}
2017-09-22 17:55:54 +00:00
func ( o * DrainOptions ) deleteOrEvictPodsSimple ( nodeInfo * resource . Info ) error {
pods , err := o . getPodsForDeletion ( nodeInfo )
2015-10-31 00:16:57 +00:00
if err != nil {
return err
}
2016-11-29 03:18:01 +00:00
2016-10-20 16:43:48 +00:00
err = o . deleteOrEvictPods ( pods )
if err != nil {
2017-09-22 17:55:54 +00:00
pendingPods , newErr := o . getPodsForDeletion ( nodeInfo )
2016-10-20 16:43:48 +00:00
if newErr != nil {
return newErr
}
2017-09-22 17:55:54 +00:00
fmt . Fprintf ( o . ErrOut , "There are pending pods in node %q when an error occurred: %v\n" , nodeInfo . Name , err )
2016-10-20 16:43:48 +00:00
for _ , pendingPod := range pendingPods {
2017-02-26 05:41:39 +00:00
fmt . Fprintf ( o . ErrOut , "%s/%s\n" , "pod" , pendingPod . Name )
2016-10-20 16:43:48 +00:00
}
2015-10-31 00:16:57 +00:00
}
2016-10-20 16:43:48 +00:00
return err
2015-10-31 00:16:57 +00:00
}
2017-12-05 21:58:01 +00:00
func ( o * DrainOptions ) getPodController ( pod corev1 . Pod ) * metav1 . OwnerReference {
return metav1 . GetControllerOf ( & pod )
2016-06-01 21:50:13 +00:00
}
2016-04-28 11:03:42 +00:00
2017-10-23 10:25:13 +00:00
func ( o * DrainOptions ) unreplicatedFilter ( pod corev1 . Pod ) ( bool , * warning , * fatal ) {
2016-08-31 01:31:42 +00:00
// any finished pod can be removed
2017-10-23 10:25:13 +00:00
if pod . Status . Phase == corev1 . PodSucceeded || pod . Status . Phase == corev1 . PodFailed {
2016-08-31 01:31:42 +00:00
return true , nil , nil
}
2017-12-05 21:58:01 +00:00
controllerRef := o . getPodController ( pod )
2017-06-13 23:46:34 +00:00
if controllerRef != nil {
2016-06-01 21:50:13 +00:00
return true , nil , nil
}
2017-12-05 21:58:01 +00:00
if o . Force {
2018-10-30 10:35:24 +00:00
return true , & warning { unmanagedWarning } , nil
2016-06-01 21:50:13 +00:00
}
2017-12-05 21:58:01 +00:00
2018-10-30 10:35:24 +00:00
return false , nil , & fatal { unmanagedFatal }
2016-06-01 21:50:13 +00:00
}
2016-04-28 11:03:42 +00:00
2017-10-23 10:25:13 +00:00
func ( o * DrainOptions ) daemonsetFilter ( pod corev1 . Pod ) ( bool , * warning , * fatal ) {
2017-02-22 01:07:42 +00:00
// Note that we return false in cases where the pod is DaemonSet managed,
2018-09-17 14:46:21 +00:00
// regardless of flags.
2017-02-22 01:07:42 +00:00
//
// The exception is for pods that are orphaned (the referencing
// management resource - including DaemonSet - is not found).
// Such pods will be deleted if --force is used.
2017-12-05 21:58:01 +00:00
controllerRef := o . getPodController ( pod )
2017-06-13 23:46:34 +00:00
if controllerRef == nil || controllerRef . Kind != "DaemonSet" {
2016-06-01 21:50:13 +00:00
return true , nil , nil
}
2018-09-17 14:46:21 +00:00
// Any finished pod can be removed.
if pod . Status . Phase == corev1 . PodSucceeded || pod . Status . Phase == corev1 . PodFailed {
return true , nil , nil
}
2017-12-05 21:58:01 +00:00
2018-12-19 16:18:53 +00:00
if _ , err := o . client . AppsV1 ( ) . DaemonSets ( pod . Namespace ) . Get ( controllerRef . Name , metav1 . GetOptions { } ) ; err != nil {
2017-12-05 21:58:01 +00:00
// remove orphaned pods with a warning if --force is used
if apierrors . IsNotFound ( err ) && o . Force {
return true , & warning { err . Error ( ) } , nil
}
2018-09-17 14:46:21 +00:00
2016-06-01 21:50:13 +00:00
return false , nil , & fatal { err . Error ( ) }
2016-04-28 11:03:42 +00:00
}
2017-12-05 21:58:01 +00:00
2016-06-01 21:50:13 +00:00
if ! o . IgnoreDaemonsets {
2018-10-30 10:35:24 +00:00
return false , nil , & fatal { daemonsetFatal }
2016-06-01 21:50:13 +00:00
}
2017-12-05 21:58:01 +00:00
2018-10-30 10:35:24 +00:00
return false , & warning { daemonsetWarning } , nil
2016-06-01 21:50:13 +00:00
}
2016-04-28 11:03:42 +00:00
2017-10-23 10:25:13 +00:00
func mirrorPodFilter ( pod corev1 . Pod ) ( bool , * warning , * fatal ) {
2017-07-06 03:23:30 +00:00
if _ , found := pod . ObjectMeta . Annotations [ corev1 . MirrorPodAnnotationKey ] ; found {
2016-06-01 21:50:13 +00:00
return false , nil , nil
2016-04-28 11:03:42 +00:00
}
2016-06-01 21:50:13 +00:00
return true , nil , nil
}
2017-10-23 10:25:13 +00:00
func hasLocalStorage ( pod corev1 . Pod ) bool {
2016-06-01 21:50:13 +00:00
for _ , volume := range pod . Spec . Volumes {
if volume . EmptyDir != nil {
return true
}
2016-04-28 11:03:42 +00:00
}
2016-06-01 21:50:13 +00:00
return false
}
2017-10-23 10:25:13 +00:00
func ( o * DrainOptions ) localStorageFilter ( pod corev1 . Pod ) ( bool , * warning , * fatal ) {
2016-06-01 21:50:13 +00:00
if ! hasLocalStorage ( pod ) {
return true , nil , nil
}
2018-09-17 14:46:21 +00:00
// Any finished pod can be removed.
if pod . Status . Phase == corev1 . PodSucceeded || pod . Status . Phase == corev1 . PodFailed {
return true , nil , nil
}
2016-06-01 21:50:13 +00:00
if ! o . DeleteLocalData {
2018-10-30 10:35:24 +00:00
return false , nil , & fatal { localStorageFatal }
2016-06-01 21:50:13 +00:00
}
2018-09-17 14:46:21 +00:00
2018-10-30 10:35:24 +00:00
return true , & warning { localStorageWarning } , nil
2016-04-28 11:03:42 +00:00
}
2016-06-01 21:50:13 +00:00
// podStatuses maps a status message to the names of the pods having that
// status.
type podStatuses map[string][]string

// Message renders the statuses as "status: pod, pod; status: pod". Entries
// are emitted in sorted status order so the resulting text is deterministic
// (plain map iteration order is randomized). An empty map yields "".
func (ps podStatuses) Message() string {
	keys := make([]string, 0, len(ps))
	for key := range ps {
		keys = append(keys, key)
	}
	sort.Strings(keys)

	msgs := make([]string, 0, len(keys))
	for _, key := range keys {
		msgs = append(msgs, fmt.Sprintf("%s: %s", key, strings.Join(ps[key], ", ")))
	}
	return strings.Join(msgs, "; ")
}
2016-04-28 11:03:42 +00:00
2017-09-22 17:55:54 +00:00
// getPodsForDeletion receives resource info for a node, and returns all the pods from the given node that we
// are planning on deleting. If there are any pods preventing us from deleting, we return that list in an error.
2017-10-23 10:25:13 +00:00
func ( o * DrainOptions ) getPodsForDeletion ( nodeInfo * resource . Info ) ( pods [ ] corev1 . Pod , err error ) {
2017-12-05 21:36:58 +00:00
labelSelector , err := labels . Parse ( o . PodSelector )
if err != nil {
return pods , err
}
2017-11-12 11:00:21 +00:00
podList , err := o . client . CoreV1 ( ) . Pods ( metav1 . NamespaceAll ) . List ( metav1 . ListOptions {
2017-12-05 21:36:58 +00:00
LabelSelector : labelSelector . String ( ) ,
2017-09-22 17:55:54 +00:00
FieldSelector : fields . SelectorFromSet ( fields . Set { "spec.nodeName" : nodeInfo . Name } ) . String ( ) } )
2016-04-28 11:03:42 +00:00
if err != nil {
2016-06-01 21:50:13 +00:00
return pods , err
2015-10-31 00:16:57 +00:00
}
2016-06-01 21:50:13 +00:00
ws := podStatuses { }
fs := podStatuses { }
2015-10-31 00:16:57 +00:00
for _ , pod := range podList . Items {
2016-06-01 21:50:13 +00:00
podOk := true
2017-12-01 16:26:11 +00:00
for _ , filt := range [ ] podFilter { o . daemonsetFilter , mirrorPodFilter , o . localStorageFilter , o . unreplicatedFilter } {
2016-06-01 21:50:13 +00:00
filterOk , w , f := filt ( pod )
podOk = podOk && filterOk
if w != nil {
ws [ w . string ] = append ( ws [ w . string ] , pod . Name )
2015-10-31 00:16:57 +00:00
}
2016-06-01 21:50:13 +00:00
if f != nil {
fs [ f . string ] = append ( fs [ f . string ] , pod . Name )
2015-10-31 00:16:57 +00:00
}
2017-12-01 16:26:11 +00:00
// short-circuit as soon as pod not ok
// at that point, there is no reason to run pod
// through any additional filters
if ! podOk {
break
}
2015-10-31 00:16:57 +00:00
}
2016-06-01 21:50:13 +00:00
if podOk {
2016-01-27 18:27:14 +00:00
pods = append ( pods , pod )
2015-10-31 00:16:57 +00:00
}
}
2016-06-01 21:50:13 +00:00
if len ( fs ) > 0 {
2017-10-23 10:25:13 +00:00
return [ ] corev1 . Pod { } , errors . New ( fs . Message ( ) )
2016-01-27 18:27:14 +00:00
}
2016-06-01 21:50:13 +00:00
if len ( ws ) > 0 {
2017-02-26 05:41:39 +00:00
fmt . Fprintf ( o . ErrOut , "WARNING: %s\n" , ws . Message ( ) )
2016-01-27 18:27:14 +00:00
}
2016-06-01 21:50:13 +00:00
return pods , nil
2016-01-27 18:27:14 +00:00
}
2017-10-23 10:25:13 +00:00
func ( o * DrainOptions ) deletePod ( pod corev1 . Pod ) error {
2017-01-24 15:38:21 +00:00
deleteOptions := & metav1 . DeleteOptions { }
2015-10-31 00:16:57 +00:00
if o . GracePeriodSeconds >= 0 {
gracePeriodSeconds := int64 ( o . GracePeriodSeconds )
deleteOptions . GracePeriodSeconds = & gracePeriodSeconds
}
2017-11-12 11:00:21 +00:00
return o . client . CoreV1 ( ) . Pods ( pod . Namespace ) . Delete ( pod . Name , deleteOptions )
2016-10-20 16:43:48 +00:00
}
2017-10-23 10:25:13 +00:00
func ( o * DrainOptions ) evictPod ( pod corev1 . Pod , policyGroupVersion string ) error {
2017-01-24 15:38:21 +00:00
deleteOptions := & metav1 . DeleteOptions { }
2016-10-20 16:43:48 +00:00
if o . GracePeriodSeconds >= 0 {
gracePeriodSeconds := int64 ( o . GracePeriodSeconds )
deleteOptions . GracePeriodSeconds = & gracePeriodSeconds
}
2017-10-23 10:25:13 +00:00
eviction := & policyv1beta1 . Eviction {
2016-12-03 18:57:26 +00:00
TypeMeta : metav1 . TypeMeta {
2016-10-20 16:43:48 +00:00
APIVersion : policyGroupVersion ,
Kind : EvictionKind ,
} ,
2017-01-17 03:38:19 +00:00
ObjectMeta : metav1 . ObjectMeta {
2016-10-20 16:43:48 +00:00
Name : pod . Name ,
Namespace : pod . Namespace ,
} ,
DeleteOptions : deleteOptions ,
}
// Remember to change change the URL manipulation func when Evction's version change
2017-11-27 14:42:00 +00:00
return o . client . PolicyV1beta1 ( ) . Evictions ( eviction . Namespace ) . Evict ( eviction )
2016-10-20 16:43:48 +00:00
}
// deleteOrEvictPods deletes or evicts the pods on the api server
2017-10-23 10:25:13 +00:00
func ( o * DrainOptions ) deleteOrEvictPods ( pods [ ] corev1 . Pod ) error {
2016-10-20 16:43:48 +00:00
if len ( pods ) == 0 {
return nil
}
policyGroupVersion , err := SupportEviction ( o . client )
if err != nil {
return err
}
2015-10-31 00:16:57 +00:00
2017-10-23 10:25:13 +00:00
getPodFn := func ( namespace , name string ) ( * corev1 . Pod , error ) {
2017-11-12 11:00:21 +00:00
return o . client . CoreV1 ( ) . Pods ( namespace ) . Get ( name , metav1 . GetOptions { } )
2016-11-29 03:18:01 +00:00
}
if len ( policyGroupVersion ) > 0 {
return o . evictPods ( pods , policyGroupVersion , getPodFn )
} else {
return o . deletePods ( pods , getPodFn )
}
}
2017-10-23 10:25:13 +00:00
func ( o * DrainOptions ) evictPods ( pods [ ] corev1 . Pod , policyGroupVersion string , getPodFn func ( namespace , name string ) ( * corev1 . Pod , error ) ) error {
2018-06-25 15:09:12 +00:00
returnCh := make ( chan error , 1 )
2016-11-29 03:18:01 +00:00
2015-10-31 00:16:57 +00:00
for _ , pod := range pods {
2018-06-25 15:09:12 +00:00
go func ( pod corev1 . Pod , returnCh chan error ) {
2016-11-29 03:18:01 +00:00
var err error
for {
err = o . evictPod ( pod , policyGroupVersion )
if err == nil {
break
2017-07-22 17:36:08 +00:00
} else if apierrors . IsNotFound ( err ) {
2018-06-25 15:09:12 +00:00
returnCh <- nil
2017-07-22 17:36:08 +00:00
return
2016-11-29 03:18:01 +00:00
} else if apierrors . IsTooManyRequests ( err ) {
2018-05-11 04:26:00 +00:00
fmt . Fprintf ( o . ErrOut , "error when evicting pod %q (will retry after 5s): %v\n" , pod . Name , err )
2016-11-29 03:18:01 +00:00
time . Sleep ( 5 * time . Second )
2017-07-22 17:36:08 +00:00
} else {
2018-06-25 15:09:12 +00:00
returnCh <- fmt . Errorf ( "error when evicting pod %q: %v" , pod . Name , err )
2016-11-29 03:18:01 +00:00
return
}
}
2017-10-23 10:25:13 +00:00
podArray := [ ] corev1 . Pod { pod }
2018-05-17 15:27:44 +00:00
_ , err = o . waitForDelete ( podArray , 1 * time . Second , time . Duration ( math . MaxInt64 ) , true , getPodFn )
2016-11-29 03:18:01 +00:00
if err == nil {
2018-06-25 15:09:12 +00:00
returnCh <- nil
2016-11-29 03:18:01 +00:00
} else {
2018-06-25 15:09:12 +00:00
returnCh <- fmt . Errorf ( "error when waiting for pod %q terminating: %v" , pod . Name , err )
2016-11-29 03:18:01 +00:00
}
2018-06-25 15:09:12 +00:00
} ( pod , returnCh )
2016-11-29 03:18:01 +00:00
}
doneCount := 0
2018-06-25 15:09:12 +00:00
var errors [ ] error
2016-11-29 03:18:01 +00:00
// 0 timeout means infinite, we use MaxInt64 to represent it.
var globalTimeout time . Duration
if o . Timeout == 0 {
globalTimeout = time . Duration ( math . MaxInt64 )
} else {
globalTimeout = o . Timeout
}
2018-05-27 18:26:37 +00:00
globalTimeoutCh := time . After ( globalTimeout )
2018-06-25 15:09:12 +00:00
numPods := len ( pods )
for doneCount < numPods {
2016-11-29 03:18:01 +00:00
select {
2018-06-25 15:09:12 +00:00
case err := <- returnCh :
2016-11-29 03:18:01 +00:00
doneCount ++
2018-06-25 15:09:12 +00:00
if err != nil {
errors = append ( errors , err )
2016-11-29 03:18:01 +00:00
}
2018-05-27 18:26:37 +00:00
case <- globalTimeoutCh :
2016-11-29 03:18:01 +00:00
return fmt . Errorf ( "Drain did not complete within %v" , globalTimeout )
2015-10-31 00:16:57 +00:00
}
}
2018-06-25 15:09:12 +00:00
return utilerrors . NewAggregate ( errors )
2016-11-29 03:18:01 +00:00
}
2015-10-31 00:16:57 +00:00
2017-10-23 10:25:13 +00:00
func ( o * DrainOptions ) deletePods ( pods [ ] corev1 . Pod , getPodFn func ( namespace , name string ) ( * corev1 . Pod , error ) ) error {
2016-11-29 03:18:01 +00:00
// 0 timeout means infinite, we use MaxInt64 to represent it.
var globalTimeout time . Duration
if o . Timeout == 0 {
globalTimeout = time . Duration ( math . MaxInt64 )
} else {
globalTimeout = o . Timeout
}
for _ , pod := range pods {
err := o . deletePod ( pod )
2017-06-09 20:57:23 +00:00
if err != nil && ! apierrors . IsNotFound ( err ) {
2016-11-29 03:18:01 +00:00
return err
}
2016-10-18 23:00:54 +00:00
}
2018-05-17 15:27:44 +00:00
_ , err := o . waitForDelete ( pods , 1 * time . Second , globalTimeout , false , getPodFn )
2016-10-18 23:00:54 +00:00
return err
}
2017-10-23 10:25:13 +00:00
func ( o * DrainOptions ) waitForDelete ( pods [ ] corev1 . Pod , interval , timeout time . Duration , usingEviction bool , getPodFn func ( string , string ) ( * corev1 . Pod , error ) ) ( [ ] corev1 . Pod , error ) {
2016-11-29 03:18:01 +00:00
var verbStr string
if usingEviction {
verbStr = "evicted"
} else {
verbStr = "deleted"
}
2018-05-02 19:15:47 +00:00
printObj , err := o . ToPrinter ( verbStr )
2018-04-19 00:02:37 +00:00
if err != nil {
return pods , err
}
err = wait . PollImmediate ( interval , timeout , func ( ) ( bool , error ) {
2017-10-23 10:25:13 +00:00
pendingPods := [ ] corev1 . Pod { }
2016-10-13 21:34:51 +00:00
for i , pod := range pods {
2016-10-18 23:00:54 +00:00
p , err := getPodFn ( pod . Namespace , pod . Name )
2016-10-13 21:34:51 +00:00
if apierrors . IsNotFound ( err ) || ( p != nil && p . ObjectMeta . UID != pod . ObjectMeta . UID ) {
2018-05-02 19:15:47 +00:00
printObj ( & pod , o . Out )
2016-10-13 21:34:51 +00:00
continue
} else if err != nil {
return false , err
} else {
2016-10-18 23:00:54 +00:00
pendingPods = append ( pendingPods , pods [ i ] )
2016-10-13 21:34:51 +00:00
}
}
2016-10-18 23:00:54 +00:00
pods = pendingPods
if len ( pendingPods ) > 0 {
2016-10-13 21:34:51 +00:00
return false , nil
}
return true , nil
} )
2016-10-18 23:00:54 +00:00
return pods , err
2015-10-31 00:16:57 +00:00
}
2016-10-20 16:43:48 +00:00
// SupportEviction uses Discovery API to find out if the server support eviction subresource
// If support, it will return its groupVersion; Otherwise, it will return ""
2017-10-23 10:25:13 +00:00
func SupportEviction ( clientset kubernetes . Interface ) ( string , error ) {
2016-10-20 16:43:48 +00:00
discoveryClient := clientset . Discovery ( )
groupList , err := discoveryClient . ServerGroups ( )
if err != nil {
return "" , err
}
foundPolicyGroup := false
var policyGroupVersion string
for _ , group := range groupList . Groups {
if group . Name == "policy" {
foundPolicyGroup = true
policyGroupVersion = group . PreferredVersion . GroupVersion
break
}
}
if ! foundPolicyGroup {
return "" , nil
}
resourceList , err := discoveryClient . ServerResourcesForGroupVersion ( "v1" )
if err != nil {
return "" , err
}
for _ , resource := range resourceList . APIResources {
if resource . Name == EvictionSubresource && resource . Kind == EvictionKind {
return policyGroupVersion , nil
}
}
return "" , nil
}
2015-10-31 00:16:57 +00:00
// RunCordonOrUncordon runs either Cordon or Uncordon. The desired value for
// "Unschedulable" is passed as the first arg.
func ( o * DrainOptions ) RunCordonOrUncordon ( desired bool ) error {
2017-09-22 17:55:54 +00:00
cordonOrUncordon := "cordon"
if ! desired {
cordonOrUncordon = "un" + cordonOrUncordon
}
for _ , nodeInfo := range o . nodeInfos {
if nodeInfo . Mapping . GroupVersionKind . Kind == "Node" {
2018-09-18 21:24:20 +00:00
obj , err := scheme . Scheme . ConvertToVersion ( nodeInfo . Object , nodeInfo . Mapping . GroupVersionKind . GroupVersion ( ) )
2017-08-08 05:38:31 +00:00
if err != nil {
2018-09-14 04:12:06 +00:00
fmt . Fprintf ( o . ErrOut , "error: unable to %s node %q: %v\n" , cordonOrUncordon , nodeInfo . Name , err )
2017-09-22 17:55:54 +00:00
continue
2017-08-08 05:38:31 +00:00
}
2017-09-22 17:55:54 +00:00
oldData , err := json . Marshal ( obj )
2017-06-30 12:58:57 +00:00
if err != nil {
2018-09-14 04:12:06 +00:00
fmt . Fprintf ( o . ErrOut , "error: unable to %s node %q: %v\n" , cordonOrUncordon , nodeInfo . Name , err )
2017-09-22 17:55:54 +00:00
continue
2016-11-04 06:16:22 +00:00
}
2017-09-22 17:55:54 +00:00
node , ok := obj . ( * corev1 . Node )
if ! ok {
2018-09-14 04:12:06 +00:00
fmt . Fprintf ( o . ErrOut , "error: unable to %s node %q: unexpected Type%T, expected Node\n" , cordonOrUncordon , nodeInfo . Name , obj )
2017-09-22 17:55:54 +00:00
continue
}
unsched := node . Spec . Unschedulable
if unsched == desired {
2018-05-02 19:15:47 +00:00
printObj , err := o . ToPrinter ( already ( desired ) )
2018-04-19 00:02:37 +00:00
if err != nil {
2018-09-14 04:12:06 +00:00
fmt . Fprintf ( o . ErrOut , "error: %v\n" , err )
2018-04-19 00:02:37 +00:00
continue
}
2018-10-31 23:50:26 +00:00
printObj ( nodeInfo . Object , o . Out )
2017-09-22 17:55:54 +00:00
} else {
2017-09-13 19:20:54 +00:00
if ! o . DryRun {
helper := resource . NewHelper ( o . restClient , nodeInfo . Mapping )
node . Spec . Unschedulable = desired
newData , err := json . Marshal ( obj )
if err != nil {
2018-09-14 04:12:06 +00:00
fmt . Fprintf ( o . ErrOut , "error: unable to %s node %q: %v\n" , cordonOrUncordon , nodeInfo . Name , err )
2017-09-13 19:20:54 +00:00
continue
}
patchBytes , err := strategicpatch . CreateTwoWayMergePatch ( oldData , newData , obj )
if err != nil {
2018-09-14 04:12:06 +00:00
fmt . Fprintf ( o . ErrOut , "error: unable to %s node %q: %v\n" , cordonOrUncordon , nodeInfo . Name , err )
2017-09-13 19:20:54 +00:00
continue
}
2018-08-30 13:33:34 +00:00
_ , err = helper . Patch ( o . Namespace , nodeInfo . Name , types . StrategicMergePatchType , patchBytes , nil )
2017-09-13 19:20:54 +00:00
if err != nil {
2018-09-14 04:12:06 +00:00
fmt . Fprintf ( o . ErrOut , "error: unable to %s node %q: %v\n" , cordonOrUncordon , nodeInfo . Name , err )
2017-09-13 19:20:54 +00:00
continue
}
2017-09-22 17:55:54 +00:00
}
2018-05-02 19:15:47 +00:00
printObj , err := o . ToPrinter ( changed ( desired ) )
2018-04-19 00:02:37 +00:00
if err != nil {
2018-09-14 04:12:06 +00:00
fmt . Fprintf ( o . ErrOut , "%v\n" , err )
2018-04-19 00:02:37 +00:00
continue
}
2018-10-31 23:50:26 +00:00
printObj ( nodeInfo . Object , o . Out )
2015-10-31 00:16:57 +00:00
}
2017-09-22 17:55:54 +00:00
} else {
2018-05-02 19:15:47 +00:00
printObj , err := o . ToPrinter ( "skipped" )
2018-04-19 00:02:37 +00:00
if err != nil {
2018-09-14 04:12:06 +00:00
fmt . Fprintf ( o . ErrOut , "%v\n" , err )
2018-04-19 00:02:37 +00:00
continue
}
2018-10-31 23:50:26 +00:00
printObj ( nodeInfo . Object , o . Out )
2015-10-31 00:16:57 +00:00
}
}
return nil
}
// already returns the status string used for a node that is already in the
// desired state: "already cordoned" when desired is true and
// "already uncordoned" otherwise. Its counterpart changed covers nodes whose
// state was modified.
func already(desired bool) string {
	status := "already uncordoned"
	if desired {
		status = "already cordoned"
	}
	return status
}
// changed returns the status string used for a node whose schedulability was
// just modified: "cordoned" when desired is true and "uncordoned" otherwise.
func changed(desired bool) string {
	if !desired {
		return "uncordoned"
	}
	return "cordoned"
}