mirror of https://github.com/k3s-io/k3s
293 lines
8.6 KiB
Go
293 lines
8.6 KiB
Go
|
/*
|
||
|
Copyright 2019 The Kubernetes Authors.
|
||
|
|
||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
you may not use this file except in compliance with the License.
|
||
|
You may obtain a copy of the License at
|
||
|
|
||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||
|
|
||
|
Unless required by applicable law or agreed to in writing, software
|
||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
See the License for the specific language governing permissions and
|
||
|
limitations under the License.
|
||
|
*/
|
||
|
|
||
|
package drain
|
||
|
|
||
|
import (
|
||
|
"fmt"
|
||
|
"io"
|
||
|
"math"
|
||
|
"time"
|
||
|
|
||
|
corev1 "k8s.io/api/core/v1"
|
||
|
policyv1beta1 "k8s.io/api/policy/v1beta1"
|
||
|
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||
|
"k8s.io/apimachinery/pkg/fields"
|
||
|
"k8s.io/apimachinery/pkg/labels"
|
||
|
utilerrors "k8s.io/apimachinery/pkg/util/errors"
|
||
|
"k8s.io/apimachinery/pkg/util/wait"
|
||
|
"k8s.io/client-go/kubernetes"
|
||
|
)
|
||
|
|
||
|
// EvictionKind is the Kind that identifies eviction objects; it is both
// stamped onto outgoing evictions and matched against discovery results.
const EvictionKind = "Eviction"

// EvictionSubresource is the name under which the eviction subresource of
// pods appears in a discovery resource list.
const EvictionSubresource = "pods/eviction"
|
||
|
|
||
|
// Helper contains the parameters to control the behaviour of drainer
|
||
|
type Helper struct {
|
||
|
Client kubernetes.Interface
|
||
|
Force bool
|
||
|
GracePeriodSeconds int
|
||
|
IgnoreAllDaemonSets bool
|
||
|
Timeout time.Duration
|
||
|
DeleteLocalData bool
|
||
|
Selector string
|
||
|
PodSelector string
|
||
|
Out io.Writer
|
||
|
ErrOut io.Writer
|
||
|
|
||
|
// TODO(justinsb): unnecessary?
|
||
|
DryRun bool
|
||
|
|
||
|
// OnPodDeletedOrEvicted is called when a pod is evicted/deleted; for printing progress output
|
||
|
OnPodDeletedOrEvicted func(pod *corev1.Pod, usingEviction bool)
|
||
|
}
|
||
|
|
||
|
// CheckEvictionSupport uses Discovery API to find out if the server support
|
||
|
// eviction subresource If support, it will return its groupVersion; Otherwise,
|
||
|
// it will return an empty string
|
||
|
func CheckEvictionSupport(clientset kubernetes.Interface) (string, error) {
|
||
|
discoveryClient := clientset.Discovery()
|
||
|
groupList, err := discoveryClient.ServerGroups()
|
||
|
if err != nil {
|
||
|
return "", err
|
||
|
}
|
||
|
foundPolicyGroup := false
|
||
|
var policyGroupVersion string
|
||
|
for _, group := range groupList.Groups {
|
||
|
if group.Name == "policy" {
|
||
|
foundPolicyGroup = true
|
||
|
policyGroupVersion = group.PreferredVersion.GroupVersion
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
if !foundPolicyGroup {
|
||
|
return "", nil
|
||
|
}
|
||
|
resourceList, err := discoveryClient.ServerResourcesForGroupVersion("v1")
|
||
|
if err != nil {
|
||
|
return "", err
|
||
|
}
|
||
|
for _, resource := range resourceList.APIResources {
|
||
|
if resource.Name == EvictionSubresource && resource.Kind == EvictionKind {
|
||
|
return policyGroupVersion, nil
|
||
|
}
|
||
|
}
|
||
|
return "", nil
|
||
|
}
|
||
|
|
||
|
func (d *Helper) makeDeleteOptions() *metav1.DeleteOptions {
|
||
|
deleteOptions := &metav1.DeleteOptions{}
|
||
|
if d.GracePeriodSeconds >= 0 {
|
||
|
gracePeriodSeconds := int64(d.GracePeriodSeconds)
|
||
|
deleteOptions.GracePeriodSeconds = &gracePeriodSeconds
|
||
|
}
|
||
|
return deleteOptions
|
||
|
}
|
||
|
|
||
|
// DeletePod will delete the given pod, or return an error if it couldn't
|
||
|
func (d *Helper) DeletePod(pod corev1.Pod) error {
|
||
|
return d.Client.CoreV1().Pods(pod.Namespace).Delete(pod.Name, d.makeDeleteOptions())
|
||
|
}
|
||
|
|
||
|
// EvictPod will evict the give pod, or return an error if it couldn't
|
||
|
func (d *Helper) EvictPod(pod corev1.Pod, policyGroupVersion string) error {
|
||
|
eviction := &policyv1beta1.Eviction{
|
||
|
TypeMeta: metav1.TypeMeta{
|
||
|
APIVersion: policyGroupVersion,
|
||
|
Kind: EvictionKind,
|
||
|
},
|
||
|
ObjectMeta: metav1.ObjectMeta{
|
||
|
Name: pod.Name,
|
||
|
Namespace: pod.Namespace,
|
||
|
},
|
||
|
DeleteOptions: d.makeDeleteOptions(),
|
||
|
}
|
||
|
// Remember to change change the URL manipulation func when Eviction's version change
|
||
|
return d.Client.PolicyV1beta1().Evictions(eviction.Namespace).Evict(eviction)
|
||
|
}
|
||
|
|
||
|
// GetPodsForDeletion receives resource info for a node, and returns those pods as PodDeleteList,
|
||
|
// or error if it cannot list pods. All pods that are ready to be deleted can be obtained with .Pods(),
|
||
|
// and string with all warning can be obtained with .Warnings(), and .Errors() for all errors that
|
||
|
// occurred during deletion.
|
||
|
func (d *Helper) GetPodsForDeletion(nodeName string) (*podDeleteList, []error) {
|
||
|
labelSelector, err := labels.Parse(d.PodSelector)
|
||
|
if err != nil {
|
||
|
return nil, []error{err}
|
||
|
}
|
||
|
|
||
|
podList, err := d.Client.CoreV1().Pods(metav1.NamespaceAll).List(metav1.ListOptions{
|
||
|
LabelSelector: labelSelector.String(),
|
||
|
FieldSelector: fields.SelectorFromSet(fields.Set{"spec.nodeName": nodeName}).String()})
|
||
|
if err != nil {
|
||
|
return nil, []error{err}
|
||
|
}
|
||
|
|
||
|
pods := []podDelete{}
|
||
|
|
||
|
for _, pod := range podList.Items {
|
||
|
var status podDeleteStatus
|
||
|
for _, filter := range d.makeFilters() {
|
||
|
status = filter(pod)
|
||
|
if !status.delete {
|
||
|
// short-circuit as soon as pod is filtered out
|
||
|
// at that point, there is no reason to run pod
|
||
|
// through any additional filters
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
pods = append(pods, podDelete{
|
||
|
pod: pod,
|
||
|
status: status,
|
||
|
})
|
||
|
}
|
||
|
|
||
|
list := &podDeleteList{items: pods}
|
||
|
|
||
|
if errs := list.errors(); len(errs) > 0 {
|
||
|
return list, errs
|
||
|
}
|
||
|
|
||
|
return list, nil
|
||
|
}
|
||
|
|
||
|
// DeleteOrEvictPods deletes or evicts the pods on the api server
|
||
|
func (d *Helper) DeleteOrEvictPods(pods []corev1.Pod) error {
|
||
|
if len(pods) == 0 {
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
policyGroupVersion, err := CheckEvictionSupport(d.Client)
|
||
|
if err != nil {
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
// TODO(justinsb): unnecessary?
|
||
|
getPodFn := func(namespace, name string) (*corev1.Pod, error) {
|
||
|
return d.Client.CoreV1().Pods(namespace).Get(name, metav1.GetOptions{})
|
||
|
}
|
||
|
|
||
|
if len(policyGroupVersion) > 0 {
|
||
|
return d.evictPods(pods, policyGroupVersion, getPodFn)
|
||
|
}
|
||
|
|
||
|
return d.deletePods(pods, getPodFn)
|
||
|
}
|
||
|
|
||
|
func (d *Helper) evictPods(pods []corev1.Pod, policyGroupVersion string, getPodFn func(namespace, name string) (*corev1.Pod, error)) error {
|
||
|
returnCh := make(chan error, 1)
|
||
|
|
||
|
for _, pod := range pods {
|
||
|
go func(pod corev1.Pod, returnCh chan error) {
|
||
|
for {
|
||
|
fmt.Fprintf(d.Out, "evicting pod %q\n", pod.Name)
|
||
|
err := d.EvictPod(pod, policyGroupVersion)
|
||
|
if err == nil {
|
||
|
break
|
||
|
} else if apierrors.IsNotFound(err) {
|
||
|
returnCh <- nil
|
||
|
return
|
||
|
} else if apierrors.IsTooManyRequests(err) {
|
||
|
fmt.Fprintf(d.ErrOut, "error when evicting pod %q (will retry after 5s): %v\n", pod.Name, err)
|
||
|
time.Sleep(5 * time.Second)
|
||
|
} else {
|
||
|
returnCh <- fmt.Errorf("error when evicting pod %q: %v", pod.Name, err)
|
||
|
return
|
||
|
}
|
||
|
}
|
||
|
_, err := waitForDelete([]corev1.Pod{pod}, 1*time.Second, time.Duration(math.MaxInt64), true, getPodFn, d.OnPodDeletedOrEvicted)
|
||
|
if err == nil {
|
||
|
returnCh <- nil
|
||
|
} else {
|
||
|
returnCh <- fmt.Errorf("error when waiting for pod %q terminating: %v", pod.Name, err)
|
||
|
}
|
||
|
}(pod, returnCh)
|
||
|
}
|
||
|
|
||
|
doneCount := 0
|
||
|
var errors []error
|
||
|
|
||
|
// 0 timeout means infinite, we use MaxInt64 to represent it.
|
||
|
var globalTimeout time.Duration
|
||
|
if d.Timeout == 0 {
|
||
|
globalTimeout = time.Duration(math.MaxInt64)
|
||
|
} else {
|
||
|
globalTimeout = d.Timeout
|
||
|
}
|
||
|
globalTimeoutCh := time.After(globalTimeout)
|
||
|
numPods := len(pods)
|
||
|
for doneCount < numPods {
|
||
|
select {
|
||
|
case err := <-returnCh:
|
||
|
doneCount++
|
||
|
if err != nil {
|
||
|
errors = append(errors, err)
|
||
|
}
|
||
|
case <-globalTimeoutCh:
|
||
|
return fmt.Errorf("drain did not complete within %v", globalTimeout)
|
||
|
}
|
||
|
}
|
||
|
return utilerrors.NewAggregate(errors)
|
||
|
}
|
||
|
|
||
|
func (d *Helper) deletePods(pods []corev1.Pod, getPodFn func(namespace, name string) (*corev1.Pod, error)) error {
|
||
|
// 0 timeout means infinite, we use MaxInt64 to represent it.
|
||
|
var globalTimeout time.Duration
|
||
|
if d.Timeout == 0 {
|
||
|
globalTimeout = time.Duration(math.MaxInt64)
|
||
|
} else {
|
||
|
globalTimeout = d.Timeout
|
||
|
}
|
||
|
for _, pod := range pods {
|
||
|
err := d.DeletePod(pod)
|
||
|
if err != nil && !apierrors.IsNotFound(err) {
|
||
|
return err
|
||
|
}
|
||
|
}
|
||
|
_, err := waitForDelete(pods, 1*time.Second, globalTimeout, false, getPodFn, d.OnPodDeletedOrEvicted)
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
func waitForDelete(pods []corev1.Pod, interval, timeout time.Duration, usingEviction bool, getPodFn func(string, string) (*corev1.Pod, error), onDoneFn func(pod *corev1.Pod, usingEviction bool)) ([]corev1.Pod, error) {
|
||
|
err := wait.PollImmediate(interval, timeout, func() (bool, error) {
|
||
|
pendingPods := []corev1.Pod{}
|
||
|
for i, pod := range pods {
|
||
|
p, err := getPodFn(pod.Namespace, pod.Name)
|
||
|
if apierrors.IsNotFound(err) || (p != nil && p.ObjectMeta.UID != pod.ObjectMeta.UID) {
|
||
|
if onDoneFn != nil {
|
||
|
onDoneFn(&pod, usingEviction)
|
||
|
}
|
||
|
continue
|
||
|
} else if err != nil {
|
||
|
return false, err
|
||
|
} else {
|
||
|
pendingPods = append(pendingPods, pods[i])
|
||
|
}
|
||
|
}
|
||
|
pods = pendingPods
|
||
|
if len(pendingPods) > 0 {
|
||
|
return false, nil
|
||
|
}
|
||
|
return true, nil
|
||
|
})
|
||
|
return pods, err
|
||
|
}
|