Merge pull request #9165 from smarterclayton/graceful

Enable graceful deletion using reconciliation loops in the Kubelet without TTL
pull/6/head
Robert Bailey 2015-08-18 10:01:40 -07:00
commit 4f856b595d
78 changed files with 1499 additions and 457 deletions

View File

@ -11017,6 +11017,11 @@
"type": "string",
"description": "RFC 3339 date and time at which the object will be deleted; populated by the system when a graceful deletion is requested, read-only; if not set, graceful deletion of the object has not been requested; see http://releases.k8s.io/HEAD/docs/devel/api-conventions.md#metadata"
},
"deletionGracePeriodSeconds": {
"type": "integer",
"format": "int64",
"description": "number of seconds allowed for this object to gracefully terminate before it will be removed from the system; only set when deletionTimestamp is also set, read-only; may only be shortened"
},
"labels": {
"type": "any",
"description": "map of string keys and values that can be used to organize and categorize objects; may match selectors of replication controllers and services; see http://releases.k8s.io/HEAD/docs/user-guide/labels.md"
@ -12327,7 +12332,7 @@
"terminationGracePeriodSeconds": {
"type": "integer",
"format": "int64",
"description": "optional duration in seconds the pod needs to terminate gracefully; may be decreased in delete request; value must be non-negative integer; the value zero indicates delete immediately; if this value is not set, the default grace period will be used instead; the grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal; set this value longer than the expected cleanup time for your process"
"description": "optional duration in seconds the pod needs to terminate gracefully; may be decreased in delete request; value must be non-negative integer; the value zero indicates delete immediately; if this value is not set, the default grace period will be used instead; the grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal; set this value longer than the expected cleanup time for your process; defaults to 30 seconds"
},
"activeDeadlineSeconds": {
"type": "integer",

View File

@ -18,6 +18,7 @@ spec:
mountPath: /varlog
- name: containers
mountPath: /var/lib/docker/containers
terminationGracePeriodSeconds: 30
volumes:
- name: varlog
hostPath:

View File

@ -19,6 +19,7 @@ spec:
mountPath: /varlog
- name: containers
mountPath: /var/lib/docker/containers
terminationGracePeriodSeconds: 30
volumes:
- name: varlog
hostPath:

View File

@ -196,7 +196,7 @@ func startComponents(firstManifestURL, secondManifestURL, apiVersion string) (st
// TODO: Write an integration test for the replication controllers watch.
go controllerManager.Run(3, util.NeverStop)
nodeController := nodecontroller.NewNodeController(nil, cl, 5*time.Minute, nodecontroller.NewPodEvictor(util.NewFakeRateLimiter()),
nodeController := nodecontroller.NewNodeController(nil, cl, 5*time.Minute, util.NewFakeRateLimiter(),
40*time.Second, 60*time.Second, 5*time.Second, nil, false)
nodeController.Run(5 * time.Second)
cadvisorInterface := new(cadvisor.Fake)
@ -872,18 +872,25 @@ func runSchedulerNoPhantomPodsTest(client *client.Client) {
// Delete a pod to free up room.
glog.Infof("Deleting pod %v", bar.Name)
err = client.Pods(api.NamespaceDefault).Delete(bar.Name, nil)
err = client.Pods(api.NamespaceDefault).Delete(bar.Name, api.NewDeleteOptions(0))
if err != nil {
glog.Fatalf("FAILED: couldn't delete pod %q: %v", bar.Name, err)
}
//TODO: reenable once fake_docker_client handles deletion cleanly
//time.Sleep(2 * time.Second)
pod.ObjectMeta.Name = "phantom.baz"
baz, err := client.Pods(api.NamespaceDefault).Create(pod)
if err != nil {
glog.Fatalf("Failed to create pod: %v, %v", pod, err)
}
if err := wait.Poll(time.Second, time.Second*60, podRunning(client, baz.Namespace, baz.Name)); err != nil {
glog.Fatalf("FAILED: (Scheduler probably didn't process deletion of 'phantom.bar') Pod never started running: %v", err)
if pod, perr := client.Pods(api.NamespaceDefault).Get("phantom.bar"); perr == nil {
glog.Fatalf("FAILED: 'phantom.bar' was never deleted: %#v", pod)
} else {
glog.Fatalf("FAILED: (Scheduler probably didn't process deletion of 'phantom.bar') Pod never started running: %v", err)
}
}
glog.Info("Scheduler doesn't make phantom pods: test passed.")

View File

@ -196,7 +196,7 @@ func (s *CMServer) Run(_ []string) error {
}
nodeController := nodecontroller.NewNodeController(cloud, kubeClient,
s.PodEvictionTimeout, nodecontroller.NewPodEvictor(util.NewTokenBucketRateLimiter(s.DeletingPodsQps, s.DeletingPodsBurst)),
s.PodEvictionTimeout, util.NewTokenBucketRateLimiter(s.DeletingPodsQps, s.DeletingPodsBurst),
s.NodeMonitorGracePeriod, s.NodeStartupGracePeriod, s.NodeMonitorPeriod, &s.ClusterCIDR, s.AllocateNodeCIDRs)
nodeController.Run(s.NodeSyncPeriod)

View File

@ -123,7 +123,7 @@ func (s *CMServer) Run(_ []string) error {
}
nodeController := nodecontroller.NewNodeController(cloud, kubeClient,
s.PodEvictionTimeout, nodecontroller.NewPodEvictor(util.NewTokenBucketRateLimiter(s.DeletingPodsQps, s.DeletingPodsBurst)),
s.PodEvictionTimeout, util.NewTokenBucketRateLimiter(s.DeletingPodsQps, s.DeletingPodsBurst),
s.NodeMonitorGracePeriod, s.NodeStartupGracePeriod, s.NodeMonitorPeriod, (*net.IPNet)(&s.ClusterCIDR), s.AllocateNodeCIDRs)
nodeController.Run(s.NodeSyncPeriod)

View File

@ -182,6 +182,7 @@ spec:
mountPath: /varlog
- name: containers
mountPath: /var/lib/docker/containers
terminationGracePeriodSeconds: 30
volumes:
- name: varlog
hostPath:

View File

@ -23,7 +23,7 @@ readonly red=$(tput setaf 1)
readonly green=$(tput setaf 2)
kube::test::clear_all() {
kubectl delete "${kube_flags[@]}" rc,pods --all
kubectl delete "${kube_flags[@]}" rc,pods --all --grace-period=0
}
kube::test::get_object_assert() {

View File

@ -246,7 +246,7 @@ runTests() {
# Pre-condition: valid-pod POD is running
kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" 'valid-pod:'
# Command
kubectl delete pod valid-pod "${kube_flags[@]}"
kubectl delete pod valid-pod "${kube_flags[@]}" --grace-period=0
# Post-condition: no POD is running
kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" ''
@ -262,7 +262,7 @@ runTests() {
# Pre-condition: valid-pod POD is running
kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" 'valid-pod:'
# Command
kubectl delete -f docs/admin/limitrange/valid-pod.yaml "${kube_flags[@]}"
kubectl delete -f docs/admin/limitrange/valid-pod.yaml "${kube_flags[@]}" --grace-period=0
# Post-condition: no POD is running
kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" ''
@ -278,7 +278,7 @@ runTests() {
# Pre-condition: valid-pod POD is running
kube::test::get_object_assert "pods -l'name in (valid-pod)'" '{{range.items}}{{$id_field}}:{{end}}' 'valid-pod:'
# Command
kubectl delete pods -l'name in (valid-pod)' "${kube_flags[@]}"
kubectl delete pods -l'name in (valid-pod)' "${kube_flags[@]}" --grace-period=0
# Post-condition: no POD is running
kube::test::get_object_assert "pods -l'name in (valid-pod)'" '{{range.items}}{{$id_field}}:{{end}}' ''
@ -310,7 +310,7 @@ runTests() {
# Pre-condition: valid-pod POD is running
kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" 'valid-pod:'
# Command
kubectl delete --all pods "${kube_flags[@]}" # --all remove all the pods
kubectl delete --all pods "${kube_flags[@]}" --grace-period=0 # --all remove all the pods
# Post-condition: no POD is running
kube::test::get_object_assert "pods -l'name in (valid-pod)'" '{{range.items}}{{$id_field}}:{{end}}' ''
@ -327,7 +327,7 @@ runTests() {
# Pre-condition: valid-pod and redis-proxy PODs are running
kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" 'redis-proxy:valid-pod:'
# Command
kubectl delete pods valid-pod redis-proxy "${kube_flags[@]}" # delete multiple pods at once
kubectl delete pods valid-pod redis-proxy "${kube_flags[@]}" --grace-period=0 # delete multiple pods at once
# Post-condition: no POD is running
kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" ''
@ -344,7 +344,7 @@ runTests() {
# Pre-condition: valid-pod and redis-proxy PODs are running
kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" 'redis-proxy:valid-pod:'
# Command
kubectl stop pods valid-pod redis-proxy "${kube_flags[@]}" # stop multiple pods at once
kubectl stop pods valid-pod redis-proxy "${kube_flags[@]}" --grace-period=0 # stop multiple pods at once
# Post-condition: no POD is running
kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" ''
@ -368,7 +368,7 @@ runTests() {
# Pre-condition: valid-pod POD is running
kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" 'valid-pod:'
# Command
kubectl delete pods -lnew-name=new-valid-pod "${kube_flags[@]}"
kubectl delete pods -lnew-name=new-valid-pod --grace-period=0 "${kube_flags[@]}"
# Post-condition: no POD is running
kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" ''
@ -419,7 +419,7 @@ runTests() {
# Pre-condition: valid-pod POD is running
kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" 'valid-pod:'
# Command
kubectl delete pods -l'name in (valid-pod-super-sayan)' "${kube_flags[@]}"
kubectl delete pods -l'name in (valid-pod-super-sayan)' --grace-period=0 "${kube_flags[@]}"
# Post-condition: no POD is running
kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" ''
@ -455,7 +455,7 @@ runTests() {
# Pre-condition: valid-pod POD is running
kube::test::get_object_assert 'pods --namespace=other' "{{range.items}}{{$id_field}}:{{end}}" 'valid-pod:'
# Command
kubectl delete "${kube_flags[@]}" pod --namespace=other valid-pod
kubectl delete "${kube_flags[@]}" pod --namespace=other valid-pod --grace-period=0
# Post-condition: no POD is running
kube::test::get_object_assert 'pods --namespace=other' "{{range.items}}{{$id_field}}:{{end}}" ''

View File

@ -51,6 +51,7 @@ current-release-pr
current-replicas
default-container-cpu-limit
default-container-mem-limit
delay-shutdown
deleting-pods-burst
deleting-pods-qps
deployment-label-key

View File

@ -1011,6 +1011,12 @@ func deepCopy_api_ObjectMeta(in ObjectMeta, out *ObjectMeta, c *conversion.Clone
} else {
out.DeletionTimestamp = nil
}
if in.DeletionGracePeriodSeconds != nil {
out.DeletionGracePeriodSeconds = new(int64)
*out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds
} else {
out.DeletionGracePeriodSeconds = nil
}
if in.Labels != nil {
out.Labels = make(map[string]string)
for key, val := range in.Labels {

View File

@ -59,6 +59,8 @@ func BeforeCreate(strategy RESTCreateStrategy, ctx api.Context, obj runtime.Obje
} else {
objectMeta.Namespace = api.NamespaceNone
}
objectMeta.DeletionTimestamp = nil
objectMeta.DeletionGracePeriodSeconds = nil
strategy.PrepareForCreate(obj)
api.FillObjectMetaSystemFields(ctx, objectMeta)
api.GenerateName(strategy, objectMeta)

View File

@ -17,8 +17,11 @@ limitations under the License.
package rest
import (
"time"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/runtime"
"k8s.io/kubernetes/pkg/util"
)
// RESTDeleteStrategy defines deletion behavior on an object that follows Kubernetes
@ -40,12 +43,41 @@ func BeforeDelete(strategy RESTDeleteStrategy, ctx api.Context, obj runtime.Obje
if strategy == nil {
return false, false, nil
}
_, _, kerr := objectMetaAndKind(strategy, obj)
objectMeta, _, kerr := objectMetaAndKind(strategy, obj)
if kerr != nil {
return false, false, kerr
}
// if the object is already being deleted
if objectMeta.DeletionTimestamp != nil {
// if we are already being deleted, we may only shorten the deletion grace period
// this means the object was gracefully deleted previously but deletionGracePeriodSeconds was not set,
// so we force deletion immediately
if objectMeta.DeletionGracePeriodSeconds == nil {
return false, false, nil
}
// only a shorter grace period may be provided by a user
if options.GracePeriodSeconds != nil {
period := int64(*options.GracePeriodSeconds)
if period > *objectMeta.DeletionGracePeriodSeconds {
return false, true, nil
}
now := util.NewTime(util.Now().Add(time.Second * time.Duration(*options.GracePeriodSeconds)))
objectMeta.DeletionTimestamp = &now
objectMeta.DeletionGracePeriodSeconds = &period
options.GracePeriodSeconds = &period
return true, false, nil
}
// graceful deletion is pending, do nothing
options.GracePeriodSeconds = objectMeta.DeletionGracePeriodSeconds
return false, true, nil
}
if !strategy.CheckGracefulDelete(obj, options) {
return false, false, nil
}
now := util.NewTime(util.Now().Add(time.Second * time.Duration(*options.GracePeriodSeconds)))
objectMeta.DeletionTimestamp = &now
objectMeta.DeletionGracePeriodSeconds = options.GracePeriodSeconds
return true, false, nil
}

View File

@ -114,17 +114,20 @@ func (t *Tester) TestUpdate(valid runtime.Object, existing, older runtime.Object
// Test deleting an object.
func (t *Tester) TestDelete(createFn func() runtime.Object, wasGracefulFn func() bool, invalid ...runtime.Object) {
t.testDeleteNonExist(createFn)
t.testDeleteNoGraceful(createFn, wasGracefulFn)
t.testDeleteInvokesValidation(invalid...)
t.TestDeleteNonExist(createFn)
t.TestDeleteNoGraceful(createFn, wasGracefulFn)
t.TestDeleteInvokesValidation(invalid...)
// TODO: Test delete namespace mismatch rejection
// once #5684 is fixed.
}
// Test graceful deletion.
func (t *Tester) TestDeleteGraceful(createFn func() runtime.Object, expectedGrace int64, wasGracefulFn func() bool) {
t.testDeleteGracefulHasDefault(createFn(), expectedGrace, wasGracefulFn)
t.testDeleteGracefulUsesZeroOnNil(createFn(), 0)
t.TestDeleteGracefulHasDefault(createFn(), expectedGrace, wasGracefulFn)
t.TestDeleteGracefulWithValue(createFn(), expectedGrace, wasGracefulFn)
t.TestDeleteGracefulUsesZeroOnNil(createFn(), 0)
t.TestDeleteGracefulExtend(createFn(), expectedGrace, wasGracefulFn)
t.TestDeleteGracefulImmediate(createFn(), expectedGrace, wasGracefulFn)
}
// Test getting object.
@ -298,7 +301,7 @@ func (t *Tester) testUpdateFailsOnVersion(older runtime.Object) {
// =============================================================================
// Deletion tests.
func (t *Tester) testDeleteInvokesValidation(invalid ...runtime.Object) {
func (t *Tester) TestDeleteInvokesValidation(invalid ...runtime.Object) {
for i, obj := range invalid {
objectMeta := t.getObjectMetaOrFail(obj)
ctx := t.TestContext()
@ -309,7 +312,7 @@ func (t *Tester) testDeleteInvokesValidation(invalid ...runtime.Object) {
}
}
func (t *Tester) testDeleteNonExist(createFn func() runtime.Object) {
func (t *Tester) TestDeleteNonExist(createFn func() runtime.Object) {
existing := createFn()
objectMeta := t.getObjectMetaOrFail(existing)
context := t.TestContext()
@ -322,7 +325,10 @@ func (t *Tester) testDeleteNonExist(createFn func() runtime.Object) {
})
}
func (t *Tester) testDeleteNoGraceful(createFn func() runtime.Object, wasGracefulFn func() bool) {
// =============================================================================
// Graceful Deletion tests.
func (t *Tester) TestDeleteNoGraceful(createFn func() runtime.Object, wasGracefulFn func() bool) {
existing := createFn()
objectMeta := t.getObjectMetaOrFail(existing)
ctx := api.WithNamespace(t.TestContext(), objectMeta.Namespace)
@ -338,25 +344,142 @@ func (t *Tester) testDeleteNoGraceful(createFn func() runtime.Object, wasGracefu
}
}
// =============================================================================
// Graceful Deletion tests.
func (t *Tester) testDeleteGracefulHasDefault(existing runtime.Object, expectedGrace int64, wasGracefulFn func() bool) {
func (t *Tester) TestDeleteGracefulHasDefault(existing runtime.Object, expectedGrace int64, wasGracefulFn func() bool) {
objectMeta := t.getObjectMetaOrFail(existing)
ctx := api.WithNamespace(t.TestContext(), objectMeta.Namespace)
_, err := t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, &api.DeleteOptions{})
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if _, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name); err != nil {
if !wasGracefulFn() {
t.Errorf("did not gracefully delete resource")
return
}
object, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name)
if err != nil {
t.Errorf("unexpected error, object should exist: %v", err)
return
}
objectMeta, err = api.ObjectMetaFor(object)
if err != nil {
t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, object)
}
if objectMeta.DeletionTimestamp == nil {
t.Errorf("did not set deletion timestamp")
}
if objectMeta.DeletionGracePeriodSeconds == nil {
t.Fatalf("did not set deletion grace period seconds")
}
if *objectMeta.DeletionGracePeriodSeconds != expectedGrace {
t.Errorf("actual grace period does not match expected: %d", *objectMeta.DeletionGracePeriodSeconds)
}
}
func (t *Tester) TestDeleteGracefulWithValue(existing runtime.Object, expectedGrace int64, wasGracefulFn func() bool) {
objectMeta, err := api.ObjectMetaFor(existing)
if err != nil {
t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, existing)
}
ctx := api.WithNamespace(t.TestContext(), objectMeta.Namespace)
_, err = t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, api.NewDeleteOptions(expectedGrace+2))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !wasGracefulFn() {
t.Errorf("did not gracefully delete resource")
}
object, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name)
if err != nil {
t.Errorf("unexpected error, object should exist: %v", err)
}
objectMeta, err = api.ObjectMetaFor(object)
if err != nil {
t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, object)
}
if objectMeta.DeletionTimestamp == nil {
t.Errorf("did not set deletion timestamp")
}
if objectMeta.DeletionGracePeriodSeconds == nil {
t.Fatalf("did not set deletion grace period seconds")
}
if *objectMeta.DeletionGracePeriodSeconds != expectedGrace+2 {
t.Errorf("actual grace period does not match expected: %d", *objectMeta.DeletionGracePeriodSeconds)
}
}
func (t *Tester) testDeleteGracefulUsesZeroOnNil(existing runtime.Object, expectedGrace int64) {
func (t *Tester) TestDeleteGracefulExtend(existing runtime.Object, expectedGrace int64, wasGracefulFn func() bool) {
objectMeta, err := api.ObjectMetaFor(existing)
if err != nil {
t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, existing)
}
ctx := api.WithNamespace(t.TestContext(), objectMeta.Namespace)
_, err = t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, api.NewDeleteOptions(expectedGrace))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !wasGracefulFn() {
t.Errorf("did not gracefully delete resource")
}
// second delete duration is ignored
_, err = t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, api.NewDeleteOptions(expectedGrace+2))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
object, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name)
if err != nil {
t.Errorf("unexpected error, object should exist: %v", err)
}
objectMeta, err = api.ObjectMetaFor(object)
if err != nil {
t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, object)
}
if objectMeta.DeletionTimestamp == nil {
t.Errorf("did not set deletion timestamp")
}
if objectMeta.DeletionGracePeriodSeconds == nil {
t.Fatalf("did not set deletion grace period seconds")
}
if *objectMeta.DeletionGracePeriodSeconds != expectedGrace {
t.Errorf("actual grace period does not match expected: %d", *objectMeta.DeletionGracePeriodSeconds)
}
}
func (t *Tester) TestDeleteGracefulImmediate(existing runtime.Object, expectedGrace int64, wasGracefulFn func() bool) {
objectMeta, err := api.ObjectMetaFor(existing)
if err != nil {
t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, existing)
}
ctx := api.WithNamespace(t.TestContext(), objectMeta.Namespace)
_, err = t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, api.NewDeleteOptions(expectedGrace))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !wasGracefulFn() {
t.Errorf("did not gracefully delete resource")
}
// second delete is immediate, resource is deleted
out, err := t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, api.NewDeleteOptions(0))
if err != nil {
t.Errorf("unexpected error: %v", err)
}
_, err = t.storage.(rest.Getter).Get(ctx, objectMeta.Name)
if !errors.IsNotFound(err) {
t.Errorf("unexpected error, object should be deleted immediately: %v", err)
}
objectMeta, err = api.ObjectMetaFor(out)
if err != nil {
t.Errorf("unexpected error: %v", err)
return
}
if objectMeta.DeletionTimestamp == nil || objectMeta.DeletionGracePeriodSeconds == nil || *objectMeta.DeletionGracePeriodSeconds != 0 {
t.Errorf("unexpected deleted meta: %#v", objectMeta)
}
}
func (t *Tester) TestDeleteGracefulUsesZeroOnNil(existing runtime.Object, expectedGrace int64) {
objectMeta := t.getObjectMetaOrFail(existing)
ctx := api.WithNamespace(t.TestContext(), objectMeta.Namespace)
_, err := t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, nil)
@ -364,7 +487,7 @@ func (t *Tester) testDeleteGracefulUsesZeroOnNil(existing runtime.Object, expect
t.Errorf("unexpected error: %v", err)
}
if _, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name); !errors.IsNotFound(err) {
t.Errorf("unexpected error, object should exist: %v", err)
t.Errorf("unexpected error, object should not exist: %v", err)
}
}

View File

@ -151,6 +151,7 @@ func TestRoundTripTypes(t *testing.T) {
}
func TestEncode_Ptr(t *testing.T) {
grace := int64(30)
pod := &api.Pod{
ObjectMeta: api.ObjectMeta{
Labels: map[string]string{"name": "foo"},
@ -158,6 +159,8 @@ func TestEncode_Ptr(t *testing.T) {
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
}
obj := runtime.Object(pod)

View File

@ -89,6 +89,15 @@ func FuzzerFor(t *testing.T, version string, src rand.Source) *fuzz.Fuzzer {
j.LabelSelector, _ = labels.Parse("a=b")
j.FieldSelector, _ = fields.ParseSelector("a=b")
},
func(j *api.PodSpec, c fuzz.Continue) {
c.FuzzNoCustom(j)
// has a default value
ttl := int64(30)
if c.RandBool() {
ttl = int64(c.Uint32())
}
j.TerminationGracePeriodSeconds = &ttl
},
func(j *api.PodPhase, c fuzz.Continue) {
statuses := []api.PodPhase{api.PodPending, api.PodRunning, api.PodFailed, api.PodUnknown}
*j = statuses[c.Rand.Intn(len(statuses))]

View File

@ -143,6 +143,10 @@ type ObjectMeta struct {
// will send a hard termination signal to the container.
DeletionTimestamp *util.Time `json:"deletionTimestamp,omitempty"`
// DeletionGracePeriodSeconds records the graceful deletion value set when graceful deletion
// was requested. Represents the most recent grace period, and may only be shortened once set.
DeletionGracePeriodSeconds *int64 `json:"deletionGracePeriodSeconds,omitempty"`
// Labels are key value pairs that may be used to scope and select individual resources.
// Label keys are of the form:
// label-key ::= prefixed-name | name

View File

@ -1176,6 +1176,12 @@ func convert_api_ObjectMeta_To_v1_ObjectMeta(in *api.ObjectMeta, out *ObjectMeta
} else {
out.DeletionTimestamp = nil
}
if in.DeletionGracePeriodSeconds != nil {
out.DeletionGracePeriodSeconds = new(int64)
*out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds
} else {
out.DeletionGracePeriodSeconds = nil
}
if in.Labels != nil {
out.Labels = make(map[string]string)
for key, val := range in.Labels {
@ -3591,6 +3597,12 @@ func convert_v1_ObjectMeta_To_api_ObjectMeta(in *ObjectMeta, out *api.ObjectMeta
} else {
out.DeletionTimestamp = nil
}
if in.DeletionGracePeriodSeconds != nil {
out.DeletionGracePeriodSeconds = new(int64)
*out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds
} else {
out.DeletionGracePeriodSeconds = nil
}
if in.Labels != nil {
out.Labels = make(map[string]string)
for key, val := range in.Labels {

View File

@ -1010,6 +1010,12 @@ func deepCopy_v1_ObjectMeta(in ObjectMeta, out *ObjectMeta, c *conversion.Cloner
} else {
out.DeletionTimestamp = nil
}
if in.DeletionGracePeriodSeconds != nil {
out.DeletionGracePeriodSeconds = new(int64)
*out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds
} else {
out.DeletionGracePeriodSeconds = nil
}
if in.Labels != nil {
out.Labels = make(map[string]string)
for key, val := range in.Labels {

View File

@ -113,6 +113,10 @@ func addDefaultingFuncs() {
if obj.HostNetwork {
defaultHostNetworkPorts(&obj.Containers)
}
if obj.TerminationGracePeriodSeconds == nil {
period := int64(DefaultTerminationGracePeriodSeconds)
obj.TerminationGracePeriodSeconds = &period
}
},
func(obj *Probe) {
if obj.TimeoutSeconds == 0 {

View File

@ -141,6 +141,10 @@ type ObjectMeta struct {
// will send a hard termination signal to the container.
DeletionTimestamp *util.Time `json:"deletionTimestamp,omitempty" description:"RFC 3339 date and time at which the object will be deleted; populated by the system when a graceful deletion is requested, read-only; if not set, graceful deletion of the object has not been requested; see http://releases.k8s.io/HEAD/docs/devel/api-conventions.md#metadata"`
// DeletionGracePeriodSeconds records the graceful deletion value set when graceful deletion
// was requested. Represents the most recent grace period, and may only be shortened once set.
DeletionGracePeriodSeconds *int64 `json:"deletionGracePeriodSeconds,omitempty" description:"number of seconds allowed for this object to gracefully terminate before it will be removed from the system; only set when deletionTimestamp is also set, read-only; may only be shortened"`
// Labels are key value pairs that may be used to scope and select individual resources.
// TODO: replace map[string]string with labels.LabelSet type
Labels map[string]string `json:"labels,omitempty" description:"map of string keys and values that can be used to organize and categorize objects; may match selectors of replication controllers and services; see http://releases.k8s.io/HEAD/docs/user-guide/labels.md"`
@ -858,6 +862,8 @@ const (
// DNSDefault indicates that the pod should use the default (as
// determined by kubelet) DNS settings.
DNSDefault DNSPolicy = "Default"
DefaultTerminationGracePeriodSeconds = 30
)
// PodSpec is a description of a pod
@ -872,7 +878,7 @@ type PodSpec struct {
// The grace period is the duration in seconds after the processes running in the pod are sent
// a termination signal and the time when the processes are forcibly halted with a kill signal.
// Set this value longer than the expected cleanup time for your process.
TerminationGracePeriodSeconds *int64 `json:"terminationGracePeriodSeconds,omitempty" description:"optional duration in seconds the pod needs to terminate gracefully; may be decreased in delete request; value must be non-negative integer; the value zero indicates delete immediately; if this value is not set, the default grace period will be used instead; the grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal; set this value longer than the expected cleanup time for your process"`
TerminationGracePeriodSeconds *int64 `json:"terminationGracePeriodSeconds,omitempty" description:"optional duration in seconds the pod needs to terminate gracefully; may be decreased in delete request; value must be non-negative integer; the value zero indicates delete immediately; if this value is not set, the default grace period will be used instead; the grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal; set this value longer than the expected cleanup time for your process; defaults to 30 seconds"`
ActiveDeadlineSeconds *int64 `json:"activeDeadlineSeconds,omitempty" description:"optional duration in seconds the pod may be active on the node relative to StartTime before the system will actively try to mark it failed and kill associated containers; value must be a positive integer"`
// Optional: Set DNS policy. Defaults to "ClusterFirst"
DNSPolicy DNSPolicy `json:"dnsPolicy,omitempty" description:"DNS policy for containers within the pod; one of 'ClusterFirst' or 'Default'"`

View File

@ -265,6 +265,16 @@ func ValidateObjectMetaUpdate(new, old *api.ObjectMeta) errs.ValidationErrorList
} else {
new.CreationTimestamp = old.CreationTimestamp
}
// an object can never remove a deletion timestamp or clear/change grace period seconds
if !old.DeletionTimestamp.IsZero() {
new.DeletionTimestamp = old.DeletionTimestamp
}
if old.DeletionGracePeriodSeconds != nil && new.DeletionGracePeriodSeconds == nil {
new.DeletionGracePeriodSeconds = old.DeletionGracePeriodSeconds
}
if new.DeletionGracePeriodSeconds != nil && old.DeletionGracePeriodSeconds != nil && *new.DeletionGracePeriodSeconds != *old.DeletionGracePeriodSeconds {
allErrs = append(allErrs, errs.NewFieldInvalid("deletionGracePeriodSeconds", new.DeletionGracePeriodSeconds, "field is immutable; may only be changed via deletion"))
}
// Reject updates that don't specify a resource version
if new.ResourceVersion == "" {

View File

@ -1317,6 +1317,9 @@ func TestValidatePod(t *testing.T) {
}
func TestValidatePodUpdate(t *testing.T) {
now := util.Now()
grace := int64(30)
grace2 := int64(31)
tests := []struct {
a api.Pod
b api.Pod
@ -1403,6 +1406,30 @@ func TestValidatePodUpdate(t *testing.T) {
false,
"more containers",
},
{
api.Pod{
ObjectMeta: api.ObjectMeta{Name: "foo", DeletionTimestamp: &now},
Spec: api.PodSpec{Containers: []api.Container{{Image: "foo:V1"}}},
},
api.Pod{
ObjectMeta: api.ObjectMeta{Name: "foo"},
Spec: api.PodSpec{Containers: []api.Container{{Image: "foo:V1"}}},
},
true,
"deletion timestamp filled out",
},
{
api.Pod{
ObjectMeta: api.ObjectMeta{Name: "foo", DeletionTimestamp: &now, DeletionGracePeriodSeconds: &grace},
Spec: api.PodSpec{Containers: []api.Container{{Image: "foo:V1"}}},
},
api.Pod{
ObjectMeta: api.ObjectMeta{Name: "foo", DeletionTimestamp: &now, DeletionGracePeriodSeconds: &grace2},
Spec: api.PodSpec{Containers: []api.Container{{Image: "foo:V1"}}},
},
false,
"deletion grace period seconds cleared",
},
{
api.Pod{
ObjectMeta: api.ObjectMeta{Name: "foo"},

View File

@ -322,7 +322,8 @@ func FilterActivePods(pods []api.Pod) []*api.Pod {
var result []*api.Pod
for i := range pods {
if api.PodSucceeded != pods[i].Status.Phase &&
api.PodFailed != pods[i].Status.Phase {
api.PodFailed != pods[i].Status.Phase &&
pods[i].DeletionTimestamp == nil {
result = append(result, &pods[i])
}
}

View File

@ -310,7 +310,11 @@ func (e *EndpointController) syncService(key string) {
continue
}
if len(pod.Status.PodIP) == 0 {
glog.V(4).Infof("Failed to find an IP for pod %s/%s", pod.Namespace, pod.Name)
glog.V(5).Infof("Failed to find an IP for pod %s/%s", pod.Namespace, pod.Name)
continue
}
if pod.DeletionTimestamp != nil {
glog.V(5).Infof("Pod is being deleted %s/%s", pod.Namespace, pod.Name)
continue
}

View File

@ -89,8 +89,11 @@ type NodeController struct {
nodeStatusMap map[string]nodeStatusData
now func() util.Time
// worker that evicts pods from unresponsive nodes.
podEvictor *PodEvictor
podEvictor *RateLimitedTimedQueue
terminationEvictor *RateLimitedTimedQueue
podEvictionTimeout time.Duration
// The maximum duration before a pod evicted from a node can be forcefully terminated.
maximumGracePeriod time.Duration
recorder record.EventRecorder
}
@ -99,7 +102,7 @@ func NewNodeController(
cloud cloudprovider.Interface,
kubeClient client.Interface,
podEvictionTimeout time.Duration,
podEvictor *PodEvictor,
podEvictionLimiter util.RateLimiter,
nodeMonitorGracePeriod time.Duration,
nodeStartupGracePeriod time.Duration,
nodeMonitorPeriod time.Duration,
@ -123,7 +126,9 @@ func NewNodeController(
kubeClient: kubeClient,
recorder: recorder,
podEvictionTimeout: podEvictionTimeout,
podEvictor: podEvictor,
maximumGracePeriod: 5 * time.Minute,
podEvictor: NewRateLimitedTimedQueue(podEvictionLimiter, false),
terminationEvictor: NewRateLimitedTimedQueue(podEvictionLimiter, false),
nodeStatusMap: make(map[string]nodeStatusData),
nodeMonitorGracePeriod: nodeMonitorGracePeriod,
nodeMonitorPeriod: nodeMonitorPeriod,
@ -145,38 +150,36 @@ func (nc *NodeController) Run(period time.Duration) {
}, nc.nodeMonitorPeriod)
go util.Forever(func() {
nc.podEvictor.TryEvict(func(nodeName string) { nc.deletePods(nodeName) })
nc.podEvictor.Try(func(value TimedValue) (bool, time.Duration) {
remaining, err := nc.deletePods(value.Value)
if err != nil {
util.HandleError(fmt.Errorf("unable to evict node %q: %v", value.Value, err))
return false, 0
}
if remaining {
glog.V(2).Infof("Pods terminating on %q", value.Value)
nc.terminationEvictor.Add(value.Value)
}
return true, 0
})
}, nodeEvictionPeriod)
}
// We observed a Node deletion in etcd. Currently we only need to remove Pods that
// were assigned to it.
func (nc *NodeController) deleteNode(nodeID string) error {
return nc.deletePods(nodeID)
}
// deletePods will delete all pods from master running on given node.
func (nc *NodeController) deletePods(nodeID string) error {
glog.V(2).Infof("Delete all pods from %v", nodeID)
pods, err := nc.kubeClient.Pods(api.NamespaceAll).List(labels.Everything(),
fields.OneTermEqualSelector(client.PodHost, nodeID))
if err != nil {
return err
}
nc.recordNodeEvent(nodeID, "DeletingAllPods", fmt.Sprintf("Deleting all Pods from Node %v.", nodeID))
for _, pod := range pods.Items {
// Defensive check, also needed for tests.
if pod.Spec.NodeName != nodeID {
continue
}
glog.V(2).Infof("Delete pod %v", pod.Name)
nc.recorder.Eventf(&pod, "NodeControllerEviction", "Deleting Pod %s from Node %s", pod.Name, nodeID)
if err := nc.kubeClient.Pods(pod.Namespace).Delete(pod.Name, nil); err != nil {
glog.Errorf("Error deleting pod %v: %v", pod.Name, err)
}
}
return nil
// TODO: replace with a controller that ensures pods that are terminating complete
// in a particular time period
go util.Forever(func() {
nc.terminationEvictor.Try(func(value TimedValue) (bool, time.Duration) {
remaining, err := nc.terminatePods(value.Value, value.Added)
if err != nil {
util.HandleError(fmt.Errorf("unable to terminate pods on node %q: %v", value.Value, err))
return false, 0
}
if remaining != 0 {
glog.V(2).Infof("Pods still terminating on %q, estimated completion %s", value.Value, remaining)
return false, remaining
}
return true, 0
})
}, nodeEvictionPeriod)
}
// Generates num pod CIDRs that could be assigned to nodes.
@ -271,18 +274,18 @@ func (nc *NodeController) monitorNodeStatus() error {
// Check eviction timeout against decisionTimestamp
if lastReadyCondition.Status == api.ConditionFalse &&
decisionTimestamp.After(nc.nodeStatusMap[node.Name].readyTransitionTimestamp.Add(nc.podEvictionTimeout)) {
if nc.podEvictor.AddNodeToEvict(node.Name) {
if nc.podEvictor.Add(node.Name) {
glog.Infof("Adding pods to evict: %v is later than %v + %v", decisionTimestamp, nc.nodeStatusMap[node.Name].readyTransitionTimestamp, nc.podEvictionTimeout)
}
}
if lastReadyCondition.Status == api.ConditionUnknown &&
decisionTimestamp.After(nc.nodeStatusMap[node.Name].probeTimestamp.Add(nc.podEvictionTimeout-gracePeriod)) {
if nc.podEvictor.AddNodeToEvict(node.Name) {
if nc.podEvictor.Add(node.Name) {
glog.Infof("Adding pods to evict2: %v is later than %v + %v", decisionTimestamp, nc.nodeStatusMap[node.Name].readyTransitionTimestamp, nc.podEvictionTimeout-gracePeriod)
}
}
if lastReadyCondition.Status == api.ConditionTrue {
if nc.podEvictor.RemoveNodeToEvict(node.Name) {
if nc.podEvictor.Remove(node.Name) {
glog.Infof("Pods on %v won't be evicted", node.Name)
}
}
@ -302,8 +305,8 @@ func (nc *NodeController) monitorNodeStatus() error {
}
if _, err := instances.ExternalID(node.Name); err != nil && err == cloudprovider.InstanceNotFound {
glog.Infof("Deleting node (no longer present in cloud provider): %s", node.Name)
nc.recordNodeEvent(node.Name, "DeleteingNode", fmt.Sprintf("Deleting Node %v because it's not present according to cloud provider", node.Name))
if err := nc.deletePods(node.Name); err != nil {
nc.recordNodeEvent(node.Name, "DeletingNode", fmt.Sprintf("Deleting Node %v because it's not present according to cloud provider", node.Name))
if _, err := nc.deletePods(node.Name); err != nil {
glog.Errorf("Unable to delete pods from node %s: %v", node.Name, err)
continue
}
@ -311,6 +314,7 @@ func (nc *NodeController) monitorNodeStatus() error {
glog.Errorf("Unable to delete node %s: %v", node.Name, err)
continue
}
nc.podEvictor.Add(node.Name)
}
}
}
@ -502,3 +506,88 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap
return gracePeriod, lastReadyCondition, readyCondition, err
}
// We observed a Node deletion in etcd. Currently we only need to remove Pods that
// were assigned to it.
func (nc *NodeController) deleteNode(nodeID string) error {
nc.podEvictor.Add(nodeID)
return nil
}
// deletePods will delete all pods from master running on given node, and return true
// if any pods were deleted.
func (nc *NodeController) deletePods(nodeID string) (bool, error) {
remaining := false
glog.V(2).Infof("Delete all pods from %s", nodeID)
pods, err := nc.kubeClient.Pods(api.NamespaceAll).List(labels.Everything(),
fields.OneTermEqualSelector(client.PodHost, nodeID))
if err != nil {
return remaining, err
}
nc.recordNodeEvent(nodeID, "DeletingAllPods", fmt.Sprintf("Deleting all Pods from Node %v.", nodeID))
for _, pod := range pods.Items {
// Defensive check, also needed for tests.
if pod.Spec.NodeName != nodeID {
continue
}
// if the pod has already been deleted, ignore it
if pod.DeletionGracePeriodSeconds != nil {
continue
}
glog.V(2).Infof("Delete pod %v", pod.Name)
nc.recorder.Eventf(&pod, "NodeControllerEviction", "Deleting Pod %s from Node %s", pod.Name, nodeID)
if err := nc.kubeClient.Pods(pod.Namespace).Delete(pod.Name, nil); err != nil {
return false, err
}
remaining = true
}
return remaining, nil
}
// terminatePods will ensure all pods on the given node that are in terminating state are eventually
// cleaned up
func (nc *NodeController) terminatePods(nodeID string, since time.Time) (time.Duration, error) {
remaining := time.Duration(0)
glog.V(2).Infof("Terminating all pods on %s", nodeID)
pods, err := nc.kubeClient.Pods(api.NamespaceAll).List(labels.Everything(),
fields.OneTermEqualSelector(client.PodHost, nodeID))
if err != nil {
return remaining, err
}
nc.recordNodeEvent(nodeID, "TerminatingAllPods", fmt.Sprintf("Terminating all Pods on Node %s.", nodeID))
now := time.Now()
elapsed := now.Sub(since)
for _, pod := range pods.Items {
// Defensive check, also needed for tests.
if pod.Spec.NodeName != nodeID {
continue
}
// only clean terminated pods
if pod.DeletionGracePeriodSeconds == nil {
continue
}
grace := time.Duration(*pod.DeletionGracePeriodSeconds) * time.Second
if grace > nc.maximumGracePeriod {
grace = nc.maximumGracePeriod
}
next := grace - elapsed
if next < 0 {
glog.V(2).Infof("Removing pod %v after %s grace period", pod.Name, grace)
nc.recordNodeEvent(nodeID, "TerminatingEvictedPod", fmt.Sprintf("Pod %s has exceeded the grace period for deletion after being evicted from Node %q and is being force killed", pod.Name, nodeID))
if err := nc.kubeClient.Pods(pod.Namespace).Delete(pod.Name, api.NewDeleteOptions(0)); err != nil {
glog.Errorf("Error completing deletion of pod %s: %v", pod.Name, err)
next = 1
}
}
if remaining < next {
remaining = next
}
}
return remaining, nil
}

View File

@ -324,9 +324,8 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
}
for _, item := range table {
podEvictor := NewPodEvictor(util.NewFakeRateLimiter())
nodeController := NewNodeController(nil, item.fakeNodeHandler,
evictionTimeout, podEvictor, testNodeMonitorGracePeriod,
evictionTimeout, util.NewFakeRateLimiter(), testNodeMonitorGracePeriod,
testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, false)
nodeController.now = func() util.Time { return fakeNow }
if err := nodeController.monitorNodeStatus(); err != nil {
@ -340,7 +339,17 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
t.Errorf("unexpected error: %v", err)
}
podEvictor.TryEvict(func(nodeName string) { nodeController.deletePods(nodeName) })
nodeController.podEvictor.Try(func(value TimedValue) (bool, time.Duration) {
remaining, _ := nodeController.deletePods(value.Value)
if remaining {
nodeController.terminationEvictor.Add(value.Value)
}
return true, 0
})
nodeController.podEvictor.Try(func(value TimedValue) (bool, time.Duration) {
nodeController.terminatePods(value.Value, value.Added)
return true, 0
})
podEvicted := false
for _, action := range item.fakeNodeHandler.Actions() {
if action.GetVerb() == "delete" && action.GetResource() == "pods" {
@ -531,7 +540,7 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
}
for _, item := range table {
nodeController := NewNodeController(nil, item.fakeNodeHandler, 5*time.Minute, NewPodEvictor(util.NewFakeRateLimiter()),
nodeController := NewNodeController(nil, item.fakeNodeHandler, 5*time.Minute, util.NewFakeRateLimiter(),
testNodeMonitorGracePeriod, testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, false)
nodeController.now = func() util.Time { return fakeNow }
if err := nodeController.monitorNodeStatus(); err != nil {
@ -610,7 +619,7 @@ func TestNodeDeletion(t *testing.T) {
Fake: testclient.NewSimpleFake(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0"), *newPod("pod1", "node1")}}),
}
nodeController := NewNodeController(nil, fakeNodeHandler, 5*time.Minute, NewPodEvictor(util.NewFakeRateLimiter()),
nodeController := NewNodeController(nil, fakeNodeHandler, 5*time.Minute, util.NewFakeRateLimiter(),
testNodeMonitorGracePeriod, testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, false)
nodeController.now = func() util.Time { return fakeNow }
if err := nodeController.monitorNodeStatus(); err != nil {
@ -620,6 +629,10 @@ func TestNodeDeletion(t *testing.T) {
if err := nodeController.monitorNodeStatus(); err != nil {
t.Errorf("unexpected error: %v", err)
}
nodeController.podEvictor.Try(func(value TimedValue) (bool, time.Duration) {
nodeController.deletePods(value.Value)
return true, 0
})
podEvicted := false
for _, action := range fakeNodeHandler.Actions() {
if action.GetVerb() == "delete" && action.GetResource() == "pods" {

View File

@ -1,129 +0,0 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nodecontroller
import (
"sync"
"k8s.io/kubernetes/pkg/util"
"github.com/golang/glog"
)
// A FIFO queue which additionally guarantees that any element can be added only once until
// it is removed.
type UniqueQueue struct {
lock sync.Mutex
queue []string
set util.StringSet
}
// Entity responsible for evicting Pods from inserted Nodes. It uses RateLimiter to avoid
// evicting everything at once. Note that we rate limit eviction of Nodes not individual Pods.
type PodEvictor struct {
queue UniqueQueue
deletingPodsRateLimiter util.RateLimiter
}
// Adds a new value to the queue if it wasn't added before, or was explicitly removed by the
// Remove call. Returns true if new value was added.
func (q *UniqueQueue) Add(value string) bool {
q.lock.Lock()
defer q.lock.Unlock()
if !q.set.Has(value) {
q.queue = append(q.queue, value)
q.set.Insert(value)
return true
} else {
return false
}
}
// Removes the value from the queue, so Get() call won't return it, and allow subsequent addition
// of the given value. If the value is not present does nothing and returns false.
func (q *UniqueQueue) Remove(value string) bool {
q.lock.Lock()
defer q.lock.Unlock()
q.set.Delete(value)
for i, val := range q.queue {
if val == value {
if i > 0 && i < len(q.queue)-1 {
q.queue = append(q.queue[0:i], q.queue[i+1:len(q.queue)]...)
} else if i > 0 {
q.queue = q.queue[0 : len(q.queue)-1]
} else {
q.queue = q.queue[1:len(q.queue)]
}
return true
}
}
return false
}
// Returns the oldest added value that wasn't returned yet.
func (q *UniqueQueue) Get() (string, bool) {
q.lock.Lock()
defer q.lock.Unlock()
if len(q.queue) == 0 {
return "", false
}
result := q.queue[0]
q.queue = q.queue[1:len(q.queue)]
return result, true
}
// Creates new PodEvictor which will use given RateLimiter to oversee eviction.
func NewPodEvictor(deletingPodsRateLimiter util.RateLimiter) *PodEvictor {
return &PodEvictor{
queue: UniqueQueue{
queue: make([]string, 0),
set: util.NewStringSet(),
},
deletingPodsRateLimiter: deletingPodsRateLimiter,
}
}
// Tries to evict all Pods from previously inserted Nodes. Ends prematurely if RateLimiter forbids any eviction.
// Each Node is processed only once, as long as it's not Removed, i.e. calling multiple AddNodeToEvict does not result
// with multiple evictions as long as RemoveNodeToEvict is not called.
func (pe *PodEvictor) TryEvict(delFunc func(string)) {
val, ok := pe.queue.Get()
for ok {
if pe.deletingPodsRateLimiter.CanAccept() {
glog.Infof("PodEvictor is evicting Pods on Node: %v", val)
delFunc(val)
} else {
glog.V(1).Info("PodEvictor is rate limitted.")
break
}
val, ok = pe.queue.Get()
}
}
// Adds Node to the Evictor to be processed later. Won't add the same Node second time if it was already
// added and not removed.
func (pe *PodEvictor) AddNodeToEvict(nodeName string) bool {
return pe.queue.Add(nodeName)
}
// Removes Node from the Evictor. The Node won't be processed until added again.
func (pe *PodEvictor) RemoveNodeToEvict(nodeName string) bool {
return pe.queue.Remove(nodeName)
}

View File

@ -0,0 +1,183 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nodecontroller
import (
"container/heap"
"sync"
"time"
"k8s.io/kubernetes/pkg/util"
)
// TimedValue is a value that should be processed at a designated time.
type TimedValue struct {
Value string
Added time.Time
Next time.Time
}
// now is used to test time
var now func() time.Time = time.Now
// TimedQueue is a priority heap where the lowest Next is at the front of the queue
type TimedQueue []*TimedValue
func (h TimedQueue) Len() int { return len(h) }
func (h TimedQueue) Less(i, j int) bool { return h[i].Next.Before(h[j].Next) }
func (h TimedQueue) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
func (h *TimedQueue) Push(x interface{}) {
*h = append(*h, x.(*TimedValue))
}
func (h *TimedQueue) Pop() interface{} {
old := *h
n := len(old)
x := old[n-1]
*h = old[0 : n-1]
return x
}
// A FIFO queue which additionally guarantees that any element can be added only once until
// it is removed.
type UniqueQueue struct {
lock sync.Mutex
queue TimedQueue
set util.StringSet
}
// Adds a new value to the queue if it wasn't added before, or was explicitly removed by the
// Remove call. Returns true if new value was added.
func (q *UniqueQueue) Add(value TimedValue) bool {
q.lock.Lock()
defer q.lock.Unlock()
if q.set.Has(value.Value) {
return false
}
heap.Push(&q.queue, &value)
q.set.Insert(value.Value)
return true
}
// Removes the value from the queue, so Get() call won't return it, and allow subsequent addition
// of the given value. If the value is not present does nothing and returns false.
func (q *UniqueQueue) Remove(value string) bool {
q.lock.Lock()
defer q.lock.Unlock()
q.set.Delete(value)
for i, val := range q.queue {
if val.Value == value {
if i > 0 && i < len(q.queue)-1 {
q.queue = append(q.queue[0:i], q.queue[i+1:len(q.queue)]...)
} else if i > 0 {
q.queue = q.queue[0 : len(q.queue)-1]
} else {
q.queue = q.queue[1:len(q.queue)]
}
return true
}
}
return false
}
// Returns the oldest added value that wasn't returned yet.
func (q *UniqueQueue) Get() (TimedValue, bool) {
q.lock.Lock()
defer q.lock.Unlock()
if len(q.queue) == 0 {
return TimedValue{}, false
}
result := q.queue.Pop().(*TimedValue)
q.set.Delete(result.Value)
return *result, true
}
// RateLimitedTimedQueue is a unique item priority queue ordered by the expected next time
// of execution. It is also rate limited.
type RateLimitedTimedQueue struct {
queue UniqueQueue
limiter util.RateLimiter
leak bool
}
// Creates new queue which will use given RateLimiter to oversee execution. If leak is true,
// items which are rate limited will be leakped. Otherwise, rate limited items will be requeued.
func NewRateLimitedTimedQueue(limiter util.RateLimiter, leak bool) *RateLimitedTimedQueue {
return &RateLimitedTimedQueue{
queue: UniqueQueue{
queue: TimedQueue{},
set: util.NewStringSet(),
},
limiter: limiter,
leak: leak,
}
}
// ActionFunc takes a timed value and returns false if the item must be retried, with an optional
// time.Duration if some minimum wait interval should be used.
type ActionFunc func(TimedValue) (bool, time.Duration)
// Try processes the queue. Ends prematurely if RateLimiter forbids an action and leak is true.
// Otherwise, requeues the item to be processed. Each value is processed once if fn returns true,
// otherwise it is added back to the queue. The returned remaining is used to identify the minimum
// time to execute the next item in the queue.
func (q *RateLimitedTimedQueue) Try(fn ActionFunc) {
val, ok := q.queue.Get()
for ok {
// rate limit the queue checking
if q.leak {
if !q.limiter.CanAccept() {
break
}
} else {
q.limiter.Accept()
}
now := now()
if now.Before(val.Next) {
q.queue.Add(val)
val, ok = q.queue.Get()
// we do not sleep here because other values may be added at the front of the queue
continue
}
if ok, wait := fn(val); !ok {
val.Next = now.Add(wait + 1)
q.queue.Add(val)
}
val, ok = q.queue.Get()
}
}
// Adds value to the queue to be processed. Won't add the same value a second time if it was already
// added and not removed.
func (q *RateLimitedTimedQueue) Add(value string) bool {
now := now()
return q.queue.Add(TimedValue{
Value: value,
Added: now,
Next: now,
})
}
// Removes Node from the Evictor. The Node won't be processed until added again.
func (q *RateLimitedTimedQueue) Remove(value string) bool {
return q.queue.Remove(value)
}

View File

@ -17,14 +17,16 @@ limitations under the License.
package nodecontroller
import (
"reflect"
"testing"
"time"
"k8s.io/kubernetes/pkg/util"
)
func CheckQueueEq(lhs, rhs []string) bool {
func CheckQueueEq(lhs []string, rhs TimedQueue) bool {
for i := 0; i < len(lhs); i++ {
if rhs[i] != lhs[i] {
if rhs[i].Value != lhs[i] {
return false
}
}
@ -36,10 +38,10 @@ func CheckSetEq(lhs, rhs util.StringSet) bool {
}
func TestAddNode(t *testing.T) {
evictor := NewPodEvictor(util.NewFakeRateLimiter())
evictor.AddNodeToEvict("first")
evictor.AddNodeToEvict("second")
evictor.AddNodeToEvict("third")
evictor := NewRateLimitedTimedQueue(util.NewFakeRateLimiter(), true)
evictor.Add("first")
evictor.Add("second")
evictor.Add("third")
queuePattern := []string{"first", "second", "third"}
if len(evictor.queue.queue) != len(queuePattern) {
@ -59,11 +61,11 @@ func TestAddNode(t *testing.T) {
}
func TestDelNode(t *testing.T) {
evictor := NewPodEvictor(util.NewFakeRateLimiter())
evictor.AddNodeToEvict("first")
evictor.AddNodeToEvict("second")
evictor.AddNodeToEvict("third")
evictor.RemoveNodeToEvict("first")
evictor := NewRateLimitedTimedQueue(util.NewFakeRateLimiter(), true)
evictor.Add("first")
evictor.Add("second")
evictor.Add("third")
evictor.Remove("first")
queuePattern := []string{"second", "third"}
if len(evictor.queue.queue) != len(queuePattern) {
@ -81,11 +83,11 @@ func TestDelNode(t *testing.T) {
t.Errorf("Invalid map. Got %v, expected %v", evictor.queue.set, setPattern)
}
evictor = NewPodEvictor(util.NewFakeRateLimiter())
evictor.AddNodeToEvict("first")
evictor.AddNodeToEvict("second")
evictor.AddNodeToEvict("third")
evictor.RemoveNodeToEvict("second")
evictor = NewRateLimitedTimedQueue(util.NewFakeRateLimiter(), true)
evictor.Add("first")
evictor.Add("second")
evictor.Add("third")
evictor.Remove("second")
queuePattern = []string{"first", "third"}
if len(evictor.queue.queue) != len(queuePattern) {
@ -103,11 +105,11 @@ func TestDelNode(t *testing.T) {
t.Errorf("Invalid map. Got %v, expected %v", evictor.queue.set, setPattern)
}
evictor = NewPodEvictor(util.NewFakeRateLimiter())
evictor.AddNodeToEvict("first")
evictor.AddNodeToEvict("second")
evictor.AddNodeToEvict("third")
evictor.RemoveNodeToEvict("third")
evictor = NewRateLimitedTimedQueue(util.NewFakeRateLimiter(), true)
evictor.Add("first")
evictor.Add("second")
evictor.Add("third")
evictor.Remove("third")
queuePattern = []string{"first", "second"}
if len(evictor.queue.queue) != len(queuePattern) {
@ -126,15 +128,18 @@ func TestDelNode(t *testing.T) {
}
}
func TestEvictNode(t *testing.T) {
evictor := NewPodEvictor(util.NewFakeRateLimiter())
evictor.AddNodeToEvict("first")
evictor.AddNodeToEvict("second")
evictor.AddNodeToEvict("third")
evictor.RemoveNodeToEvict("second")
func TestTry(t *testing.T) {
evictor := NewRateLimitedTimedQueue(util.NewFakeRateLimiter(), true)
evictor.Add("first")
evictor.Add("second")
evictor.Add("third")
evictor.Remove("second")
deletedMap := util.NewStringSet()
evictor.TryEvict(func(nodeName string) { deletedMap.Insert(nodeName) })
evictor.Try(func(value TimedValue) (bool, time.Duration) {
deletedMap.Insert(value.Value)
return true, 0
})
setPattern := util.NewStringSet("first", "third")
if len(deletedMap) != len(setPattern) {
@ -144,3 +149,35 @@ func TestEvictNode(t *testing.T) {
t.Errorf("Invalid map. Got %v, expected %v", deletedMap, setPattern)
}
}
func TestTryOrdering(t *testing.T) {
evictor := NewRateLimitedTimedQueue(util.NewFakeRateLimiter(), false)
evictor.Add("first")
evictor.Add("second")
evictor.Add("third")
order := []string{}
count := 0
queued := false
evictor.Try(func(value TimedValue) (bool, time.Duration) {
count++
if value.Added.IsZero() {
t.Fatalf("added should not be zero")
}
if value.Next.IsZero() {
t.Fatalf("next should not be zero")
}
if !queued && value.Value == "second" {
queued = true
return false, time.Millisecond
}
order = append(order, value.Value)
return true, 0
})
if reflect.DeepEqual(order, []string{"first", "third", "second"}) {
t.Fatalf("order was wrong: %v", order)
}
if count != 4 {
t.Fatalf("unexpected iterations: %d", count)
}
}

View File

@ -213,6 +213,12 @@ func (rm *ReplicationManager) getPodController(pod *api.Pod) *api.ReplicationCon
// When a pod is created, enqueue the controller that manages it and update it's expectations.
func (rm *ReplicationManager) addPod(obj interface{}) {
pod := obj.(*api.Pod)
if pod.DeletionTimestamp != nil {
// on a restart of the controller manager, it's possible a new pod shows up in a state that
// is already pending deletion. Prevent the pod from being a creation observation.
rm.deletePod(pod)
return
}
if rc := rm.getPodController(pod); rc != nil {
rcKey, err := controller.KeyFunc(rc)
if err != nil {
@ -234,6 +240,15 @@ func (rm *ReplicationManager) updatePod(old, cur interface{}) {
}
// TODO: Write a unittest for this case
curPod := cur.(*api.Pod)
if curPod.DeletionTimestamp != nil {
// when a pod is deleted gracefully it's deletion timestamp is first modified to reflect a grace period,
// and after such time has passed, the kubelet actually deletes it from the store. We receive an update
// for modification of the deletion timestamp and expect an rc to create more replicas asap, not wait
// until the kubelet actually deletes the pod. This is different from the Phase of a pod changing, because
// an rc never initiates a phase change, and so is never asleep waiting for the same.
rm.deletePod(curPod)
return
}
if rc := rm.getPodController(curPod); rc != nil {
rm.enqueueController(rc)
}

View File

@ -53,6 +53,12 @@ func deepCopy_api_ObjectMeta(in api.ObjectMeta, out *api.ObjectMeta, c *conversi
} else {
out.DeletionTimestamp = nil
}
if in.DeletionGracePeriodSeconds != nil {
out.DeletionGracePeriodSeconds = new(int64)
*out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds
} else {
out.DeletionGracePeriodSeconds = nil
}
if in.Labels != nil {
out.Labels = make(map[string]string)
for key, val := range in.Labels {

View File

@ -57,6 +57,12 @@ func convert_api_ObjectMeta_To_v1_ObjectMeta(in *api.ObjectMeta, out *v1.ObjectM
} else {
out.DeletionTimestamp = nil
}
if in.DeletionGracePeriodSeconds != nil {
out.DeletionGracePeriodSeconds = new(int64)
*out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds
} else {
out.DeletionGracePeriodSeconds = nil
}
if in.Labels != nil {
out.Labels = make(map[string]string)
for key, val := range in.Labels {
@ -115,6 +121,12 @@ func convert_v1_ObjectMeta_To_api_ObjectMeta(in *v1.ObjectMeta, out *api.ObjectM
} else {
out.DeletionTimestamp = nil
}
if in.DeletionGracePeriodSeconds != nil {
out.DeletionGracePeriodSeconds = new(int64)
*out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds
} else {
out.DeletionGracePeriodSeconds = nil
}
if in.Labels != nil {
out.Labels = make(map[string]string)
for key, val := range in.Labels {

View File

@ -70,6 +70,12 @@ func deepCopy_v1_ObjectMeta(in v1.ObjectMeta, out *v1.ObjectMeta, c *conversion.
} else {
out.DeletionTimestamp = nil
}
if in.DeletionGracePeriodSeconds != nil {
out.DeletionGracePeriodSeconds = new(int64)
*out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds
} else {
out.DeletionGracePeriodSeconds = nil
}
if in.Labels != nil {
out.Labels = make(map[string]string)
for key, val := range in.Labels {

View File

@ -38,6 +38,7 @@ import (
)
func testData() (*api.PodList, *api.ServiceList, *api.ReplicationControllerList) {
grace := int64(30)
pods := &api.PodList{
ListMeta: api.ListMeta{
ResourceVersion: "15",
@ -46,15 +47,17 @@ func testData() (*api.PodList, *api.ServiceList, *api.ReplicationControllerList)
{
ObjectMeta: api.ObjectMeta{Name: "foo", Namespace: "test", ResourceVersion: "10"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
{
ObjectMeta: api.ObjectMeta{Name: "bar", Namespace: "test", ResourceVersion: "11"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
},
@ -536,6 +539,7 @@ func TestGetMultipleTypeObjectsWithDirectReference(t *testing.T) {
}
}
func watchTestData() ([]api.Pod, []watch.Event) {
grace := int64(30)
pods := []api.Pod{
{
ObjectMeta: api.ObjectMeta{
@ -544,8 +548,9 @@ func watchTestData() ([]api.Pod, []watch.Event) {
ResourceVersion: "10",
},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
}
@ -559,8 +564,9 @@ func watchTestData() ([]api.Pod, []watch.Event) {
ResourceVersion: "11",
},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
},
@ -573,8 +579,9 @@ func watchTestData() ([]api.Pod, []watch.Event) {
ResourceVersion: "12",
},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
},

View File

@ -34,6 +34,7 @@ import (
)
func TestMerge(t *testing.T) {
grace := int64(30)
tests := []struct {
obj runtime.Object
fragment string
@ -54,8 +55,9 @@ func TestMerge(t *testing.T) {
Name: "foo",
},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
},
@ -122,8 +124,9 @@ func TestMerge(t *testing.T) {
VolumeSource: api.VolumeSource{EmptyDir: &api.EmptyDirVolumeSource{}},
},
},
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
},

View File

@ -417,7 +417,12 @@ func describePod(pod *api.Pod, rcs []api.ReplicationController, events *api.Even
fmt.Fprintf(out, "Image(s):\t%s\n", makeImageList(&pod.Spec))
fmt.Fprintf(out, "Node:\t%s\n", pod.Spec.NodeName+"/"+pod.Status.HostIP)
fmt.Fprintf(out, "Labels:\t%s\n", formatLabels(pod.Labels))
fmt.Fprintf(out, "Status:\t%s\n", string(pod.Status.Phase))
if pod.DeletionTimestamp != nil {
fmt.Fprintf(out, "Status:\tTerminating (expires %s)\n", pod.DeletionTimestamp.Time.Format(time.RFC1123Z))
fmt.Fprintf(out, "Termination Grace Period:\t%ds\n", pod.DeletionGracePeriodSeconds)
} else {
fmt.Fprintf(out, "Status:\t%s\n", string(pod.Status.Phase))
}
fmt.Fprintf(out, "Reason:\t%s\n", pod.Status.Reason)
fmt.Fprintf(out, "Message:\t%s\n", pod.Status.Message)
fmt.Fprintf(out, "IP:\t%s\n", pod.Status.PodIP)

View File

@ -83,6 +83,7 @@ func fakeClientWith(testName string, t *testing.T, data map[string]string) Clien
}
func testData() (*api.PodList, *api.ServiceList) {
grace := int64(30)
pods := &api.PodList{
ListMeta: api.ListMeta{
ResourceVersion: "15",
@ -91,15 +92,17 @@ func testData() (*api.PodList, *api.ServiceList) {
{
ObjectMeta: api.ObjectMeta{Name: "foo", Namespace: "test", ResourceVersion: "10"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
{
ObjectMeta: api.ObjectMeta{Name: "bar", Namespace: "test", ResourceVersion: "11"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
},

View File

@ -128,6 +128,7 @@ func TestHelperCreate(t *testing.T) {
return true
}
grace := int64(30)
tests := []struct {
Resp *http.Response
RespFunc client.HTTPClientFunc
@ -172,8 +173,9 @@ func TestHelperCreate(t *testing.T) {
ExpectObject: &api.Pod{
ObjectMeta: api.ObjectMeta{Name: "foo"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
Resp: &http.Response{StatusCode: http.StatusOK, Body: objBody(&api.Status{Status: api.StatusSuccess})},
@ -381,6 +383,7 @@ func TestHelperReplace(t *testing.T) {
return true
}
grace := int64(30)
tests := []struct {
Resp *http.Response
RespFunc client.HTTPClientFunc
@ -418,8 +421,9 @@ func TestHelperReplace(t *testing.T) {
ExpectObject: &api.Pod{
ObjectMeta: api.ObjectMeta{Name: "foo", ResourceVersion: "10"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
Overwrite: true,

View File

@ -410,6 +410,9 @@ func printPod(pod *api.Pod, w io.Writer, withNamespace bool, wide bool, columnLa
readyContainers++
}
}
if pod.DeletionTimestamp != nil {
reason = "Terminating"
}
if withNamespace {
if _, err := fmt.Fprintf(w, "%s\t", namespace); err != nil {

View File

@ -880,6 +880,7 @@ func TestUpdateExistingReplicationController(t *testing.T) {
func TestUpdateWithRetries(t *testing.T) {
codec := testapi.Codec()
grace := int64(30)
rc := &api.ReplicationController{
ObjectMeta: api.ObjectMeta{Name: "rc",
Labels: map[string]string{
@ -897,8 +898,9 @@ func TestUpdateWithRetries(t *testing.T) {
},
},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
},

View File

@ -31,6 +31,7 @@ import (
func noDefault(*api.Pod) error { return nil }
func TestDecodeSinglePod(t *testing.T) {
grace := int64(30)
pod := &api.Pod{
TypeMeta: api.TypeMeta{
APIVersion: "",
@ -41,8 +42,9 @@ func TestDecodeSinglePod(t *testing.T) {
Namespace: "mynamespace",
},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
Containers: []api.Container{{
Name: "image",
Image: "test/image",
@ -91,6 +93,7 @@ func TestDecodeSinglePod(t *testing.T) {
}
func TestDecodePodList(t *testing.T) {
grace := int64(30)
pod := &api.Pod{
TypeMeta: api.TypeMeta{
APIVersion: "",
@ -101,8 +104,9 @@ func TestDecodePodList(t *testing.T) {
Namespace: "mynamespace",
},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
Containers: []api.Container{{
Name: "image",
Image: "test/image",

View File

@ -217,9 +217,8 @@ func (s *podStorage) merge(source string, change interface{}) (adds, updates, de
for _, ref := range filtered {
name := kubecontainer.GetPodFullName(ref)
if existing, found := pods[name]; found {
if !reflect.DeepEqual(existing.Spec, ref.Spec) {
if checkAndUpdatePod(existing, ref) {
// this is an update
existing.Spec = ref.Spec
updates.Pods = append(updates.Pods, existing)
continue
}
@ -261,9 +260,8 @@ func (s *podStorage) merge(source string, change interface{}) (adds, updates, de
name := kubecontainer.GetPodFullName(ref)
if existing, found := oldPods[name]; found {
pods[name] = existing
if !reflect.DeepEqual(existing.Spec, ref.Spec) {
if checkAndUpdatePod(existing, ref) {
// this is an update
existing.Spec = ref.Spec
updates.Pods = append(updates.Pods, existing)
continue
}
@ -335,6 +333,23 @@ func filterInvalidPods(pods []*api.Pod, source string, recorder record.EventReco
return
}
// checkAndUpdatePod updates existing if ref makes a meaningful change and returns true, or
// returns false if there was no update.
func checkAndUpdatePod(existing, ref *api.Pod) bool {
// TODO: it would be better to update the whole object and only preserve certain things
// like the source annotation or the UID (to ensure safety)
if reflect.DeepEqual(existing.Spec, ref.Spec) &&
reflect.DeepEqual(existing.DeletionTimestamp, ref.DeletionTimestamp) &&
reflect.DeepEqual(existing.DeletionGracePeriodSeconds, ref.DeletionGracePeriodSeconds) {
return false
}
// this is an update
existing.Spec = ref.Spec
existing.DeletionTimestamp = ref.DeletionTimestamp
existing.DeletionGracePeriodSeconds = ref.DeletionGracePeriodSeconds
return true
}
// Sync sends a copy of the current state through the update channel.
func (s *podStorage) Sync() {
s.updateLock.Lock()

View File

@ -69,6 +69,7 @@ func writeTestFile(t *testing.T, dir, name string, contents string) *os.File {
func TestReadPodsFromFile(t *testing.T) {
hostname := "random-test-hostname"
grace := int64(30)
var testCases = []struct {
desc string
pod runtime.Object
@ -98,9 +99,10 @@ func TestReadPodsFromFile(t *testing.T) {
SelfLink: getSelfLink("test-"+hostname, "mynamespace"),
},
Spec: api.PodSpec{
NodeName: hostname,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
NodeName: hostname,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
Containers: []api.Container{{
Name: "image",
Image: "test/image",

View File

@ -123,6 +123,7 @@ func TestExtractInvalidPods(t *testing.T) {
func TestExtractPodsFromHTTP(t *testing.T) {
hostname := "different-value"
grace := int64(30)
var testCases = []struct {
desc string
pods runtime.Object
@ -156,9 +157,11 @@ func TestExtractPodsFromHTTP(t *testing.T) {
SelfLink: getSelfLink("foo-"+hostname, "mynamespace"),
},
Spec: api.PodSpec{
NodeName: hostname,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
NodeName: hostname,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
Containers: []api.Container{{
Name: "1",
Image: "foo",
@ -209,9 +212,11 @@ func TestExtractPodsFromHTTP(t *testing.T) {
SelfLink: getSelfLink("foo-"+hostname, kubelet.NamespaceDefault),
},
Spec: api.PodSpec{
NodeName: hostname,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
NodeName: hostname,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
Containers: []api.Container{{
Name: "1",
Image: "foo",
@ -229,9 +234,11 @@ func TestExtractPodsFromHTTP(t *testing.T) {
SelfLink: getSelfLink("bar-"+hostname, kubelet.NamespaceDefault),
},
Spec: api.PodSpec{
NodeName: hostname,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
NodeName: hostname,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
Containers: []api.Container{{
Name: "2",
Image: "bar",

View File

@ -163,13 +163,13 @@ func (f *FakeRuntime) SyncPod(pod *api.Pod, _ Pod, _ api.PodStatus, _ []api.Secr
return f.Err
}
func (f *FakeRuntime) KillPod(pod Pod) error {
func (f *FakeRuntime) KillPod(pod *api.Pod, runningPod Pod) error {
f.Lock()
defer f.Unlock()
f.CalledFunctions = append(f.CalledFunctions, "KillPod")
f.KilledPods = append(f.KilledPods, string(pod.ID))
for _, c := range pod.Containers {
f.KilledPods = append(f.KilledPods, string(runningPod.ID))
for _, c := range runningPod.Containers {
f.KilledContainers = append(f.KilledContainers, c.Name)
}
return f.Err

View File

@ -54,8 +54,8 @@ type Runtime interface {
GetPods(all bool) ([]*Pod, error)
// Syncs the running pod into the desired pod.
SyncPod(pod *api.Pod, runningPod Pod, podStatus api.PodStatus, pullSecrets []api.Secret) error
// KillPod kills all the containers of a pod.
KillPod(pod Pod) error
// KillPod kills all the containers of a pod. Pod may be nil, running pod must not be.
KillPod(pod *api.Pod, runningPod Pod) error
// GetPodStatus retrieves the status of the pod, including the information of
// all containers in the pod. Clients of this interface assume the containers
// statuses in a pod always have a deterministic ordering (eg: sorted by name).

View File

@ -56,12 +56,21 @@ import (
const (
maxReasonCacheEntries = 200
kubernetesPodLabel = "io.kubernetes.pod.data"
kubernetesContainerLabel = "io.kubernetes.container.name"
// ndots specifies the minimum number of dots that a domain name must contain for the resolver to consider it as FQDN (fully-qualified)
// we want to able to consider SRV lookup names like _dns._udp.kube-dns.default.svc to be considered relative.
// hence, setting ndots to be 5.
ndotsDNSOption = "options ndots:5\n"
// In order to avoid unnecessary SIGKILLs, give every container a minimum grace
// period after SIGTERM. Docker will guarantee the termination, but SIGTERM is
// potentially dangerous.
// TODO: evaluate whether there are scenarios in which SIGKILL is preferable to
// SIGTERM for certain process types, which may justify setting this to 0.
minimumGracePeriodInSeconds = 2
kubernetesNameLabel = "io.kubernetes.pod.name"
kubernetesPodLabel = "io.kubernetes.pod.data"
kubernetesTerminationGracePeriodLabel = "io.kubernetes.pod.terminationGracePeriod"
kubernetesContainerLabel = "io.kubernetes.container.name"
)
// DockerManager implements the Runtime interface.
@ -588,12 +597,19 @@ func (dm *DockerManager) runContainer(
if len(containerHostname) > hostnameMaxLen {
containerHostname = containerHostname[:hostnameMaxLen]
}
// Pod information is recorded on the container as labels to preserve it in the event the pod is deleted
// while the Kubelet is down and there is no information available to recover the pod. This includes
// termination information like the termination grace period and the pre stop hooks.
// TODO: keep these labels up to date if the pod changes
namespacedName := types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}
labels := map[string]string{
"io.kubernetes.pod.name": namespacedName.String(),
kubernetesNameLabel: namespacedName.String(),
}
if pod.Spec.TerminationGracePeriodSeconds != nil {
labels[kubernetesTerminationGracePeriodLabel] = strconv.FormatInt(*pod.Spec.TerminationGracePeriodSeconds, 10)
}
if container.Lifecycle != nil && container.Lifecycle.PreStop != nil {
glog.V(1).Infof("Setting preStop hook")
// TODO: This is kind of hacky, we should really just encode the bits we need.
data, err := latest.Codec.Encode(pod)
if err != nil {
@ -1104,40 +1120,56 @@ func (dm *DockerManager) PortForward(pod *kubecontainer.Pod, port uint16, stream
}
// Kills all containers in the specified pod
func (dm *DockerManager) KillPod(pod kubecontainer.Pod) error {
func (dm *DockerManager) KillPod(pod *api.Pod, runningPod kubecontainer.Pod) error {
// Send the kills in parallel since they may take a long time. Len + 1 since there
// can be Len errors + the networkPlugin teardown error.
errs := make(chan error, len(pod.Containers)+1)
errs := make(chan error, len(runningPod.Containers)+1)
wg := sync.WaitGroup{}
var networkID types.UID
for _, container := range pod.Containers {
var (
networkContainer *kubecontainer.Container
networkSpec *api.Container
)
for _, container := range runningPod.Containers {
wg.Add(1)
go func(container *kubecontainer.Container) {
defer util.HandleCrash()
defer wg.Done()
var containerSpec *api.Container
if pod != nil {
for i, c := range pod.Spec.Containers {
if c.Name == container.Name {
containerSpec = &pod.Spec.Containers[i]
break
}
}
}
// TODO: Handle this without signaling the pod infra container to
// adapt to the generic container runtime.
if container.Name == PodInfraContainerName {
// Store the container runtime for later deletion.
// We do this so that PreStop handlers can run in the network namespace.
networkID = container.ID
networkContainer = container
networkSpec = containerSpec
return
}
if err := dm.killContainer(container.ID); err != nil {
glog.Errorf("Failed to delete container: %v; Skipping pod %q", err, pod.ID)
err := dm.killContainer(container.ID, containerSpec, pod)
if err != nil {
glog.Errorf("Failed to delete container: %v; Skipping pod %q", err, runningPod.ID)
errs <- err
}
}(container)
}
wg.Wait()
if len(networkID) > 0 {
if err := dm.networkPlugin.TearDownPod(pod.Namespace, pod.Name, kubeletTypes.DockerID(networkID)); err != nil {
if networkContainer != nil {
if err := dm.networkPlugin.TearDownPod(runningPod.Namespace, runningPod.Name, kubeletTypes.DockerID(networkContainer.ID)); err != nil {
glog.Errorf("Failed tearing down the infra container: %v", err)
errs <- err
}
if err := dm.killContainer(networkID); err != nil {
glog.Errorf("Failed to delete container: %v; Skipping pod %q", err, pod.ID)
if err := dm.killContainer(networkContainer.ID, networkSpec, pod); err != nil {
glog.Errorf("Failed to delete container: %v; Skipping pod %q", err, runningPod.ID)
errs <- err
}
}
@ -1152,75 +1184,142 @@ func (dm *DockerManager) KillPod(pod kubecontainer.Pod) error {
return nil
}
// KillContainerInPod kills a container in the pod.
func (dm *DockerManager) KillContainerInPod(container api.Container, pod *api.Pod) error {
// Locate the container.
pods, err := dm.GetPods(false)
if err != nil {
return err
}
targetPod := kubecontainer.Pods(pods).FindPod(kubecontainer.GetPodFullName(pod), pod.UID)
targetContainer := targetPod.FindContainerByName(container.Name)
if targetContainer == nil {
return fmt.Errorf("unable to find container %q in pod %q", container.Name, targetPod.Name)
}
return dm.killContainer(targetContainer.ID)
}
// TODO(vmarmol): Unexport this as it is no longer used externally.
// KillContainer kills a container identified by containerID.
// Internally, it invokes docker's StopContainer API with a timeout of 10s.
// TODO: Deprecate this function in favor of KillContainerInPod.
func (dm *DockerManager) KillContainer(containerID types.UID) error {
return dm.killContainer(containerID)
}
func (dm *DockerManager) killContainer(containerID types.UID) error {
ID := string(containerID)
glog.V(2).Infof("Killing container with id %q", ID)
inspect, err := dm.client.InspectContainer(ID)
if err != nil {
return err
}
var found bool
var preStop string
if inspect != nil && inspect.Config != nil && inspect.Config.Labels != nil {
preStop, found = inspect.Config.Labels[kubernetesPodLabel]
}
if found {
var pod api.Pod
err := latest.Codec.DecodeInto([]byte(preStop), &pod)
// KillContainerInPod kills a container in the pod. It must be passed either a container ID or a container and pod,
// and will attempt to lookup the other information if missing.
func (dm *DockerManager) KillContainerInPod(containerID types.UID, container *api.Container, pod *api.Pod) error {
switch {
case len(containerID) == 0:
// Locate the container.
pods, err := dm.GetPods(false)
if err != nil {
glog.Errorf("Failed to decode prestop: %s, %s", preStop, ID)
} else {
name := inspect.Config.Labels[kubernetesContainerLabel]
var container *api.Container
return err
}
targetPod := kubecontainer.Pods(pods).FindPod(kubecontainer.GetPodFullName(pod), pod.UID)
targetContainer := targetPod.FindContainerByName(container.Name)
if targetContainer == nil {
return fmt.Errorf("unable to find container %q in pod %q", container.Name, targetPod.Name)
}
containerID = targetContainer.ID
case container == nil || pod == nil:
// Read information about the container from labels
inspect, err := dm.client.InspectContainer(string(containerID))
if err != nil {
return err
}
storedPod, storedContainer, cerr := containerAndPodFromLabels(inspect)
if cerr != nil {
glog.Errorf("unable to access pod data from container: %v", err)
}
if container == nil {
container = storedContainer
}
if pod == nil {
pod = storedPod
}
}
return dm.killContainer(containerID, container, pod)
}
// killContainer accepts a containerID and an optional container or pod containing shutdown policies. Invoke
// KillContainerInPod if information must be retrieved first.
func (dm *DockerManager) killContainer(containerID types.UID, container *api.Container, pod *api.Pod) error {
ID := string(containerID)
name := ID
if container != nil {
name = fmt.Sprintf("%s %s", name, container.Name)
}
if pod != nil {
name = fmt.Sprintf("%s %s/%s", name, pod.Namespace, pod.Name)
}
gracePeriod := int64(minimumGracePeriodInSeconds)
if pod != nil {
switch {
case pod.DeletionGracePeriodSeconds != nil:
gracePeriod = *pod.DeletionGracePeriodSeconds
case pod.Spec.TerminationGracePeriodSeconds != nil:
gracePeriod = *pod.Spec.TerminationGracePeriodSeconds
}
}
glog.V(2).Infof("Killing container %q with %d second grace period", name, gracePeriod)
start := util.Now()
if pod != nil && container != nil && container.Lifecycle != nil && container.Lifecycle.PreStop != nil {
glog.V(4).Infof("Running preStop hook for container %q", name)
// TODO: timebox PreStop execution to at most gracePeriod
if err := dm.runner.Run(ID, pod, container, container.Lifecycle.PreStop); err != nil {
glog.Errorf("preStop hook for container %q failed: %v", name, err)
}
gracePeriod -= int64(util.Now().Sub(start.Time).Seconds())
}
dm.readinessManager.RemoveReadiness(ID)
// always give containers a minimal shutdown window to avoid unnecessary SIGKILLs
if gracePeriod < minimumGracePeriodInSeconds {
gracePeriod = minimumGracePeriodInSeconds
}
err := dm.client.StopContainer(ID, uint(gracePeriod))
if _, ok := err.(*docker.ContainerNotRunning); ok && err != nil {
glog.V(4).Infof("Container %q has already exited", name)
return nil
}
if err == nil {
glog.V(2).Infof("Container %q exited after %s", name, util.Now().Sub(start.Time))
} else {
glog.V(2).Infof("Container %q termination failed after %s: %v", name, util.Now().Sub(start.Time), err)
}
ref, ok := dm.containerRefManager.GetRef(ID)
if !ok {
glog.Warningf("No ref for pod '%q'", name)
} else {
// TODO: pass reason down here, and state, or move this call up the stack.
dm.recorder.Eventf(ref, "Killing", "Killing with docker id %v", util.ShortenString(ID, 12))
}
return err
}
var errNoPodOnContainer = fmt.Errorf("no pod information labels on Docker container")
// containerAndPodFromLabels tries to load the appropriate container info off of a Docker container's labels
func containerAndPodFromLabels(inspect *docker.Container) (pod *api.Pod, container *api.Container, err error) {
if inspect == nil && inspect.Config == nil && inspect.Config.Labels == nil {
return nil, nil, errNoPodOnContainer
}
labels := inspect.Config.Labels
// the pod data may not be set
if body, found := labels[kubernetesPodLabel]; found {
pod = &api.Pod{}
if err = latest.Codec.DecodeInto([]byte(body), pod); err == nil {
name := labels[kubernetesContainerLabel]
for ix := range pod.Spec.Containers {
if pod.Spec.Containers[ix].Name == name {
container = &pod.Spec.Containers[ix]
break
}
}
if container != nil {
glog.V(1).Infof("Running preStop hook")
if err := dm.runner.Run(ID, &pod, container, container.Lifecycle.PreStop); err != nil {
glog.Errorf("failed to run preStop hook: %v", err)
}
} else {
glog.Errorf("unable to find container %v, %s", pod, name)
if container == nil {
err = fmt.Errorf("unable to find container %s in pod %v", name, pod)
}
} else {
pod = nil
}
}
// attempt to find the default grace period if we didn't commit a pod, but set the generic metadata
// field (the one used by kill)
if pod == nil {
if period, ok := labels[kubernetesTerminationGracePeriodLabel]; ok {
if seconds, err := strconv.ParseInt(period, 10, 64); err == nil {
pod = &api.Pod{}
pod.DeletionGracePeriodSeconds = &seconds
}
}
}
dm.readinessManager.RemoveReadiness(ID)
err = dm.client.StopContainer(ID, 10)
ref, ok := dm.containerRefManager.GetRef(ID)
if !ok {
glog.Warningf("No ref for pod '%v'", ID)
} else {
// TODO: pass reason down here, and state, or move this call up the stack.
dm.recorder.Eventf(ref, "Killing", "Killing with docker id %v", util.ShortenString(ID, 12))
}
return err
return
}
// Run a single container from a pod. Returns the docker container ID
@ -1253,7 +1352,7 @@ func (dm *DockerManager) runContainerInPod(pod *api.Pod, container *api.Containe
if container.Lifecycle != nil && container.Lifecycle.PostStart != nil {
handlerErr := dm.runner.Run(id, pod, container, container.Lifecycle.PostStart)
if handlerErr != nil {
dm.killContainer(types.UID(id))
dm.killContainer(types.UID(id), container, pod)
return kubeletTypes.DockerID(""), fmt.Errorf("failed to call event handler: %v", handlerErr)
}
}
@ -1534,10 +1633,10 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, pod
podFullName := kubecontainer.GetPodFullName(pod)
containerChanges, err := dm.computePodContainerChanges(pod, runningPod, podStatus)
glog.V(3).Infof("Got container changes for pod %q: %+v", podFullName, containerChanges)
if err != nil {
return err
}
glog.V(3).Infof("Got container changes for pod %q: %+v", podFullName, containerChanges)
if containerChanges.StartInfraContainer || (len(containerChanges.ContainersToKeep) == 0 && len(containerChanges.ContainersToStart) == 0) {
if len(containerChanges.ContainersToKeep) == 0 && len(containerChanges.ContainersToStart) == 0 {
@ -1547,7 +1646,7 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, pod
}
// Killing phase: if we want to start new infra container, or nothing is running kill everything (including infra container)
err = dm.KillPod(runningPod)
err = dm.KillPod(pod, runningPod)
if err != nil {
return err
}
@ -1557,7 +1656,15 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, pod
_, keep := containerChanges.ContainersToKeep[kubeletTypes.DockerID(container.ID)]
if !keep {
glog.V(3).Infof("Killing unwanted container %+v", container)
err = dm.KillContainer(container.ID)
// attempt to find the appropriate container policy
var podContainer *api.Container
for i, c := range pod.Spec.Containers {
if c.Name == container.Name {
podContainer = &pod.Spec.Containers[i]
break
}
}
err = dm.KillContainerInPod(container.ID, podContainer, pod)
if err != nil {
glog.Errorf("Error killing container: %v", err)
}

View File

@ -405,7 +405,7 @@ func TestKillContainerInPod(t *testing.T) {
manager.readinessManager.SetReadiness(c.ID, true)
}
if err := manager.KillContainerInPod(pod.Spec.Containers[0], pod); err != nil {
if err := manager.KillContainerInPod("", &pod.Spec.Containers[0], pod); err != nil {
t.Errorf("unexpected error: %v", err)
}
// Assert the container has been stopped.
@ -478,14 +478,14 @@ func TestKillContainerInPodWithPreStop(t *testing.T) {
manager.readinessManager.SetReadiness(c.ID, true)
}
if err := manager.KillContainerInPod(pod.Spec.Containers[0], pod); err != nil {
if err := manager.KillContainerInPod("", &pod.Spec.Containers[0], pod); err != nil {
t.Errorf("unexpected error: %v", err)
}
// Assert the container has been stopped.
if err := fakeDocker.AssertStopped([]string{containerToKill.ID}); err != nil {
t.Errorf("container was not stopped correctly: %v", err)
}
verifyCalls(t, fakeDocker, []string{"list", "inspect_container", "create_exec", "start_exec", "stop"})
verifyCalls(t, fakeDocker, []string{"list", "create_exec", "start_exec", "stop"})
if !reflect.DeepEqual(expectedCmd, fakeDocker.execCmd) {
t.Errorf("expected: %v, got %v", expectedCmd, fakeDocker.execCmd)
}
@ -522,7 +522,7 @@ func TestKillContainerInPodWithError(t *testing.T) {
manager.readinessManager.SetReadiness(c.ID, true)
}
if err := manager.KillContainerInPod(pod.Spec.Containers[0], pod); err == nil {
if err := manager.KillContainerInPod("", &pod.Spec.Containers[0], pod); err == nil {
t.Errorf("expected error, found nil")
}
@ -568,7 +568,7 @@ func replaceProber(dm *DockerManager, result probe.Result, err error) {
// Unknown or error.
//
// PLEASE READ THE PROBE DOCS BEFORE CHANGING THIS TEST IF YOU ARE UNSURE HOW PROBES ARE SUPPOSED TO WORK:
// (See https://github.com/GoogleCloudPlatform/kubernetes/blob/master/docs/pod-states.md#pod-conditions)
// (See https://k8s.io/kubernetes/blob/master/docs/pod-states.md#pod-conditions)
func TestProbeContainer(t *testing.T) {
manager, _ := newTestDockerManager()
dc := &docker.APIContainers{
@ -1021,7 +1021,7 @@ func TestSyncPodDeletesWithNoPodInfraContainer(t *testing.T) {
verifyCalls(t, fakeDocker, []string{
// Kill the container since pod infra container is not running.
"inspect_container", "stop",
"stop",
// Create pod infra container.
"create", "start", "inspect_container",
// Create container.
@ -1096,7 +1096,7 @@ func TestSyncPodDeletesDuplicate(t *testing.T) {
// Check the pod infra container.
"inspect_container",
// Kill the duplicated container.
"inspect_container", "stop",
"stop",
})
// Expect one of the duplicates to be killed.
if len(fakeDocker.Stopped) != 1 || (fakeDocker.Stopped[0] != "1234" && fakeDocker.Stopped[0] != "4567") {
@ -1150,7 +1150,7 @@ func TestSyncPodBadHash(t *testing.T) {
// Check the pod infra container.
"inspect_container",
// Kill and restart the bad hash container.
"inspect_container", "stop", "create", "start", "inspect_container",
"stop", "create", "start", "inspect_container",
})
if err := fakeDocker.AssertStopped([]string{"1234"}); err != nil {
@ -1208,7 +1208,7 @@ func TestSyncPodsUnhealthy(t *testing.T) {
// Check the pod infra container.
"inspect_container",
// Kill the unhealthy container.
"inspect_container", "stop",
"stop",
// Restart the unhealthy container.
"create", "start", "inspect_container",
})
@ -1443,7 +1443,7 @@ func TestSyncPodWithRestartPolicy(t *testing.T) {
// Check the pod infra container.
"inspect_container",
// Stop the last pod infra container.
"inspect_container", "stop",
"stop",
},
[]string{},
[]string{"9876"},
@ -1910,7 +1910,7 @@ func TestSyncPodEventHandlerFails(t *testing.T) {
// Create the container.
"create", "start",
// Kill the container since event handler fails.
"inspect_container", "stop",
"stop",
})
// TODO(yifan): Check the stopped container's name.

View File

@ -1120,8 +1120,8 @@ func parseResolvConf(reader io.Reader) (nameservers []string, searches []string,
}
// Kill all running containers in a pod (includes the pod infra container).
func (kl *Kubelet) killPod(pod kubecontainer.Pod) error {
return kl.containerRuntime.KillPod(pod)
func (kl *Kubelet) killPod(pod *api.Pod, runningPod kubecontainer.Pod) error {
return kl.containerRuntime.KillPod(pod, runningPod)
}
type empty struct{}
@ -1177,9 +1177,10 @@ func (kl *Kubelet) syncPod(pod *api.Pod, mirrorPod *api.Pod, runningPod kubecont
}()
// Kill pods we can't run.
err := canRunPod(pod)
if err != nil {
kl.killPod(runningPod)
if err := canRunPod(pod); err != nil || pod.DeletionTimestamp != nil {
if err := kl.killPod(pod, runningPod); err != nil {
util.HandleError(err)
}
return err
}
@ -1366,6 +1367,32 @@ func (kl *Kubelet) cleanupOrphanedVolumes(pods []*api.Pod, runningPods []*kubeco
return nil
}
// Delete any pods that are no longer running and are marked for deletion.
func (kl *Kubelet) cleanupTerminatedPods(pods []*api.Pod, runningPods []*kubecontainer.Pod) error {
var terminating []*api.Pod
for _, pod := range pods {
if pod.DeletionTimestamp != nil {
found := false
for _, runningPod := range runningPods {
if runningPod.ID == pod.UID {
found = true
break
}
}
if found {
podFullName := kubecontainer.GetPodFullName(pod)
glog.V(5).Infof("Keeping terminated pod %q and uid %q, still running", podFullName, pod.UID)
continue
}
terminating = append(terminating, pod)
}
}
if !kl.statusManager.TerminatePods(terminating) {
return errors.New("not all pods were successfully terminated")
}
return nil
}
// pastActiveDeadline returns true if the pod has been active for more than
// ActiveDeadlineSeconds.
func (kl *Kubelet) pastActiveDeadline(pod *api.Pod) bool {
@ -1528,6 +1555,10 @@ func (kl *Kubelet) cleanupPods(allPods []*api.Pod, admittedPods []*api.Pod) erro
// Remove any orphaned mirror pods.
kl.podManager.DeleteOrphanedMirrorPods()
if err := kl.cleanupTerminatedPods(allPods, runningPods); err != nil {
glog.Errorf("Failed to cleanup terminated pods: %v", err)
}
return err
}
@ -1550,7 +1581,7 @@ func (kl *Kubelet) killUnwantedPods(desiredPods map[types.UID]empty,
}()
glog.V(1).Infof("Killing unwanted pod %q", pod.Name)
// Stop the containers.
err = kl.killPod(*pod)
err = kl.killPod(nil, *pod)
if err != nil {
glog.Errorf("Failed killing the pod %q: %v", pod.Name, err)
return

View File

@ -64,7 +64,7 @@ func (mc *basicMirrorClient) DeleteMirrorPod(podFullName string) error {
return err
}
glog.V(4).Infof("Deleting a mirror pod %q", podFullName)
if err := mc.apiserverClient.Pods(namespace).Delete(name, nil); err != nil {
if err := mc.apiserverClient.Pods(namespace).Delete(name, api.NewDeleteOptions(0)); err != nil {
glog.Errorf("Failed deleting a mirror pod %q: %v", podFullName, err)
}
return nil

View File

@ -676,11 +676,11 @@ func (r *runtime) GetPods(all bool) ([]*kubecontainer.Pod, error) {
}
// KillPod invokes 'systemctl kill' to kill the unit that runs the pod.
func (r *runtime) KillPod(pod kubecontainer.Pod) error {
glog.V(4).Infof("Rkt is killing pod: name %q.", pod.Name)
func (r *runtime) KillPod(pod *api.Pod, runningPod kubecontainer.Pod) error {
glog.V(4).Infof("Rkt is killing pod: name %q.", runningPod.Name)
// TODO(yifan): More graceful stop. Replace with StopUnit and wait for a timeout.
r.systemd.KillUnit(makePodServiceFileName(pod.ID), int32(syscall.SIGKILL))
r.systemd.KillUnit(makePodServiceFileName(runningPod.ID), int32(syscall.SIGKILL))
return r.systemd.Reload()
}
@ -887,7 +887,7 @@ func (r *runtime) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, podStatus
if restartPod {
// TODO(yifan): Handle network plugin.
if err := r.KillPod(runningPod); err != nil {
if err := r.KillPod(pod, runningPod); err != nil {
return err
}
if err := r.RunPod(pod); err != nil {

View File

@ -24,6 +24,7 @@ import (
"github.com/golang/glog"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/errors"
"k8s.io/kubernetes/pkg/client"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
kubeletTypes "k8s.io/kubernetes/pkg/kubelet/types"
@ -122,7 +123,7 @@ func (s *statusManager) SetPodStatus(pod *api.Pod, status api.PodStatus) {
// Currently this routine is not called for the same pod from multiple
// workers and/or the kubelet but dropping the lock before sending the
// status down the channel feels like an easy way to get a bullet in foot.
if !found || !isStatusEqual(&oldStatus, &status) {
if !found || !isStatusEqual(&oldStatus, &status) || pod.DeletionTimestamp != nil {
s.podStatuses[podFullName] = status
s.podStatusChannel <- podStatusSyncRequest{pod, status}
} else {
@ -130,6 +131,29 @@ func (s *statusManager) SetPodStatus(pod *api.Pod, status api.PodStatus) {
}
}
// TerminatePods resets the container status for the provided pods to terminated and triggers
// a status update. This function may not enqueue all the provided pods, in which case it will
// return false
func (s *statusManager) TerminatePods(pods []*api.Pod) bool {
sent := true
s.podStatusesLock.Lock()
defer s.podStatusesLock.Unlock()
for _, pod := range pods {
for i := range pod.Status.ContainerStatuses {
pod.Status.ContainerStatuses[i].State = api.ContainerState{
Terminated: &api.ContainerStateTerminated{},
}
}
select {
case s.podStatusChannel <- podStatusSyncRequest{pod, pod.Status}:
default:
sent = false
glog.V(4).Infof("Termination notice for %q was dropped because the status channel is full", kubeletUtil.FormatPodName(pod))
}
}
return sent
}
func (s *statusManager) DeletePodStatus(podFullName string) {
s.podStatusesLock.Lock()
defer s.podStatusesLock.Unlock()
@ -161,13 +185,33 @@ func (s *statusManager) syncBatch() error {
}
// TODO: make me easier to express from client code
statusPod, err = s.kubeClient.Pods(statusPod.Namespace).Get(statusPod.Name)
if errors.IsNotFound(err) {
glog.V(3).Infof("Pod %q was deleted on the server", pod.Name)
return nil
}
if err == nil {
if len(pod.UID) > 0 && statusPod.UID != pod.UID {
glog.V(3).Infof("Pod %q was deleted and then recreated, skipping status update", kubeletUtil.FormatPodName(pod))
return nil
}
statusPod.Status = status
_, err = s.kubeClient.Pods(pod.Namespace).UpdateStatus(statusPod)
// TODO: handle conflict as a retry, make that easier too.
statusPod, err = s.kubeClient.Pods(pod.Namespace).UpdateStatus(statusPod)
if err == nil {
glog.V(3).Infof("Status for pod %q updated successfully", kubeletUtil.FormatPodName(pod))
return nil
if pod.DeletionTimestamp == nil {
return nil
}
if !notRunning(pod.Status.ContainerStatuses) {
glog.V(3).Infof("Pod %q is terminated, but some pods are still running", pod.Name)
return nil
}
if err := s.kubeClient.Pods(statusPod.Namespace).Delete(statusPod.Name, api.NewDeleteOptions(0)); err == nil {
glog.V(3).Infof("Pod %q fully terminated and removed from etcd", statusPod.Name)
s.DeletePodStatus(podFullName)
return nil
}
}
}
@ -181,3 +225,14 @@ func (s *statusManager) syncBatch() error {
go s.DeletePodStatus(podFullName)
return fmt.Errorf("error updating status for pod %q: %v", kubeletUtil.FormatPodName(pod), err)
}
// notRunning returns true if every status is terminated or waiting, or the status list
// is empty.
func notRunning(statuses []api.ContainerStatus) bool {
for _, status := range statuses {
if status.State.Terminated == nil && status.State.Waiting == nil {
return false
}
}
return true
}

View File

@ -153,8 +153,21 @@ func TestUnchangedStatus(t *testing.T) {
verifyUpdates(t, syncer, 1)
}
func TestSyncBatchIgnoresNotFound(t *testing.T) {
syncer := newTestStatusManager()
syncer.SetPodStatus(testPod, getRandomPodStatus())
err := syncer.syncBatch()
if err != nil {
t.Errorf("unexpected syncing error: %v", err)
}
verifyActions(t, syncer.kubeClient, []testclient.Action{
testclient.GetActionImpl{ActionImpl: testclient.ActionImpl{Verb: "get", Resource: "pods"}},
})
}
func TestSyncBatch(t *testing.T) {
syncer := newTestStatusManager()
syncer.kubeClient = testclient.NewSimpleFake(testPod)
syncer.SetPodStatus(testPod, getRandomPodStatus())
err := syncer.syncBatch()
if err != nil {
@ -167,6 +180,22 @@ func TestSyncBatch(t *testing.T) {
)
}
func TestSyncBatchChecksMismatchedUID(t *testing.T) {
syncer := newTestStatusManager()
testPod.UID = "first"
differentPod := *testPod
differentPod.UID = "second"
syncer.kubeClient = testclient.NewSimpleFake(testPod)
syncer.SetPodStatus(&differentPod, getRandomPodStatus())
err := syncer.syncBatch()
if err != nil {
t.Errorf("unexpected syncing error: %v", err)
}
verifyActions(t, syncer.kubeClient, []testclient.Action{
testclient.GetActionImpl{ActionImpl: testclient.ActionImpl{Verb: "get", Resource: "pods"}},
})
}
// shuffle returns a new shuffled list of container statuses.
func shuffle(statuses []api.ContainerStatus) []api.ContainerStatus {
numStatuses := len(statuses)

View File

@ -695,7 +695,7 @@ func TestDelete(t *testing.T) {
// If the controller is still around after trying to delete either the delete
// failed, or we're deleting it gracefully.
if fakeClient.Data[key].R.Node != nil {
return true
return fakeClient.Data[key].R.Node.TTL != 0
}
return false
}

View File

@ -37,6 +37,7 @@ var testTTL uint64 = 60
func NewTestEventStorage(t *testing.T) (*tools.FakeEtcdClient, *REST) {
f := tools.NewFakeEtcdClient(t)
f.HideExpires = true
f.TestIndex = true
s := etcdstorage.NewEtcdStorage(f, testapi.Codec(), etcdtest.PathPrefix())

View File

@ -341,6 +341,11 @@ func (e *Etcd) Get(ctx api.Context, name string) (runtime.Object, error) {
return obj, nil
}
var (
errAlreadyDeleting = fmt.Errorf("abort delete")
errDeleteNow = fmt.Errorf("delete now")
)
// Delete removes the item from etcd.
func (e *Etcd) Delete(ctx api.Context, name string, options *api.DeleteOptions) (runtime.Object, error) {
key, err := e.KeyFunc(ctx, name)
@ -367,13 +372,41 @@ func (e *Etcd) Delete(ctx api.Context, name string, options *api.DeleteOptions)
if pendingGraceful {
return e.finalizeDelete(obj, false)
}
if graceful && *options.GracePeriodSeconds != 0 {
if graceful {
trace.Step("Graceful deletion")
out := e.NewFunc()
if err := e.Storage.Set(key, obj, out, uint64(*options.GracePeriodSeconds)); err != nil {
lastGraceful := int64(0)
err := e.Storage.GuaranteedUpdate(
key, out, false,
storage.SimpleUpdate(func(existing runtime.Object) (runtime.Object, error) {
graceful, pendingGraceful, err := rest.BeforeDelete(e.DeleteStrategy, ctx, existing, options)
if err != nil {
return nil, err
}
if pendingGraceful {
return nil, errAlreadyDeleting
}
if !graceful {
return nil, errDeleteNow
}
lastGraceful = *options.GracePeriodSeconds
return existing, nil
}),
)
switch err {
case nil:
if lastGraceful > 0 {
return out, nil
}
// fall through and delete immediately
case errDeleteNow:
// we've updated the object to have a zero grace period, or it's already at 0, so
// we should fall through and truly delete the object.
case errAlreadyDeleting:
return e.finalizeDelete(obj, true)
default:
return nil, etcderr.InterpretUpdateError(err, e.EndpointName, name)
}
return e.finalizeDelete(out, true)
}
// delete immediately, or no graceful deletion supported

View File

@ -300,7 +300,7 @@ func TestEtcdUpdateStatus(t *testing.T) {
key, _ := storage.KeyFunc(ctx, "foo")
key = etcdtest.AddPrefix(key)
pvStart := validNewPersistentVolume("foo")
fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, pvStart), 1)
fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, pvStart), 0)
pvIn := &api.PersistentVolume{
ObjectMeta: api.ObjectMeta{

View File

@ -298,7 +298,7 @@ func TestEtcdUpdateStatus(t *testing.T) {
key, _ := storage.KeyFunc(ctx, "foo")
key = etcdtest.AddPrefix(key)
pvcStart := validNewPersistentVolumeClaim("foo", api.NamespaceDefault)
fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, pvcStart), 1)
fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, pvcStart), 0)
pvc := &api.PersistentVolumeClaim{
ObjectMeta: api.ObjectMeta{

View File

@ -56,6 +56,7 @@ func newStorage(t *testing.T) (*REST, *BindingREST, *StatusREST, *tools.FakeEtcd
}
func validNewPod() *api.Pod {
grace := int64(30)
return &api.Pod{
ObjectMeta: api.ObjectMeta{
Name: "foo",
@ -64,6 +65,8 @@ func validNewPod() *api.Pod {
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
Containers: []api.Container{
{
Name: "foo",
@ -118,8 +121,10 @@ func TestDelete(t *testing.T) {
key = etcdtest.AddPrefix(key)
test := resttest.New(t, storage, fakeEtcdClient.SetError)
expectedNode := "some-node"
createFn := func() runtime.Object {
pod := validChangedPod()
pod.Spec.NodeName = expectedNode
fakeEtcdClient.Data[key] = tools.EtcdResponseWithError{
R: &etcd.Response{
Node: &etcd.Node{
@ -134,8 +139,17 @@ func TestDelete(t *testing.T) {
if fakeEtcdClient.Data[key].R.Node == nil {
return false
}
return fakeEtcdClient.Data[key].R.Node.TTL == 30
obj, err := latest.Codec.Decode([]byte(fakeEtcdClient.Data[key].R.Node.Value))
if err != nil {
return false
}
pod := obj.(*api.Pod)
t.Logf("found object %#v", pod.ObjectMeta)
return pod.DeletionTimestamp != nil && pod.DeletionGracePeriodSeconds != nil && *pod.DeletionGracePeriodSeconds != 0
}
test.TestDeleteGraceful(createFn, 30, gracefulSetFn)
expectedNode = ""
test.TestDelete(createFn, gracefulSetFn)
}
@ -1027,6 +1041,7 @@ func TestEtcdUpdateScheduled(t *testing.T) {
},
}), 1)
grace := int64(30)
podIn := api.Pod{
ObjectMeta: api.ObjectMeta{
Name: "foo",
@ -1048,6 +1063,8 @@ func TestEtcdUpdateScheduled(t *testing.T) {
},
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
}
_, _, err := registry.Update(ctx, &podIn)
@ -1088,7 +1105,7 @@ func TestEtcdUpdateStatus(t *testing.T) {
},
},
}
fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, &podStart), 1)
fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, &podStart), 0)
podIn := api.Pod{
ObjectMeta: api.ObjectMeta{
@ -1117,6 +1134,8 @@ func TestEtcdUpdateStatus(t *testing.T) {
expected := podStart
expected.ResourceVersion = "2"
grace := int64(30)
expected.Spec.TerminationGracePeriodSeconds = &grace
expected.Spec.RestartPolicy = api.RestartPolicyAlways
expected.Spec.DNSPolicy = api.DNSClusterFirst
expected.Spec.Containers[0].ImagePullPolicy = api.PullIfNotPresent

View File

@ -81,15 +81,49 @@ func (podStrategy) ValidateUpdate(ctx api.Context, obj, old runtime.Object) fiel
return append(errorList, validation.ValidatePodUpdate(obj.(*api.Pod), old.(*api.Pod))...)
}
// AllowUnconditionalUpdate allows pods to be overwritten
func (podStrategy) AllowUnconditionalUpdate() bool {
return true
}
// CheckGracefulDelete allows a pod to be gracefully deleted.
// CheckGracefulDelete allows a pod to be gracefully deleted. It updates the DeleteOptions to
// reflect the desired grace value.
func (podStrategy) CheckGracefulDelete(obj runtime.Object, options *api.DeleteOptions) bool {
if options == nil {
return false
}
pod := obj.(*api.Pod)
period := int64(0)
// user has specified a value
if options.GracePeriodSeconds != nil {
period = *options.GracePeriodSeconds
} else {
// use the default value if set, or deletes the pod immediately (0)
if pod.Spec.TerminationGracePeriodSeconds != nil {
period = *pod.Spec.TerminationGracePeriodSeconds
}
}
// if the pod is not scheduled, delete immediately
if len(pod.Spec.NodeName) == 0 {
period = 0
}
// ensure the options and the pod are in sync
options.GracePeriodSeconds = &period
return true
}
type podStrategyWithoutGraceful struct {
podStrategy
}
// CheckGracefulDelete prohibits graceful deletion.
func (podStrategyWithoutGraceful) CheckGracefulDelete(obj runtime.Object, options *api.DeleteOptions) bool {
return false
}
// StrategyWithoutGraceful implements the legacy instant delele behavior.
var StrategyWithoutGraceful = podStrategyWithoutGraceful{Strategy}
type podStatusStrategy struct {
podStrategy
}
@ -100,6 +134,7 @@ func (podStatusStrategy) PrepareForUpdate(obj, old runtime.Object) {
newPod := obj.(*api.Pod)
oldPod := old.(*api.Pod)
newPod.Spec = oldPod.Spec
newPod.DeletionTimestamp = nil
}
func (podStatusStrategy) ValidateUpdate(ctx api.Context, obj, old runtime.Object) fielderrors.ValidationErrorList {

View File

@ -389,7 +389,7 @@ func TestEtcdUpdateStatus(t *testing.T) {
key, _ := registry.KeyFunc(ctx, "foo")
key = etcdtest.AddPrefix(key)
resourcequotaStart := validNewResourceQuota()
fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, resourcequotaStart), 1)
fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, resourcequotaStart), 0)
resourcequotaIn := &api.ResourceQuota{
ObjectMeta: api.ObjectMeta{

View File

@ -54,11 +54,13 @@ func newTestCacher(client tools.EtcdClient) *storage.Cacher {
}
func makeTestPod(name string) *api.Pod {
gracePeriod := int64(30)
return &api.Pod{
ObjectMeta: api.ObjectMeta{Namespace: "ns", Name: name},
Spec: api.PodSpec{
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
TerminationGracePeriodSeconds: &gracePeriod,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
},
}
}

View File

@ -394,14 +394,21 @@ func (h *etcdHelper) GuaranteedUpdate(key string, ptrToType runtime.Object, igno
ttl := uint64(0)
if node != nil {
index = node.ModifiedIndex
if node.TTL > 0 {
if node.TTL != 0 {
ttl = uint64(node.TTL)
}
if node.Expiration != nil && ttl == 0 {
ttl = 1
}
} else if res != nil {
index = res.EtcdIndex
}
if newTTL != nil {
if ttl != 0 && *newTTL == 0 {
// TODO: remove this after we have verified this is no longer an issue
glog.V(4).Infof("GuaranteedUpdate is clearing TTL for %q, may not be intentional", key)
}
ttl = *newTTL
}

View File

@ -123,28 +123,32 @@ func TestList(t *testing.T) {
},
},
}
grace := int64(30)
expect := api.PodList{
ListMeta: api.ListMeta{ResourceVersion: "10"},
Items: []api.Pod{
{
ObjectMeta: api.ObjectMeta{Name: "bar", ResourceVersion: "2"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
{
ObjectMeta: api.ObjectMeta{Name: "baz", ResourceVersion: "3"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
{
ObjectMeta: api.ObjectMeta{Name: "foo", ResourceVersion: "1"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
},
@ -206,6 +210,7 @@ func TestListAcrossDirectories(t *testing.T) {
},
},
}
grace := int64(30)
expect := api.PodList{
ListMeta: api.ListMeta{ResourceVersion: "10"},
Items: []api.Pod{
@ -213,22 +218,25 @@ func TestListAcrossDirectories(t *testing.T) {
{
ObjectMeta: api.ObjectMeta{Name: "baz", ResourceVersion: "3"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
{
ObjectMeta: api.ObjectMeta{Name: "foo", ResourceVersion: "1"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
{
ObjectMeta: api.ObjectMeta{Name: "bar", ResourceVersion: "2"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
},
@ -278,28 +286,32 @@ func TestListExcludesDirectories(t *testing.T) {
},
},
}
grace := int64(30)
expect := api.PodList{
ListMeta: api.ListMeta{ResourceVersion: "10"},
Items: []api.Pod{
{
ObjectMeta: api.ObjectMeta{Name: "bar", ResourceVersion: "2"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
{
ObjectMeta: api.ObjectMeta{Name: "baz", ResourceVersion: "3"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
{
ObjectMeta: api.ObjectMeta{Name: "foo", ResourceVersion: "1"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
},
},
@ -319,11 +331,13 @@ func TestGet(t *testing.T) {
fakeClient := tools.NewFakeEtcdClient(t)
helper := newEtcdHelper(fakeClient, testapi.Codec(), etcdtest.PathPrefix())
key := etcdtest.AddPrefix("/some/key")
grace := int64(30)
expect := api.Pod{
ObjectMeta: api.ObjectMeta{Name: "foo"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
}
fakeClient.Set(key, runtime.EncodeOrDie(testapi.Codec(), &expect), 0)

View File

@ -38,6 +38,7 @@ const (
EtcdSet = "set"
EtcdCAS = "compareAndSwap"
EtcdDelete = "delete"
EtcdExpire = "expire"
)
// TransformFunc attempts to convert an object to another object for use with a watcher.
@ -353,7 +354,7 @@ func (w *etcdWatcher) sendResult(res *etcd.Response) {
w.sendAdd(res)
case EtcdSet, EtcdCAS:
w.sendModify(res)
case EtcdDelete:
case EtcdDelete, EtcdExpire:
w.sendDelete(res)
default:
glog.Errorf("unknown action: %v", res.Action)

View File

@ -20,6 +20,7 @@ import (
"errors"
"sort"
"sync"
"time"
"github.com/coreos/go-etcd/etcd"
)
@ -52,6 +53,8 @@ type FakeEtcdClient struct {
TestIndex bool
ChangeIndex uint64
LastSetTTL uint64
// Will avoid setting the expires header on objects to make comparison easier
HideExpires bool
Machines []string
// Will become valid after Watch is called; tester may write to it. Tester may
@ -175,6 +178,11 @@ func (f *FakeEtcdClient) setLocked(key, value string, ttl uint64) (*etcd.Respons
prevResult := f.Data[key]
createdIndex := prevResult.R.Node.CreatedIndex
f.t.Logf("updating %v, index %v -> %v (ttl: %d)", key, createdIndex, i, ttl)
var expires *time.Time
if !f.HideExpires && ttl > 0 {
now := time.Now()
expires = &now
}
result := EtcdResponseWithError{
R: &etcd.Response{
Node: &etcd.Node{
@ -182,6 +190,7 @@ func (f *FakeEtcdClient) setLocked(key, value string, ttl uint64) (*etcd.Respons
CreatedIndex: createdIndex,
ModifiedIndex: i,
TTL: int64(ttl),
Expiration: expires,
},
},
}

View File

@ -132,11 +132,13 @@ func PriorityTwo(pod *api.Pod, podLister algorithm.PodLister, minionLister algor
}
func TestDefaultErrorFunc(t *testing.T) {
grace := int64(30)
testPod := &api.Pod{
ObjectMeta: api.ObjectMeta{Name: "foo", Namespace: "bar"},
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSClusterFirst,
TerminationGracePeriodSeconds: &grace,
},
}
handler := util.FakeHandler{

View File

@ -791,7 +791,7 @@ func getUDData(jpgExpected string, ns string) func(*client.Client, string) error
if strings.Contains(data.Image, jpgExpected) {
return nil
} else {
return errors.New(fmt.Sprintf("data served up in container is innaccurate, %s didn't contain %s", data, jpgExpected))
return errors.New(fmt.Sprintf("data served up in container is inaccurate, %s didn't contain %s", data, jpgExpected))
}
}
}

View File

@ -78,8 +78,8 @@ var _ = Describe("Pod Disks", func() {
By("cleaning up PD-RW test environment")
// Teardown pods, PD. Ignore errors.
// Teardown should do nothing unless test failed.
podClient.Delete(host0Pod.Name, nil)
podClient.Delete(host1Pod.Name, nil)
podClient.Delete(host0Pod.Name, api.NewDeleteOptions(0))
podClient.Delete(host1Pod.Name, api.NewDeleteOptions(0))
detachPD(host0Name, diskName)
detachPD(host1Name, diskName)
deletePD(diskName)
@ -98,7 +98,7 @@ var _ = Describe("Pod Disks", func() {
Logf("Wrote value: %v", testFileContents)
By("deleting host0Pod")
expectNoError(podClient.Delete(host0Pod.Name, nil), "Failed to delete host0Pod")
expectNoError(podClient.Delete(host0Pod.Name, api.NewDeleteOptions(0)), "Failed to delete host0Pod")
By("submitting host1Pod to kubernetes")
_, err = podClient.Create(host1Pod)
@ -113,7 +113,7 @@ var _ = Describe("Pod Disks", func() {
Expect(strings.TrimSpace(v)).To(Equal(strings.TrimSpace(testFileContents)))
By("deleting host1Pod")
expectNoError(podClient.Delete(host1Pod.Name, nil), "Failed to delete host1Pod")
expectNoError(podClient.Delete(host1Pod.Name, api.NewDeleteOptions(0)), "Failed to delete host1Pod")
By(fmt.Sprintf("deleting PD %q", diskName))
deletePDWithRetry(diskName)
@ -136,9 +136,9 @@ var _ = Describe("Pod Disks", func() {
By("cleaning up PD-RO test environment")
// Teardown pods, PD. Ignore errors.
// Teardown should do nothing unless test failed.
podClient.Delete(rwPod.Name, nil)
podClient.Delete(host0ROPod.Name, nil)
podClient.Delete(host1ROPod.Name, nil)
podClient.Delete(rwPod.Name, api.NewDeleteOptions(0))
podClient.Delete(host0ROPod.Name, api.NewDeleteOptions(0))
podClient.Delete(host1ROPod.Name, api.NewDeleteOptions(0))
detachPD(host0Name, diskName)
detachPD(host1Name, diskName)
@ -149,7 +149,7 @@ var _ = Describe("Pod Disks", func() {
_, err = podClient.Create(rwPod)
expectNoError(err, "Failed to create rwPod")
expectNoError(framework.WaitForPodRunning(rwPod.Name))
expectNoError(podClient.Delete(rwPod.Name, nil), "Failed to delete host0Pod")
expectNoError(podClient.Delete(rwPod.Name, api.NewDeleteOptions(0)), "Failed to delete host0Pod")
expectNoError(waitForPDDetach(diskName, host0Name))
By("submitting host0ROPod to kubernetes")
@ -165,10 +165,10 @@ var _ = Describe("Pod Disks", func() {
expectNoError(framework.WaitForPodRunning(host1ROPod.Name))
By("deleting host0ROPod")
expectNoError(podClient.Delete(host0ROPod.Name, nil), "Failed to delete host0ROPod")
expectNoError(podClient.Delete(host0ROPod.Name, api.NewDeleteOptions(0)), "Failed to delete host0ROPod")
By("deleting host1ROPod")
expectNoError(podClient.Delete(host1ROPod.Name, nil), "Failed to delete host1ROPod")
expectNoError(podClient.Delete(host1ROPod.Name, api.NewDeleteOptions(0)), "Failed to delete host1ROPod")
By(fmt.Sprintf("deleting PD %q", diskName))
deletePDWithRetry(diskName)

View File

@ -43,7 +43,7 @@ func runLivenessTest(c *client.Client, ns string, podDescr *api.Pod, expectResta
// At the end of the test, clean up by removing the pod.
defer func() {
By("deleting the pod")
c.Pods(ns).Delete(podDescr.Name, nil)
c.Pods(ns).Delete(podDescr.Name, api.NewDeleteOptions(0))
}()
// Wait until the pod is not pending. (Here we need to check for something other than
@ -86,15 +86,14 @@ func runLivenessTest(c *client.Client, ns string, podDescr *api.Pod, expectResta
func testHostIP(c *client.Client, ns string, pod *api.Pod) {
podClient := c.Pods(ns)
By("creating pod")
defer podClient.Delete(pod.Name, nil)
_, err := podClient.Create(pod)
if err != nil {
defer podClient.Delete(pod.Name, api.NewDeleteOptions(0))
if _, err := podClient.Create(pod); err != nil {
Failf("Failed to create pod: %v", err)
}
By("ensuring that pod is running and has a hostIP")
// Wait for the pods to enter the running state. Waiting loops until the pods
// are running so non-running pods cause a timeout for this test.
err = waitForPodRunningInNamespace(c, pod.Name, ns)
err := waitForPodRunningInNamespace(c, pod.Name, ns)
Expect(err).NotTo(HaveOccurred())
// Try to make sure we get a hostIP for each pod.
hostIPTimeout := 2 * time.Minute
@ -222,7 +221,7 @@ var _ = Describe("Pods", func() {
// We call defer here in case there is a problem with
// the test so we can ensure that we clean up after
// ourselves
defer podClient.Delete(pod.Name, nil)
defer podClient.Delete(pod.Name, api.NewDeleteOptions(0))
_, err = podClient.Create(pod)
if err != nil {
Failf("Failed to create pod: %v", err)
@ -235,7 +234,7 @@ var _ = Describe("Pods", func() {
}
Expect(len(pods.Items)).To(Equal(1))
By("veryfying pod creation was observed")
By("verifying pod creation was observed")
select {
case event, _ := <-w.ResultChan():
if event.Type != watch.Added {
@ -245,7 +244,7 @@ var _ = Describe("Pods", func() {
Fail("Timeout while waiting for pod creation")
}
By("deleting the pod")
By("deleting the pod gracefully")
if err := podClient.Delete(pod.Name, nil); err != nil {
Failf("Failed to delete pod: %v", err)
}
@ -253,11 +252,13 @@ var _ = Describe("Pods", func() {
By("verifying pod deletion was observed")
deleted := false
timeout := false
var lastPod *api.Pod
timer := time.After(podStartTimeout)
for !deleted && !timeout {
select {
case event, _ := <-w.ResultChan():
if event.Type == watch.Deleted {
lastPod = event.Object.(*api.Pod)
deleted = true
}
case <-timer:
@ -268,6 +269,9 @@ var _ = Describe("Pods", func() {
Fail("Failed to observe pod deletion")
}
Expect(lastPod.DeletionTimestamp).ToNot(BeNil())
Expect(lastPod.Spec.TerminationGracePeriodSeconds).ToNot(BeZero())
pods, err = podClient.List(labels.SelectorFromSet(labels.Set(map[string]string{"time": value})), fields.Everything())
if err != nil {
Fail(fmt.Sprintf("Failed to list pods to verify deletion: %v", err))
@ -312,7 +316,7 @@ var _ = Describe("Pods", func() {
By("submitting the pod to kubernetes")
defer func() {
By("deleting the pod")
podClient.Delete(pod.Name, nil)
podClient.Delete(pod.Name, api.NewDeleteOptions(0))
}()
pod, err := podClient.Create(pod)
if err != nil {
@ -376,7 +380,7 @@ var _ = Describe("Pods", func() {
},
},
}
defer framework.Client.Pods(framework.Namespace.Name).Delete(serverPod.Name, nil)
defer framework.Client.Pods(framework.Namespace.Name).Delete(serverPod.Name, api.NewDeleteOptions(0))
_, err := framework.Client.Pods(framework.Namespace.Name).Create(serverPod)
if err != nil {
Failf("Failed to create serverPod: %v", err)
@ -600,7 +604,7 @@ var _ = Describe("Pods", func() {
// We call defer here in case there is a problem with
// the test so we can ensure that we clean up after
// ourselves
podClient.Delete(pod.Name)
podClient.Delete(pod.Name, api.NewDeleteOptions(0))
}()
By("waiting for the pod to start running")
@ -673,7 +677,7 @@ var _ = Describe("Pods", func() {
// We call defer here in case there is a problem with
// the test so we can ensure that we clean up after
// ourselves
podClient.Delete(pod.Name)
podClient.Delete(pod.Name, api.NewDeleteOptions(0))
}()
By("waiting for the pod to start running")

View File

@ -831,20 +831,24 @@ func expectNoError(err error, explain ...interface{}) {
ExpectWithOffset(1, err).NotTo(HaveOccurred(), explain...)
}
// Stops everything from filePath from namespace ns and checks if everything maching selectors from the given namespace is correctly stopped.
// Stops everything from filePath from namespace ns and checks if everything matching selectors from the given namespace is correctly stopped.
func cleanup(filePath string, ns string, selectors ...string) {
By("using stop to clean up resources")
By("using delete to clean up resources")
var nsArg string
if ns != "" {
nsArg = fmt.Sprintf("--namespace=%s", ns)
}
runKubectl("stop", "-f", filePath, nsArg)
runKubectl("stop", "--grace-period=0", "-f", filePath, nsArg)
for _, selector := range selectors {
resources := runKubectl("get", "pods,rc,svc", "-l", selector, "--no-headers", nsArg)
resources := runKubectl("get", "rc,svc", "-l", selector, "--no-headers", nsArg)
if resources != "" {
Failf("Resources left running after stop:\n%s", resources)
}
pods := runKubectl("get", "pods", "-l", selector, nsArg, "-t", "{{ range .items }}{{ if not .metadata.deletionTimestamp }}{{ .metadata.name }}{{ \"\\n\" }}{{ end }}{{ end }}")
if pods != "" {
Failf("Pods left unterminated after stop:\n%s", pods)
}
}
}

View File

@ -0,0 +1,28 @@
{
"kind": "Pod",
"apiVersion": "v1",
"metadata": {
"name": "slow-pod",
"labels": {
"name": "nettest"
}
},
"spec": {
"containers": [
{
"name": "webserver",
"image": "gcr.io/google_containers/nettest:1.5",
"args": [
"-service=nettest",
"-delay-shutdown=10"
],
"ports": [
{
"containerPort": 8080,
"protocol": "TCP"
}
]
}
]
}
}

View File

@ -0,0 +1,42 @@
{
"kind": "ReplicationController",
"apiVersion": "v1",
"metadata": {
"name": "slow-rc",
"labels": {
"name": "nettest"
}
},
"spec": {
"replicas": 8,
"selector": {
"name": "nettest"
},
"template": {
"metadata": {
"labels": {
"name": "nettest"
}
},
"spec": {
"terminationGracePeriodSeconds": 5,
"containers": [
{
"name": "webserver",
"image": "gcr.io/google_containers/nettest:1.5",
"args": [
"-service=nettest",
"-delay-shutdown=10"
],
"ports": [
{
"containerPort": 8080,
"protocol": "TCP"
}
]
}
]
}
}
}
}

View File

@ -39,7 +39,9 @@ import (
"math/rand"
"net/http"
"os"
"os/signal"
"sync"
"syscall"
"time"
"k8s.io/kubernetes/pkg/client"
@ -47,10 +49,11 @@ import (
)
var (
port = flag.Int("port", 8080, "Port number to serve at.")
peerCount = flag.Int("peers", 8, "Must find at least this many peers for the test to pass.")
service = flag.String("service", "nettest", "Service to find other network test pods in.")
namespace = flag.String("namespace", "default", "Namespace of this pod. TODO: kubernetes should make this discoverable.")
port = flag.Int("port", 8080, "Port number to serve at.")
peerCount = flag.Int("peers", 8, "Must find at least this many peers for the test to pass.")
service = flag.String("service", "nettest", "Service to find other network test pods in.")
namespace = flag.String("namespace", "default", "Namespace of this pod. TODO: kubernetes should make this discoverable.")
delayShutdown = flag.Int("delay-shutdown", 0, "Number of seconds to delay shutdown when receiving SIGTERM.")
)
// State tracks the internal state of our little http server.
@ -178,6 +181,17 @@ func main() {
log.Fatalf("Error getting hostname: %v", err)
}
if *delayShutdown > 0 {
termCh := make(chan os.Signal)
signal.Notify(termCh, syscall.SIGTERM)
go func() {
<-termCh
log.Printf("Sleeping %d seconds before exit ...", *delayShutdown)
time.Sleep(time.Duration(*delayShutdown) * time.Second)
os.Exit(0)
}()
}
state := State{
Hostname: hostname,
StillContactingPeers: true,

View File

@ -232,7 +232,7 @@ var deleteNow string = `
{
"kind": "DeleteOptions",
"apiVersion": "` + testapi.Version() + `",
"gracePeriodSeconds": null%s
"gracePeriodSeconds": 0%s
}
`

View File

@ -80,6 +80,59 @@ func TestGet(t *testing.T) {
})
}
func TestWriteTTL(t *testing.T) {
client := framework.NewEtcdClient()
etcdStorage := etcd.NewEtcdStorage(client, testapi.Codec(), "")
framework.WithEtcdKey(func(key string) {
testObject := api.ServiceAccount{ObjectMeta: api.ObjectMeta{Name: "foo"}}
if err := etcdStorage.Set(key, &testObject, nil, 0); err != nil {
t.Fatalf("unexpected error: %v", err)
}
result := &api.ServiceAccount{}
err := etcdStorage.GuaranteedUpdate(key, result, false, func(obj runtime.Object, res storage.ResponseMeta) (runtime.Object, *uint64, error) {
if in, ok := obj.(*api.ServiceAccount); !ok || in.Name != "foo" {
t.Fatalf("unexpected existing object: %v", obj)
}
if res.TTL != 0 {
t.Fatalf("unexpected TTL: %#v", res)
}
ttl := uint64(10)
out := &api.ServiceAccount{ObjectMeta: api.ObjectMeta{Name: "out"}}
return out, &ttl, nil
})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Name != "out" {
t.Errorf("unexpected response: %#v", result)
}
if res, err := client.Get(key, false, false); err != nil || res == nil || res.Node.TTL != 10 {
t.Fatalf("unexpected get: %v %#v", err, res)
}
result = &api.ServiceAccount{}
err = etcdStorage.GuaranteedUpdate(key, result, false, func(obj runtime.Object, res storage.ResponseMeta) (runtime.Object, *uint64, error) {
if in, ok := obj.(*api.ServiceAccount); !ok || in.Name != "out" {
t.Fatalf("unexpected existing object: %v", obj)
}
if res.TTL <= 1 {
t.Fatalf("unexpected TTL: %#v", res)
}
out := &api.ServiceAccount{ObjectMeta: api.ObjectMeta{Name: "out2"}}
return out, nil, nil
})
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if result.Name != "out2" {
t.Errorf("unexpected response: %#v", result)
}
if res, err := client.Get(key, false, false); err != nil || res == nil || res.Node.TTL <= 1 {
t.Fatalf("unexpected get: %v %#v", err, res)
}
})
}
func TestWatch(t *testing.T) {
client := framework.NewEtcdClient()
etcdStorage := etcd.NewEtcdStorage(client, testapi.Codec(), etcdtest.PathPrefix())

View File

@ -277,7 +277,7 @@ func DoTestUnschedulableNodes(t *testing.T, restClient *client.Client, nodeStore
t.Logf("Test %d: Pod got scheduled on a schedulable node", i)
}
err = restClient.Pods(api.NamespaceDefault).Delete(myPod.Name, nil)
err = restClient.Pods(api.NamespaceDefault).Delete(myPod.Name, api.NewDeleteOptions(0))
if err != nil {
t.Errorf("Failed to delete pod: %v", err)
}