diff --git a/cluster/saltbase/salt/fluentd-es/fluentd-es.yaml b/cluster/saltbase/salt/fluentd-es/fluentd-es.yaml index a2075fd927..d5419146f4 100644 --- a/cluster/saltbase/salt/fluentd-es/fluentd-es.yaml +++ b/cluster/saltbase/salt/fluentd-es/fluentd-es.yaml @@ -18,6 +18,7 @@ spec: mountPath: /varlog - name: containers mountPath: /var/lib/docker/containers + terminationGracePeriodSeconds: 30 volumes: - name: varlog hostPath: diff --git a/cluster/saltbase/salt/fluentd-gcp/fluentd-gcp.yaml b/cluster/saltbase/salt/fluentd-gcp/fluentd-gcp.yaml index c6b1547af3..31ba54acb7 100644 --- a/cluster/saltbase/salt/fluentd-gcp/fluentd-gcp.yaml +++ b/cluster/saltbase/salt/fluentd-gcp/fluentd-gcp.yaml @@ -19,6 +19,7 @@ spec: mountPath: /varlog - name: containers mountPath: /var/lib/docker/containers + terminationGracePeriodSeconds: 30 volumes: - name: varlog hostPath: diff --git a/cmd/integration/integration.go b/cmd/integration/integration.go index 6f3246666d..3e7d694edb 100644 --- a/cmd/integration/integration.go +++ b/cmd/integration/integration.go @@ -872,18 +872,24 @@ func runSchedulerNoPhantomPodsTest(client *client.Client) { // Delete a pod to free up room. glog.Infof("Deleting pod %v", bar.Name) - err = client.Pods(api.NamespaceDefault).Delete(bar.Name, nil) + err = client.Pods(api.NamespaceDefault).Delete(bar.Name, api.NewDeleteOptions(1)) if err != nil { glog.Fatalf("FAILED: couldn't delete pod %q: %v", bar.Name, err) } + time.Sleep(2 * time.Second) + pod.ObjectMeta.Name = "phantom.baz" baz, err := client.Pods(api.NamespaceDefault).Create(pod) if err != nil { glog.Fatalf("Failed to create pod: %v, %v", pod, err) } if err := wait.Poll(time.Second, time.Second*60, podRunning(client, baz.Namespace, baz.Name)); err != nil { - glog.Fatalf("FAILED: (Scheduler probably didn't process deletion of 'phantom.bar') Pod never started running: %v", err) + if pod, perr := client.Pods(api.NamespaceDefault).Get("phantom.bar"); perr == nil { + glog.Fatalf("FAILED: 'phantom.bar' was never deleted: %#v", pod) + } else { + glog.Fatalf("FAILED: (Scheduler probably didn't process deletion of 'phantom.bar') Pod never started running: %v", err) + } } glog.Info("Scheduler doesn't make phantom pods: test passed.") diff --git a/contrib/for-tests/network-tester/slow-pod.json b/contrib/for-tests/network-tester/slow-pod.json new file mode 100644 index 0000000000..8fce984aed --- /dev/null +++ b/contrib/for-tests/network-tester/slow-pod.json @@ -0,0 +1,28 @@ +{ + "kind": "Pod", + "apiVersion": "v1", + "metadata": { + "name": "slow-pod", + "labels": { + "name": "nettest" + } + }, + "spec": { + "containers": [ + { + "name": "webserver", + "image": "gcr.io/google_containers/nettest:1.5", + "args": [ + "-service=nettest", + "-delay-shutdown=10" + ], + "ports": [ + { + "containerPort": 8080, + "protocol": "TCP" + } + ] + } + ] + } +} diff --git a/contrib/for-tests/network-tester/slow-rc.json b/contrib/for-tests/network-tester/slow-rc.json new file mode 100644 index 0000000000..d70a145555 --- /dev/null +++ b/contrib/for-tests/network-tester/slow-rc.json @@ -0,0 +1,42 @@ +{ + "kind": "ReplicationController", + "apiVersion": "v1", + "metadata": { + "name": "slow-rc", + "labels": { + "name": "nettest" + } + }, + "spec": { + "replicas": 8, + "selector": { + "name": "nettest" + }, + "template": { + "metadata": { + "labels": { + "name": "nettest" + } + }, + "spec": { + "terminationGracePeriodSeconds": 5, + "containers": [ + { + "name": "webserver", + "image": "gcr.io/google_containers/nettest:1.5", + "args": [ + "-service=nettest", + "-delay-shutdown=10" + ], + "ports": [ + { + "containerPort": 8080, + "protocol": "TCP" + } + ] + } + ] + } + } + } +} diff --git a/hack/lib/test.sh b/hack/lib/test.sh index 02a4c8ad9e..c48337763a 100644 --- a/hack/lib/test.sh +++ b/hack/lib/test.sh @@ -23,7 +23,7 @@ readonly red=$(tput setaf 1) readonly green=$(tput setaf 2) kube::test::clear_all() { - kubectl delete "${kube_flags[@]}" rc,pods --all + kubectl delete "${kube_flags[@]}" rc,pods --all --grace-period=0 } kube::test::get_object_assert() { diff --git a/hack/test-cmd.sh b/hack/test-cmd.sh index 4e1635ca7a..c650bf06af 100755 --- a/hack/test-cmd.sh +++ b/hack/test-cmd.sh @@ -247,6 +247,11 @@ runTests() { kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" 'valid-pod:' # Command kubectl delete pod valid-pod "${kube_flags[@]}" + # Post-condition: pod is still there, in terminating + kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" 'valid-pod:' + [[ "$(kubectl get pods "${kube_flags[@]}" | grep Terminating)" ]] + # Command + kubectl delete pod valid-pod "${kube_flags[@]}" --grace-period=0 # Post-condition: no POD is running kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" '' @@ -262,7 +267,7 @@ runTests() { # Pre-condition: valid-pod POD is running kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" 'valid-pod:' # Command - kubectl delete -f docs/admin/limitrange/valid-pod.yaml "${kube_flags[@]}" + kubectl delete -f docs/admin/limitrange/valid-pod.yaml "${kube_flags[@]}" --grace-period=0 # Post-condition: no POD is running kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" '' @@ -278,7 +283,7 @@ runTests() { # Pre-condition: valid-pod POD is running kube::test::get_object_assert "pods -l'name in (valid-pod)'" '{{range.items}}{{$id_field}}:{{end}}' 'valid-pod:' # Command - kubectl delete pods -l'name in (valid-pod)' "${kube_flags[@]}" + kubectl delete pods -l'name in (valid-pod)' "${kube_flags[@]}" --grace-period=0 # Post-condition: no POD is running kube::test::get_object_assert "pods -l'name in (valid-pod)'" '{{range.items}}{{$id_field}}:{{end}}' '' @@ -310,7 +315,7 @@ runTests() { # Pre-condition: valid-pod POD is running kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" 'valid-pod:' # Command - kubectl delete --all pods "${kube_flags[@]}" # --all remove all the pods + kubectl delete --all pods "${kube_flags[@]}" --grace-period=0 # --all remove all the pods # Post-condition: no POD is running kube::test::get_object_assert "pods -l'name in (valid-pod)'" '{{range.items}}{{$id_field}}:{{end}}' '' @@ -327,7 +332,7 @@ runTests() { # Pre-condition: valid-pod and redis-proxy PODs are running kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" 'redis-proxy:valid-pod:' # Command - kubectl delete pods valid-pod redis-proxy "${kube_flags[@]}" # delete multiple pods at once + kubectl delete pods valid-pod redis-proxy "${kube_flags[@]}" --grace-period=0 # delete multiple pods at once # Post-condition: no POD is running kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" '' @@ -344,7 +349,7 @@ runTests() { # Pre-condition: valid-pod and redis-proxy PODs are running kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" 'redis-proxy:valid-pod:' # Command - kubectl stop pods valid-pod redis-proxy "${kube_flags[@]}" # stop multiple pods at once + kubectl stop pods valid-pod redis-proxy "${kube_flags[@]}" --grace-period=0 # stop multiple pods at once # Post-condition: no POD is running kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" '' @@ -368,7 +373,7 @@ runTests() { # Pre-condition: valid-pod POD is running kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" 'valid-pod:' # Command - kubectl delete pods -lnew-name=new-valid-pod "${kube_flags[@]}" + kubectl delete pods -lnew-name=new-valid-pod --grace-period=0 "${kube_flags[@]}" # Post-condition: no POD is running kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" '' @@ -419,7 +424,7 @@ runTests() { # Pre-condition: valid-pod POD is running kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" 'valid-pod:' # Command - kubectl delete pods -l'name in (valid-pod-super-sayan)' "${kube_flags[@]}" + kubectl delete pods -l'name in (valid-pod-super-sayan)' --grace-period=0 "${kube_flags[@]}" # Post-condition: no POD is running kube::test::get_object_assert pods "{{range.items}}{{$id_field}}:{{end}}" '' @@ -455,7 +460,7 @@ runTests() { # Pre-condition: valid-pod POD is running kube::test::get_object_assert 'pods --namespace=other' "{{range.items}}{{$id_field}}:{{end}}" 'valid-pod:' # Command - kubectl delete "${kube_flags[@]}" pod --namespace=other valid-pod + kubectl delete "${kube_flags[@]}" pod --namespace=other valid-pod --grace-period=0 # Post-condition: no POD is running kube::test::get_object_assert 'pods --namespace=other' "{{range.items}}{{$id_field}}:{{end}}" '' diff --git a/pkg/api/deep_copy_generated.go b/pkg/api/deep_copy_generated.go index 2a88c4cda3..f4acae42ea 100644 --- a/pkg/api/deep_copy_generated.go +++ b/pkg/api/deep_copy_generated.go @@ -1011,6 +1011,12 @@ func deepCopy_api_ObjectMeta(in ObjectMeta, out *ObjectMeta, c *conversion.Clone } else { out.DeletionTimestamp = nil } + if in.DeletionGracePeriodSeconds != nil { + out.DeletionGracePeriodSeconds = new(int64) + *out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds + } else { + out.DeletionGracePeriodSeconds = nil + } if in.Labels != nil { out.Labels = make(map[string]string) for key, val := range in.Labels { diff --git a/pkg/api/rest/create.go b/pkg/api/rest/create.go index 3d51ded7e7..e653a6b74a 100644 --- a/pkg/api/rest/create.go +++ b/pkg/api/rest/create.go @@ -59,6 +59,8 @@ func BeforeCreate(strategy RESTCreateStrategy, ctx api.Context, obj runtime.Obje } else { objectMeta.Namespace = api.NamespaceNone } + objectMeta.DeletionTimestamp = nil + objectMeta.DeletionGracePeriodSeconds = nil strategy.PrepareForCreate(obj) api.FillObjectMetaSystemFields(ctx, objectMeta) api.GenerateName(strategy, objectMeta) diff --git a/pkg/api/rest/delete.go b/pkg/api/rest/delete.go index 9d433f80fe..d1b04e6dde 100644 --- a/pkg/api/rest/delete.go +++ b/pkg/api/rest/delete.go @@ -40,12 +40,37 @@ func BeforeDelete(strategy RESTDeleteStrategy, ctx api.Context, obj runtime.Obje if strategy == nil { return false, false, nil } - _, _, kerr := objectMetaAndKind(strategy, obj) + objectMeta, _, kerr := objectMetaAndKind(strategy, obj) if kerr != nil { return false, false, kerr } + + // if the object is already being deleted + if objectMeta.DeletionTimestamp != nil { + // if we are already being deleted, we may only shorten the deletion grace period + // this means the object was gracefully deleted previously but deletionGracePeriodSeconds was not set, + // so we force deletion immediately + if objectMeta.DeletionGracePeriodSeconds == nil { + return false, false, nil + } + // only a shorter grace period may be provided by a user + if options.GracePeriodSeconds != nil { + period := int64(*options.GracePeriodSeconds) + if period > *objectMeta.DeletionGracePeriodSeconds { + return false, true, nil + } + objectMeta.DeletionGracePeriodSeconds = &period + options.GracePeriodSeconds = &period + return true, false, nil + } + // graceful deletion is pending, do nothing + options.GracePeriodSeconds = objectMeta.DeletionGracePeriodSeconds + return false, true, nil + } + if !strategy.CheckGracefulDelete(obj, options) { return false, false, nil } + objectMeta.DeletionGracePeriodSeconds = options.GracePeriodSeconds return true, false, nil } diff --git a/pkg/api/rest/resttest/resttest.go b/pkg/api/rest/resttest/resttest.go index f8c1716ef6..5576692c09 100644 --- a/pkg/api/rest/resttest/resttest.go +++ b/pkg/api/rest/resttest/resttest.go @@ -298,6 +298,22 @@ func (t *Tester) testUpdateFailsOnVersion(older runtime.Object) { // ============================================================================= // Deletion tests. +func (t *Tester) TestDeleteNoGraceful(createFn func() runtime.Object, wasGracefulFn func() bool) { + existing := createFn() + objectMeta := t.getObjectMetaOrFail(existing) + ctx := api.WithNamespace(t.TestContext(), objectMeta.Namespace) + _, err := t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, api.NewDeleteOptions(10)) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if _, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name); !errors.IsNotFound(err) { + t.Errorf("unexpected error, object should not exist: %v", err) + } + if wasGracefulFn() { + t.Errorf("resource should not support graceful delete") + } +} + func (t *Tester) testDeleteInvokesValidation(invalid ...runtime.Object) { for i, obj := range invalid { objectMeta := t.getObjectMetaOrFail(obj) @@ -322,25 +338,16 @@ func (t *Tester) testDeleteNonExist(createFn func() runtime.Object) { }) } -func (t *Tester) testDeleteNoGraceful(createFn func() runtime.Object, wasGracefulFn func() bool) { - existing := createFn() - objectMeta := t.getObjectMetaOrFail(existing) - ctx := api.WithNamespace(t.TestContext(), objectMeta.Namespace) - _, err := t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, api.NewDeleteOptions(10)) - if err != nil { - t.Errorf("unexpected error: %v", err) - } - if _, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name); !errors.IsNotFound(err) { - t.Errorf("unexpected error, object should not exist: %v", err) - } - if wasGracefulFn() { - t.Errorf("resource should not support graceful delete") - } -} - // ============================================================================= // Graceful Deletion tests. +func (t *Tester) TestDeleteGraceful(createFn func() runtime.Object, expectedGrace int64, wasGracefulFn func() bool) { + t.TestDeleteGracefulHasDefault(createFn(), expectedGrace, wasGracefulFn) + t.TestDeleteGracefulWithValue(createFn(), expectedGrace, wasGracefulFn) + t.TestDeleteGracefulUsesZeroOnNil(createFn(), 0) + t.TestDeleteGracefulExtend(createFn(), expectedGrace, wasGracefulFn) +} + func (t *Tester) testDeleteGracefulHasDefault(existing runtime.Object, expectedGrace int64, wasGracefulFn func() bool) { objectMeta := t.getObjectMetaOrFail(existing) ctx := api.WithNamespace(t.TestContext(), objectMeta.Namespace) @@ -348,12 +355,99 @@ func (t *Tester) testDeleteGracefulHasDefault(existing runtime.Object, expectedG if err != nil { t.Errorf("unexpected error: %v", err) } - if _, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name); err != nil { + if !wasGracefulFn() { + t.Errorf("did not gracefully delete resource") + return + } + object, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name) + if err != nil { t.Errorf("unexpected error, object should exist: %v", err) + return + } + objectMeta, err = api.ObjectMetaFor(object) + if err != nil { + t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, object) + } + if objectMeta.DeletionTimestamp == nil { + t.Errorf("did not set deletion timestamp") + } + if objectMeta.DeletionGracePeriodSeconds == nil { + t.Fatalf("did not set deletion grace period seconds") + } + if *objectMeta.DeletionGracePeriodSeconds != expectedGrace { + t.Errorf("actual grace period does not match expected: %d", *objectMeta.DeletionGracePeriodSeconds) + } +} + +func (t *Tester) TestDeleteGracefulWithValue(existing runtime.Object, expectedGrace int64, wasGracefulFn func() bool) { + objectMeta, err := api.ObjectMetaFor(existing) + if err != nil { + t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, existing) + } + + ctx := api.WithNamespace(t.TestContext(), objectMeta.Namespace) + _, err = t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, api.NewDeleteOptions(expectedGrace+2)) + if err != nil { + t.Errorf("unexpected error: %v", err) } if !wasGracefulFn() { t.Errorf("did not gracefully delete resource") } + object, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name) + if err != nil { + t.Errorf("unexpected error, object should exist: %v", err) + } + objectMeta, err = api.ObjectMetaFor(object) + if err != nil { + t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, object) + } + if objectMeta.DeletionTimestamp == nil { + t.Errorf("did not set deletion timestamp") + } + if objectMeta.DeletionGracePeriodSeconds == nil { + t.Fatalf("did not set deletion grace period seconds") + } + if *objectMeta.DeletionGracePeriodSeconds != expectedGrace+2 { + t.Errorf("actual grace period does not match expected: %d", *objectMeta.DeletionGracePeriodSeconds) + } +} + +func (t *Tester) TestDeleteGracefulExtend(existing runtime.Object, expectedGrace int64, wasGracefulFn func() bool) { + objectMeta, err := api.ObjectMetaFor(existing) + if err != nil { + t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, existing) + } + + ctx := api.WithNamespace(t.TestContext(), objectMeta.Namespace) + _, err = t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, api.NewDeleteOptions(expectedGrace)) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if !wasGracefulFn() { + t.Errorf("did not gracefully delete resource") + } + // second delete duration is ignored + _, err = t.storage.(rest.GracefulDeleter).Delete(ctx, objectMeta.Name, api.NewDeleteOptions(expectedGrace+2)) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + object, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name) + if err != nil { + t.Errorf("unexpected error, object should exist: %v", err) + } + objectMeta, err = api.ObjectMetaFor(object) + if err != nil { + t.Fatalf("object does not have ObjectMeta: %v\n%#v", err, object) + } + if objectMeta.DeletionTimestamp == nil { + t.Errorf("did not set deletion timestamp") + } + if objectMeta.DeletionGracePeriodSeconds == nil { + t.Fatalf("did not set deletion grace period seconds") + } + if *objectMeta.DeletionGracePeriodSeconds != expectedGrace { + t.Errorf("actual grace period does not match expected: %d", *objectMeta.DeletionGracePeriodSeconds) + } } func (t *Tester) testDeleteGracefulUsesZeroOnNil(existing runtime.Object, expectedGrace int64) { @@ -364,7 +458,7 @@ func (t *Tester) testDeleteGracefulUsesZeroOnNil(existing runtime.Object, expect t.Errorf("unexpected error: %v", err) } if _, err := t.storage.(rest.Getter).Get(ctx, objectMeta.Name); !errors.IsNotFound(err) { - t.Errorf("unexpected error, object should exist: %v", err) + t.Errorf("unexpected error, object should not exist: %v", err) } } diff --git a/pkg/api/serialization_test.go b/pkg/api/serialization_test.go index ba3b0b86bc..cc19b4d1d1 100644 --- a/pkg/api/serialization_test.go +++ b/pkg/api/serialization_test.go @@ -151,6 +151,7 @@ func TestRoundTripTypes(t *testing.T) { } func TestEncode_Ptr(t *testing.T) { + grace := int64(30) pod := &api.Pod{ ObjectMeta: api.ObjectMeta{ Labels: map[string]string{"name": "foo"}, @@ -158,6 +159,8 @@ func TestEncode_Ptr(t *testing.T) { Spec: api.PodSpec{ RestartPolicy: api.RestartPolicyAlways, DNSPolicy: api.DNSClusterFirst, + + TerminationGracePeriodSeconds: &grace, }, } obj := runtime.Object(pod) diff --git a/pkg/api/testing/fuzzer.go b/pkg/api/testing/fuzzer.go index a88202f8e4..2ea5c8fc46 100644 --- a/pkg/api/testing/fuzzer.go +++ b/pkg/api/testing/fuzzer.go @@ -89,6 +89,15 @@ func FuzzerFor(t *testing.T, version string, src rand.Source) *fuzz.Fuzzer { j.LabelSelector, _ = labels.Parse("a=b") j.FieldSelector, _ = fields.ParseSelector("a=b") }, + func(j *api.PodSpec, c fuzz.Continue) { + c.FuzzNoCustom(j) + // has a default value + ttl := int64(30) + if c.RandBool() { + ttl = int64(c.Uint32()) + } + j.TerminationGracePeriodSeconds = &ttl + }, func(j *api.PodPhase, c fuzz.Continue) { statuses := []api.PodPhase{api.PodPending, api.PodRunning, api.PodFailed, api.PodUnknown} *j = statuses[c.Rand.Intn(len(statuses))] diff --git a/pkg/api/types.go b/pkg/api/types.go index 1698cda387..028d61a467 100644 --- a/pkg/api/types.go +++ b/pkg/api/types.go @@ -143,6 +143,10 @@ type ObjectMeta struct { // will send a hard termination signal to the container. DeletionTimestamp *util.Time `json:"deletionTimestamp,omitempty"` + // DeletionGracePeriodSeconds records the graceful deletion value set when graceful deletion + // was requested. Represents the most recent grace period, and may only be shortened once set. + DeletionGracePeriodSeconds *int64 `json:"deletionGracePeriodSeconds,omitempty"` + // Labels are key value pairs that may be used to scope and select individual resources. // Label keys are of the form: // label-key ::= prefixed-name | name diff --git a/pkg/api/v1/conversion_generated.go b/pkg/api/v1/conversion_generated.go index c0736192e2..a1bdb14cd0 100644 --- a/pkg/api/v1/conversion_generated.go +++ b/pkg/api/v1/conversion_generated.go @@ -1176,6 +1176,12 @@ func convert_api_ObjectMeta_To_v1_ObjectMeta(in *api.ObjectMeta, out *ObjectMeta } else { out.DeletionTimestamp = nil } + if in.DeletionGracePeriodSeconds != nil { + out.DeletionGracePeriodSeconds = new(int64) + *out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds + } else { + out.DeletionGracePeriodSeconds = nil + } if in.Labels != nil { out.Labels = make(map[string]string) for key, val := range in.Labels { @@ -3591,6 +3597,12 @@ func convert_v1_ObjectMeta_To_api_ObjectMeta(in *ObjectMeta, out *api.ObjectMeta } else { out.DeletionTimestamp = nil } + if in.DeletionGracePeriodSeconds != nil { + out.DeletionGracePeriodSeconds = new(int64) + *out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds + } else { + out.DeletionGracePeriodSeconds = nil + } if in.Labels != nil { out.Labels = make(map[string]string) for key, val := range in.Labels { diff --git a/pkg/api/v1/deep_copy_generated.go b/pkg/api/v1/deep_copy_generated.go index 24a3acdd9c..cc285d8392 100644 --- a/pkg/api/v1/deep_copy_generated.go +++ b/pkg/api/v1/deep_copy_generated.go @@ -1010,6 +1010,12 @@ func deepCopy_v1_ObjectMeta(in ObjectMeta, out *ObjectMeta, c *conversion.Cloner } else { out.DeletionTimestamp = nil } + if in.DeletionGracePeriodSeconds != nil { + out.DeletionGracePeriodSeconds = new(int64) + *out.DeletionGracePeriodSeconds = *in.DeletionGracePeriodSeconds + } else { + out.DeletionGracePeriodSeconds = nil + } if in.Labels != nil { out.Labels = make(map[string]string) for key, val := range in.Labels { diff --git a/pkg/api/v1/defaults.go b/pkg/api/v1/defaults.go index 903d6f42b6..eee2a647c0 100644 --- a/pkg/api/v1/defaults.go +++ b/pkg/api/v1/defaults.go @@ -113,6 +113,10 @@ func addDefaultingFuncs() { if obj.HostNetwork { defaultHostNetworkPorts(&obj.Containers) } + if obj.TerminationGracePeriodSeconds == nil { + period := int64(DefaultTerminationGracePeriodSeconds) + obj.TerminationGracePeriodSeconds = &period + } }, func(obj *Probe) { if obj.TimeoutSeconds == 0 { diff --git a/pkg/api/v1/types.go b/pkg/api/v1/types.go index 097b7b8125..ea0749f584 100644 --- a/pkg/api/v1/types.go +++ b/pkg/api/v1/types.go @@ -141,6 +141,10 @@ type ObjectMeta struct { // will send a hard termination signal to the container. DeletionTimestamp *util.Time `json:"deletionTimestamp,omitempty" description:"RFC 3339 date and time at which the object will be deleted; populated by the system when a graceful deletion is requested, read-only; if not set, graceful deletion of the object has not been requested; see http://releases.k8s.io/HEAD/docs/devel/api-conventions.md#metadata"` + // DeletionGracePeriodSeconds records the graceful deletion value set when graceful deletion + // was requested. Represents the most recent grace period, and may only be shortened once set. + DeletionGracePeriodSeconds *int64 `json:"deletionGracePeriodSeconds,omitempty" description:"number of seconds allowed for this object to gracefully terminate before it will be removed from the system; only set when deletionTimestamp is also set, read-only; may only be shortened"` + // Labels are key value pairs that may be used to scope and select individual resources. // TODO: replace map[string]string with labels.LabelSet type Labels map[string]string `json:"labels,omitempty" description:"map of string keys and values that can be used to organize and categorize objects; may match selectors of replication controllers and services; see http://releases.k8s.io/HEAD/docs/user-guide/labels.md"` @@ -858,6 +862,8 @@ const ( // DNSDefault indicates that the pod should use the default (as // determined by kubelet) DNS settings. DNSDefault DNSPolicy = "Default" + + DefaultTerminationGracePeriodSeconds = 30 ) // PodSpec is a description of a pod @@ -872,7 +878,7 @@ type PodSpec struct { // The grace period is the duration in seconds after the processes running in the pod are sent // a termination signal and the time when the processes are forcibly halted with a kill signal. // Set this value longer than the expected cleanup time for your process. - TerminationGracePeriodSeconds *int64 `json:"terminationGracePeriodSeconds,omitempty" description:"optional duration in seconds the pod needs to terminate gracefully; may be decreased in delete request; value must be non-negative integer; the value zero indicates delete immediately; if this value is not set, the default grace period will be used instead; the grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal; set this value longer than the expected cleanup time for your process"` + TerminationGracePeriodSeconds *int64 `json:"terminationGracePeriodSeconds,omitempty" description:"optional duration in seconds the pod needs to terminate gracefully; may be decreased in delete request; value must be non-negative integer; the value zero indicates delete immediately; if this value is not set, the default grace period will be used instead; the grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal; set this value longer than the expected cleanup time for your process; defaults to 30 seconds"` ActiveDeadlineSeconds *int64 `json:"activeDeadlineSeconds,omitempty" description:"optional duration in seconds the pod may be active on the node relative to StartTime before the system will actively try to mark it failed and kill associated containers; value must be a positive integer"` // Optional: Set DNS policy. Defaults to "ClusterFirst" DNSPolicy DNSPolicy `json:"dnsPolicy,omitempty" description:"DNS policy for containers within the pod; one of 'ClusterFirst' or 'Default'"` diff --git a/pkg/api/validation/validation.go b/pkg/api/validation/validation.go index 9d42e5b142..ed9578ebe8 100644 --- a/pkg/api/validation/validation.go +++ b/pkg/api/validation/validation.go @@ -265,6 +265,16 @@ func ValidateObjectMetaUpdate(new, old *api.ObjectMeta) errs.ValidationErrorList } else { new.CreationTimestamp = old.CreationTimestamp } + // an object can never remove a deletion timestamp or clear/change grace period seconds + if !old.DeletionTimestamp.IsZero() { + new.DeletionTimestamp = old.DeletionTimestamp + } + if old.DeletionGracePeriodSeconds != nil && new.DeletionGracePeriodSeconds == nil { + new.DeletionGracePeriodSeconds = old.DeletionGracePeriodSeconds + } + if new.DeletionGracePeriodSeconds != nil && *new.DeletionGracePeriodSeconds != *old.DeletionGracePeriodSeconds { + allErrs = append(allErrs, errs.NewFieldInvalid("deletionGracePeriodSeconds", new.DeletionGracePeriodSeconds, "field is immutable; may only be changed via deletion")) + } // Reject updates that don't specify a resource version if new.ResourceVersion == "" { diff --git a/pkg/controller/controller_utils.go b/pkg/controller/controller_utils.go index 2438c8148a..77419e155e 100644 --- a/pkg/controller/controller_utils.go +++ b/pkg/controller/controller_utils.go @@ -322,7 +322,8 @@ func FilterActivePods(pods []api.Pod) []*api.Pod { var result []*api.Pod for i := range pods { if api.PodSucceeded != pods[i].Status.Phase && - api.PodFailed != pods[i].Status.Phase { + api.PodFailed != pods[i].Status.Phase && + pods[i].DeletionTimestamp == nil { result = append(result, &pods[i]) } } diff --git a/pkg/controller/endpoint/endpoints_controller.go b/pkg/controller/endpoint/endpoints_controller.go index d4962cf1df..88345f9dea 100644 --- a/pkg/controller/endpoint/endpoints_controller.go +++ b/pkg/controller/endpoint/endpoints_controller.go @@ -310,7 +310,11 @@ func (e *EndpointController) syncService(key string) { continue } if len(pod.Status.PodIP) == 0 { - glog.V(4).Infof("Failed to find an IP for pod %s/%s", pod.Namespace, pod.Name) + glog.V(5).Infof("Failed to find an IP for pod %s/%s", pod.Namespace, pod.Name) + continue + } + if pod.DeletionTimestamp != nil { + glog.V(5).Infof("Pod is being deleted %s/%s", pod.Namespace, pod.Name) continue } diff --git a/pkg/controller/replication/replication_controller.go b/pkg/controller/replication/replication_controller.go index b212647c45..726a8f735a 100644 --- a/pkg/controller/replication/replication_controller.go +++ b/pkg/controller/replication/replication_controller.go @@ -213,6 +213,12 @@ func (rm *ReplicationManager) getPodController(pod *api.Pod) *api.ReplicationCon // When a pod is created, enqueue the controller that manages it and update it's expectations. func (rm *ReplicationManager) addPod(obj interface{}) { pod := obj.(*api.Pod) + if pod.DeletionTimestamp != nil { + // on a restart of the controller manager, it's possible a new pod shows up in a state that + // is already pending deletion. Prevent the pod from being a creation observation. + rm.deletePod(pod) + return + } if rc := rm.getPodController(pod); rc != nil { rcKey, err := controller.KeyFunc(rc) if err != nil { @@ -234,6 +240,15 @@ func (rm *ReplicationManager) updatePod(old, cur interface{}) { } // TODO: Write a unittest for this case curPod := cur.(*api.Pod) + if curPod.DeletionTimestamp != nil { + // when a pod is deleted gracefully it's deletion timestamp is first modified to reflect a grace period, + // and after such time has passed, the kubelet actually deletes it from the store. We receive an update + // for modification of the deletion timestamp and expect an rc to create more replicas asap, not wait + // until the kubelet actually deletes the pod. This is different from the Phase of a pod changing, because + // an rc never initiates a phase change, and so is never asleep waiting for the same. + rm.deletePod(curPod) + return + } if rc := rm.getPodController(curPod); rc != nil { rm.enqueueController(rc) } diff --git a/pkg/kubectl/cmd/get_test.go b/pkg/kubectl/cmd/get_test.go index bb49d4c476..7d7267d5a4 100644 --- a/pkg/kubectl/cmd/get_test.go +++ b/pkg/kubectl/cmd/get_test.go @@ -38,6 +38,7 @@ import ( ) func testData() (*api.PodList, *api.ServiceList, *api.ReplicationControllerList) { + grace := int64(30) pods := &api.PodList{ ListMeta: api.ListMeta{ ResourceVersion: "15", @@ -46,15 +47,17 @@ func testData() (*api.PodList, *api.ServiceList, *api.ReplicationControllerList) { ObjectMeta: api.ObjectMeta{Name: "foo", Namespace: "test", ResourceVersion: "10"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, { ObjectMeta: api.ObjectMeta{Name: "bar", Namespace: "test", ResourceVersion: "11"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, }, @@ -536,6 +539,7 @@ func TestGetMultipleTypeObjectsWithDirectReference(t *testing.T) { } } func watchTestData() ([]api.Pod, []watch.Event) { + grace := int64(30) pods := []api.Pod{ { ObjectMeta: api.ObjectMeta{ @@ -544,8 +548,9 @@ func watchTestData() ([]api.Pod, []watch.Event) { ResourceVersion: "10", }, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, } @@ -559,8 +564,9 @@ func watchTestData() ([]api.Pod, []watch.Event) { ResourceVersion: "11", }, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, }, @@ -573,8 +579,9 @@ func watchTestData() ([]api.Pod, []watch.Event) { ResourceVersion: "12", }, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, }, diff --git a/pkg/kubectl/cmd/util/helpers_test.go b/pkg/kubectl/cmd/util/helpers_test.go index c870531160..0a9cd7459a 100644 --- a/pkg/kubectl/cmd/util/helpers_test.go +++ b/pkg/kubectl/cmd/util/helpers_test.go @@ -34,6 +34,7 @@ import ( ) func TestMerge(t *testing.T) { + grace := int64(30) tests := []struct { obj runtime.Object fragment string @@ -54,8 +55,9 @@ func TestMerge(t *testing.T) { Name: "foo", }, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, }, @@ -122,8 +124,9 @@ func TestMerge(t *testing.T) { VolumeSource: api.VolumeSource{EmptyDir: &api.EmptyDirVolumeSource{}}, }, }, - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, }, diff --git a/pkg/kubectl/describe.go b/pkg/kubectl/describe.go index bb1ec67d12..107abc65f3 100644 --- a/pkg/kubectl/describe.go +++ b/pkg/kubectl/describe.go @@ -417,7 +417,12 @@ func describePod(pod *api.Pod, rcs []api.ReplicationController, events *api.Even fmt.Fprintf(out, "Image(s):\t%s\n", makeImageList(&pod.Spec)) fmt.Fprintf(out, "Node:\t%s\n", pod.Spec.NodeName+"/"+pod.Status.HostIP) fmt.Fprintf(out, "Labels:\t%s\n", formatLabels(pod.Labels)) - fmt.Fprintf(out, "Status:\t%s\n", string(pod.Status.Phase)) + if pod.DeletionTimestamp != nil { + fmt.Fprintf(out, "Status:\tTerminating (expires %s)\n", pod.DeletionTimestamp.Time.Format(time.RFC1123Z)) + fmt.Fprintf(out, "Termination Grace Period:\t%ss\n", pod.DeletionGracePeriodSeconds) + } else { + fmt.Fprintf(out, "Status:\t%s\n", string(pod.Status.Phase)) + } fmt.Fprintf(out, "Reason:\t%s\n", pod.Status.Reason) fmt.Fprintf(out, "Message:\t%s\n", pod.Status.Message) fmt.Fprintf(out, "IP:\t%s\n", pod.Status.PodIP) diff --git a/pkg/kubectl/resource/builder_test.go b/pkg/kubectl/resource/builder_test.go index ebe480c3f3..7b96f74656 100644 --- a/pkg/kubectl/resource/builder_test.go +++ b/pkg/kubectl/resource/builder_test.go @@ -83,6 +83,7 @@ func fakeClientWith(testName string, t *testing.T, data map[string]string) Clien } func testData() (*api.PodList, *api.ServiceList) { + grace := int64(30) pods := &api.PodList{ ListMeta: api.ListMeta{ ResourceVersion: "15", @@ -91,15 +92,17 @@ func testData() (*api.PodList, *api.ServiceList) { { ObjectMeta: api.ObjectMeta{Name: "foo", Namespace: "test", ResourceVersion: "10"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, { ObjectMeta: api.ObjectMeta{Name: "bar", Namespace: "test", ResourceVersion: "11"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, }, diff --git a/pkg/kubectl/resource/helper_test.go b/pkg/kubectl/resource/helper_test.go index d40da3d271..51f81a7925 100644 --- a/pkg/kubectl/resource/helper_test.go +++ b/pkg/kubectl/resource/helper_test.go @@ -128,6 +128,7 @@ func TestHelperCreate(t *testing.T) { return true } + grace := int64(30) tests := []struct { Resp *http.Response RespFunc client.HTTPClientFunc @@ -172,8 +173,9 @@ func TestHelperCreate(t *testing.T) { ExpectObject: &api.Pod{ ObjectMeta: api.ObjectMeta{Name: "foo"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, Resp: &http.Response{StatusCode: http.StatusOK, Body: objBody(&api.Status{Status: api.StatusSuccess})}, @@ -381,6 +383,7 @@ func TestHelperReplace(t *testing.T) { return true } + grace := int64(30) tests := []struct { Resp *http.Response RespFunc client.HTTPClientFunc @@ -418,8 +421,9 @@ func TestHelperReplace(t *testing.T) { ExpectObject: &api.Pod{ ObjectMeta: api.ObjectMeta{Name: "foo", ResourceVersion: "10"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, Overwrite: true, diff --git a/pkg/kubectl/resource_printer.go b/pkg/kubectl/resource_printer.go index 0db84a8c33..09ae64128c 100644 --- a/pkg/kubectl/resource_printer.go +++ b/pkg/kubectl/resource_printer.go @@ -410,6 +410,9 @@ func printPod(pod *api.Pod, w io.Writer, withNamespace bool, wide bool, columnLa readyContainers++ } } + if pod.DeletionTimestamp != nil { + reason = "Terminating" + } if withNamespace { if _, err := fmt.Fprintf(w, "%s\t", namespace); err != nil { diff --git a/pkg/kubectl/rolling_updater_test.go b/pkg/kubectl/rolling_updater_test.go index 827b40777e..67b50955d8 100644 --- a/pkg/kubectl/rolling_updater_test.go +++ b/pkg/kubectl/rolling_updater_test.go @@ -880,6 +880,7 @@ func TestUpdateExistingReplicationController(t *testing.T) { func TestUpdateWithRetries(t *testing.T) { codec := testapi.Codec() + grace := int64(30) rc := &api.ReplicationController{ ObjectMeta: api.ObjectMeta{Name: "rc", Labels: map[string]string{ @@ -897,8 +898,9 @@ func TestUpdateWithRetries(t *testing.T) { }, }, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, }, diff --git a/pkg/kubelet/config/common_test.go b/pkg/kubelet/config/common_test.go index 9ec02de37c..fcf16781ba 100644 --- a/pkg/kubelet/config/common_test.go +++ b/pkg/kubelet/config/common_test.go @@ -31,6 +31,7 @@ import ( func noDefault(*api.Pod) error { return nil } func TestDecodeSinglePod(t *testing.T) { + grace := int64(30) pod := &api.Pod{ TypeMeta: api.TypeMeta{ APIVersion: "", @@ -41,8 +42,9 @@ func TestDecodeSinglePod(t *testing.T) { Namespace: "mynamespace", }, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, Containers: []api.Container{{ Name: "image", Image: "test/image", @@ -91,6 +93,7 @@ func TestDecodeSinglePod(t *testing.T) { } func TestDecodePodList(t *testing.T) { + grace := int64(30) pod := &api.Pod{ TypeMeta: api.TypeMeta{ APIVersion: "", @@ -101,8 +104,9 @@ func TestDecodePodList(t *testing.T) { Namespace: "mynamespace", }, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, Containers: []api.Container{{ Name: "image", Image: "test/image", diff --git a/pkg/kubelet/config/config.go b/pkg/kubelet/config/config.go index cbb8ef760c..f8f16fa65e 100644 --- a/pkg/kubelet/config/config.go +++ b/pkg/kubelet/config/config.go @@ -217,9 +217,8 @@ func (s *podStorage) merge(source string, change interface{}) (adds, updates, de for _, ref := range filtered { name := kubecontainer.GetPodFullName(ref) if existing, found := pods[name]; found { - if !reflect.DeepEqual(existing.Spec, ref.Spec) { + if checkAndUpdatePod(existing, ref) { // this is an update - existing.Spec = ref.Spec updates.Pods = append(updates.Pods, existing) continue } @@ -261,9 +260,8 @@ func (s *podStorage) merge(source string, change interface{}) (adds, updates, de name := kubecontainer.GetPodFullName(ref) if existing, found := oldPods[name]; found { pods[name] = existing - if !reflect.DeepEqual(existing.Spec, ref.Spec) { + if checkAndUpdatePod(existing, ref) { // this is an update - existing.Spec = ref.Spec updates.Pods = append(updates.Pods, existing) continue } @@ -335,6 +333,23 @@ func filterInvalidPods(pods []*api.Pod, source string, recorder record.EventReco return } +// checkAndUpdatePod updates existing if ref makes a meaningful change and returns true, or +// returns false if there was no update. +func checkAndUpdatePod(existing, ref *api.Pod) bool { + // TODO: it would be better to update the whole object and only preserve certain things + // like the source annotation or the UID (to ensure safety) + if reflect.DeepEqual(existing.Spec, ref.Spec) && + reflect.DeepEqual(existing.DeletionTimestamp, ref.DeletionTimestamp) && + reflect.DeepEqual(existing.DeletionGracePeriodSeconds, ref.DeletionGracePeriodSeconds) { + return false + } + // this is an update + existing.Spec = ref.Spec + existing.DeletionTimestamp = ref.DeletionTimestamp + existing.DeletionGracePeriodSeconds = ref.DeletionGracePeriodSeconds + return true +} + // Sync sends a copy of the current state through the update channel. func (s *podStorage) Sync() { s.updateLock.Lock() diff --git a/pkg/kubelet/config/file_test.go b/pkg/kubelet/config/file_test.go index 440761e326..66ca28ba97 100644 --- a/pkg/kubelet/config/file_test.go +++ b/pkg/kubelet/config/file_test.go @@ -69,6 +69,7 @@ func writeTestFile(t *testing.T, dir, name string, contents string) *os.File { func TestReadPodsFromFile(t *testing.T) { hostname := "random-test-hostname" + grace := int64(30) var testCases = []struct { desc string pod runtime.Object @@ -98,9 +99,10 @@ func TestReadPodsFromFile(t *testing.T) { SelfLink: getSelfLink("test-"+hostname, "mynamespace"), }, Spec: api.PodSpec{ - NodeName: hostname, - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + NodeName: hostname, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, Containers: []api.Container{{ Name: "image", Image: "test/image", diff --git a/pkg/kubelet/config/http_test.go b/pkg/kubelet/config/http_test.go index 08fafe9288..4279609414 100644 --- a/pkg/kubelet/config/http_test.go +++ b/pkg/kubelet/config/http_test.go @@ -123,6 +123,7 @@ func TestExtractInvalidPods(t *testing.T) { func TestExtractPodsFromHTTP(t *testing.T) { hostname := "different-value" + grace := int64(30) var testCases = []struct { desc string pods runtime.Object @@ -156,9 +157,11 @@ func TestExtractPodsFromHTTP(t *testing.T) { SelfLink: getSelfLink("foo-"+hostname, "mynamespace"), }, Spec: api.PodSpec{ - NodeName: hostname, - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + NodeName: hostname, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, + Containers: []api.Container{{ Name: "1", Image: "foo", @@ -209,9 +212,11 @@ func TestExtractPodsFromHTTP(t *testing.T) { SelfLink: getSelfLink("foo-"+hostname, kubelet.NamespaceDefault), }, Spec: api.PodSpec{ - NodeName: hostname, - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + NodeName: hostname, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, + Containers: []api.Container{{ Name: "1", Image: "foo", @@ -229,9 +234,11 @@ func TestExtractPodsFromHTTP(t *testing.T) { SelfLink: getSelfLink("bar-"+hostname, kubelet.NamespaceDefault), }, Spec: api.PodSpec{ - NodeName: hostname, - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + NodeName: hostname, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, + Containers: []api.Container{{ Name: "2", Image: "bar", diff --git a/pkg/kubelet/container/fake_runtime.go b/pkg/kubelet/container/fake_runtime.go index 03d65e50a3..324d7ae6bf 100644 --- a/pkg/kubelet/container/fake_runtime.go +++ b/pkg/kubelet/container/fake_runtime.go @@ -163,13 +163,13 @@ func (f *FakeRuntime) SyncPod(pod *api.Pod, _ Pod, _ api.PodStatus, _ []api.Secr return f.Err } -func (f *FakeRuntime) KillPod(pod Pod) error { +func (f *FakeRuntime) KillPod(pod *api.Pod, runningPod Pod) error { f.Lock() defer f.Unlock() f.CalledFunctions = append(f.CalledFunctions, "KillPod") - f.KilledPods = append(f.KilledPods, string(pod.ID)) - for _, c := range pod.Containers { + f.KilledPods = append(f.KilledPods, string(runningPod.ID)) + for _, c := range runningPod.Containers { f.KilledContainers = append(f.KilledContainers, c.Name) } return f.Err diff --git a/pkg/kubelet/container/runtime.go b/pkg/kubelet/container/runtime.go index d50f021f6a..8d476f9b16 100644 --- a/pkg/kubelet/container/runtime.go +++ b/pkg/kubelet/container/runtime.go @@ -54,8 +54,8 @@ type Runtime interface { GetPods(all bool) ([]*Pod, error) // Syncs the running pod into the desired pod. SyncPod(pod *api.Pod, runningPod Pod, podStatus api.PodStatus, pullSecrets []api.Secret) error - // KillPod kills all the containers of a pod. - KillPod(pod Pod) error + // KillPod kills all the containers of a pod. Pod may be nil, running pod must not be. + KillPod(pod *api.Pod, runningPod Pod) error // GetPodStatus retrieves the status of the pod, including the information of // all containers in the pod. Clients of this interface assume the containers // statuses in a pod always have a deterministic ordering (eg: sorted by name). diff --git a/pkg/kubelet/dockertools/manager.go b/pkg/kubelet/dockertools/manager.go index b845191a82..3de60eb192 100644 --- a/pkg/kubelet/dockertools/manager.go +++ b/pkg/kubelet/dockertools/manager.go @@ -56,12 +56,21 @@ import ( const ( maxReasonCacheEntries = 200 - kubernetesPodLabel = "io.kubernetes.pod.data" - kubernetesContainerLabel = "io.kubernetes.container.name" // ndots specifies the minimum number of dots that a domain name must contain for the resolver to consider it as FQDN (fully-qualified) // we want to able to consider SRV lookup names like _dns._udp.kube-dns.default.svc to be considered relative. // hence, setting ndots to be 5. ndotsDNSOption = "options ndots:5\n" + // In order to avoid unnecessary SIGKILLs, give every container a minimum grace + // period after SIGTERM. Docker will guarantee the termination, but SIGTERM is + // potentially dangerous. + // TODO: evaluate whether there are scenarios in which SIGKILL is preferable to + // SIGTERM for certain process types, which may justify setting this to 0. + minimumGracePeriodInSeconds = 2 + + kubernetesNameLabel = "io.kubernetes.pod.name" + kubernetesPodLabel = "io.kubernetes.pod.data" + kubernetesTerminationGracePeriodLabel = "io.kubernetes.pod.terminationGracePeriod" + kubernetesContainerLabel = "io.kubernetes.container.name" ) // DockerManager implements the Runtime interface. @@ -588,12 +597,19 @@ func (dm *DockerManager) runContainer( if len(containerHostname) > hostnameMaxLen { containerHostname = containerHostname[:hostnameMaxLen] } + + // Pod information is recorded on the container as labels to preserve it in the event the pod is deleted + // while the Kubelet is down and there is no information available to recover the pod. This includes + // termination information like the termination grace period and the pre stop hooks. + // TODO: keep these labels up to date if the pod changes namespacedName := types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name} labels := map[string]string{ - "io.kubernetes.pod.name": namespacedName.String(), + kubernetesNameLabel: namespacedName.String(), + } + if pod.Spec.TerminationGracePeriodSeconds != nil { + labels[kubernetesTerminationGracePeriodLabel] = strconv.FormatInt(*pod.Spec.TerminationGracePeriodSeconds, 10) } if container.Lifecycle != nil && container.Lifecycle.PreStop != nil { - glog.V(1).Infof("Setting preStop hook") // TODO: This is kind of hacky, we should really just encode the bits we need. data, err := latest.Codec.Encode(pod) if err != nil { @@ -1104,40 +1120,56 @@ func (dm *DockerManager) PortForward(pod *kubecontainer.Pod, port uint16, stream } // Kills all containers in the specified pod -func (dm *DockerManager) KillPod(pod kubecontainer.Pod) error { +func (dm *DockerManager) KillPod(pod *api.Pod, runningPod kubecontainer.Pod) error { // Send the kills in parallel since they may take a long time. Len + 1 since there // can be Len errors + the networkPlugin teardown error. - errs := make(chan error, len(pod.Containers)+1) + errs := make(chan error, len(runningPod.Containers)+1) wg := sync.WaitGroup{} - var networkID types.UID - for _, container := range pod.Containers { + var ( + networkContainer *kubecontainer.Container + networkSpec *api.Container + ) + for _, container := range runningPod.Containers { wg.Add(1) go func(container *kubecontainer.Container) { defer util.HandleCrash() defer wg.Done() + var containerSpec *api.Container + if pod != nil { + for i, c := range pod.Spec.Containers { + if c.Name == container.Name { + containerSpec = &pod.Spec.Containers[i] + break + } + } + } + // TODO: Handle this without signaling the pod infra container to // adapt to the generic container runtime. if container.Name == PodInfraContainerName { // Store the container runtime for later deletion. // We do this so that PreStop handlers can run in the network namespace. - networkID = container.ID + networkContainer = container + networkSpec = containerSpec return } - if err := dm.killContainer(container.ID); err != nil { - glog.Errorf("Failed to delete container: %v; Skipping pod %q", err, pod.ID) + + err := dm.killContainer(container.ID, containerSpec, pod) + if err != nil { + glog.Errorf("Failed to delete container: %v; Skipping pod %q", err, runningPod.ID) errs <- err } }(container) } wg.Wait() - if len(networkID) > 0 { - if err := dm.networkPlugin.TearDownPod(pod.Namespace, pod.Name, kubeletTypes.DockerID(networkID)); err != nil { + if networkContainer != nil { + if err := dm.networkPlugin.TearDownPod(runningPod.Namespace, runningPod.Name, kubeletTypes.DockerID(networkContainer.ID)); err != nil { glog.Errorf("Failed tearing down the infra container: %v", err) errs <- err } - if err := dm.killContainer(networkID); err != nil { - glog.Errorf("Failed to delete container: %v; Skipping pod %q", err, pod.ID) + if err := dm.killContainer(networkContainer.ID, networkSpec, pod); err != nil { + glog.Errorf("Failed to delete container: %v; Skipping pod %q", err, runningPod.ID) errs <- err } } @@ -1152,75 +1184,128 @@ func (dm *DockerManager) KillPod(pod kubecontainer.Pod) error { return nil } -// KillContainerInPod kills a container in the pod. -func (dm *DockerManager) KillContainerInPod(container api.Container, pod *api.Pod) error { - // Locate the container. - pods, err := dm.GetPods(false) - if err != nil { - return err - } - targetPod := kubecontainer.Pods(pods).FindPod(kubecontainer.GetPodFullName(pod), pod.UID) - targetContainer := targetPod.FindContainerByName(container.Name) - if targetContainer == nil { - return fmt.Errorf("unable to find container %q in pod %q", container.Name, targetPod.Name) - } - return dm.killContainer(targetContainer.ID) -} - -// TODO(vmarmol): Unexport this as it is no longer used externally. -// KillContainer kills a container identified by containerID. -// Internally, it invokes docker's StopContainer API with a timeout of 10s. -// TODO: Deprecate this function in favor of KillContainerInPod. -func (dm *DockerManager) KillContainer(containerID types.UID) error { - return dm.killContainer(containerID) -} - -func (dm *DockerManager) killContainer(containerID types.UID) error { - ID := string(containerID) - glog.V(2).Infof("Killing container with id %q", ID) - inspect, err := dm.client.InspectContainer(ID) - if err != nil { - return err - } - var found bool - var preStop string - if inspect != nil && inspect.Config != nil && inspect.Config.Labels != nil { - preStop, found = inspect.Config.Labels[kubernetesPodLabel] - } - if found { - var pod api.Pod - err := latest.Codec.DecodeInto([]byte(preStop), &pod) +// KillContainerInPod kills a container in the pod. It must be passed either a container ID or a container and pod, +// and will attempt to lookup the other information if missing. +func (dm *DockerManager) KillContainerInPod(containerID types.UID, container *api.Container, pod *api.Pod) error { + switch { + case len(containerID) == 0: + // Locate the container. + pods, err := dm.GetPods(false) if err != nil { - glog.Errorf("Failed to decode prestop: %s, %s", preStop, ID) - } else { - name := inspect.Config.Labels[kubernetesContainerLabel] - var container *api.Container + return err + } + targetPod := kubecontainer.Pods(pods).FindPod(kubecontainer.GetPodFullName(pod), pod.UID) + targetContainer := targetPod.FindContainerByName(container.Name) + if targetContainer == nil { + return fmt.Errorf("unable to find container %q in pod %q", container.Name, targetPod.Name) + } + containerID = targetContainer.ID + + case container == nil || pod == nil: + // Read information about the container from labels + inspect, err := dm.client.InspectContainer(string(containerID)) + if err != nil { + return err + } + storedPod, storedContainer, cerr := containerAndPodFromLabels(inspect) + if cerr != nil { + glog.Errorf("unable to access pod data from container: %v", err) + } + if container == nil { + container = storedContainer + } + if pod == nil { + pod = storedPod + } + } + return dm.killContainer(containerID, container, pod) +} + +// killContainer accepts a containerID and an optional container or pod containing shutdown policies. Invoke +// KillContainerInPod if information must be retrieved first. +func (dm *DockerManager) killContainer(containerID types.UID, container *api.Container, pod *api.Pod) error { + ID := string(containerID) + name := ID + if container != nil { + name = fmt.Sprintf("%s %s", name, container.Name) + } + if pod != nil { + name = fmt.Sprintf("%s %s/%s", name, pod.Namespace, pod.Name) + } + + gracePeriod := int64(minimumGracePeriodInSeconds) + if pod != nil && pod.DeletionGracePeriodSeconds != nil { + gracePeriod = *pod.DeletionGracePeriodSeconds + } + glog.V(2).Infof("Killing container %q with %d second grace period", name, gracePeriod) + + if pod != nil && container != nil && container.Lifecycle != nil && container.Lifecycle.PreStop != nil { + glog.V(4).Infof("Running preStop hook for container %q", name) + start := util.Now() + // TODO: timebox PreStop execution to at most gracePeriod + if err := dm.runner.Run(ID, pod, container, container.Lifecycle.PreStop); err != nil { + glog.Errorf("preStop hook for container %q failed: %v", name, err) + } + gracePeriod -= int64(util.Now().Sub(start.Time).Seconds()) + } + + dm.readinessManager.RemoveReadiness(ID) + + // always give containers a minimal shutdown window to avoid unnecessary SIGKILLs + if gracePeriod < minimumGracePeriodInSeconds { + gracePeriod = minimumGracePeriodInSeconds + } + err := dm.client.StopContainer(ID, uint(gracePeriod)) + ref, ok := dm.containerRefManager.GetRef(ID) + if !ok { + glog.Warningf("No ref for pod '%q'", name) + } else { + // TODO: pass reason down here, and state, or move this call up the stack. + dm.recorder.Eventf(ref, "Killing", "Killing with docker id %v", util.ShortenString(ID, 12)) + } + return err +} + +var errNoPodOnContainer = fmt.Errorf("no pod information labels on Docker container") + +// containerAndPodFromLabels tries to load the appropriate container info off of a Docker container's labels +func containerAndPodFromLabels(inspect *docker.Container) (pod *api.Pod, container *api.Container, err error) { + if inspect == nil && inspect.Config == nil && inspect.Config.Labels == nil { + return nil, nil, errNoPodOnContainer + } + labels := inspect.Config.Labels + + // the pod data may not be set + if body, found := labels[kubernetesPodLabel]; found { + pod = &api.Pod{} + if err = latest.Codec.DecodeInto([]byte(body), pod); err == nil { + name := labels[kubernetesContainerLabel] for ix := range pod.Spec.Containers { if pod.Spec.Containers[ix].Name == name { container = &pod.Spec.Containers[ix] break } } - if container != nil { - glog.V(1).Infof("Running preStop hook") - if err := dm.runner.Run(ID, &pod, container, container.Lifecycle.PreStop); err != nil { - glog.Errorf("failed to run preStop hook: %v", err) - } - } else { - glog.Errorf("unable to find container %v, %s", pod, name) + if container == nil { + err = fmt.Errorf("unable to find container %s in pod %v", name, pod) + } + } else { + pod = nil + } + } + + // attempt to find the default grace period if we didn't commit a pod, but set the generic metadata + // field (the one used by kill) + if pod == nil { + if period, ok := labels[kubernetesTerminationGracePeriodLabel]; ok { + if seconds, err := strconv.ParseInt(period, 10, 64); err == nil { + pod = &api.Pod{} + pod.DeletionGracePeriodSeconds = &seconds } } } - dm.readinessManager.RemoveReadiness(ID) - err = dm.client.StopContainer(ID, 10) - ref, ok := dm.containerRefManager.GetRef(ID) - if !ok { - glog.Warningf("No ref for pod '%v'", ID) - } else { - // TODO: pass reason down here, and state, or move this call up the stack. - dm.recorder.Eventf(ref, "Killing", "Killing with docker id %v", util.ShortenString(ID, 12)) - } - return err + + return } // Run a single container from a pod. Returns the docker container ID @@ -1253,7 +1338,7 @@ func (dm *DockerManager) runContainerInPod(pod *api.Pod, container *api.Containe if container.Lifecycle != nil && container.Lifecycle.PostStart != nil { handlerErr := dm.runner.Run(id, pod, container, container.Lifecycle.PostStart) if handlerErr != nil { - dm.killContainer(types.UID(id)) + dm.killContainer(types.UID(id), container, pod) return kubeletTypes.DockerID(""), fmt.Errorf("failed to call event handler: %v", handlerErr) } } @@ -1413,6 +1498,11 @@ func (dm *DockerManager) computePodContainerChanges(pod *api.Pod, runningPod kub containersToKeep := make(map[kubeletTypes.DockerID]int) createPodInfraContainer := false + if pod.DeletionTimestamp != nil { + glog.V(4).Infof("Pod is terminating %q", podFullName) + return PodContainerChangesSpec{}, nil + } + var err error var podInfraContainerID kubeletTypes.DockerID var changed bool @@ -1547,7 +1637,7 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, pod } // Killing phase: if we want to start new infra container, or nothing is running kill everything (including infra container) - err = dm.KillPod(runningPod) + err = dm.KillPod(pod, runningPod) if err != nil { return err } @@ -1557,7 +1647,15 @@ func (dm *DockerManager) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, pod _, keep := containerChanges.ContainersToKeep[kubeletTypes.DockerID(container.ID)] if !keep { glog.V(3).Infof("Killing unwanted container %+v", container) - err = dm.KillContainer(container.ID) + // attempt to find the appropriate container policy + var podContainer *api.Container + for i, c := range pod.Spec.Containers { + if c.Name == container.Name { + podContainer = &pod.Spec.Containers[i] + break + } + } + err = dm.KillContainerInPod(container.ID, podContainer, pod) if err != nil { glog.Errorf("Error killing container: %v", err) } diff --git a/pkg/kubelet/dockertools/manager_test.go b/pkg/kubelet/dockertools/manager_test.go index 137988c0ae..0da1ca938b 100644 --- a/pkg/kubelet/dockertools/manager_test.go +++ b/pkg/kubelet/dockertools/manager_test.go @@ -405,7 +405,7 @@ func TestKillContainerInPod(t *testing.T) { manager.readinessManager.SetReadiness(c.ID, true) } - if err := manager.KillContainerInPod(pod.Spec.Containers[0], pod); err != nil { + if err := manager.KillContainerInPod("", &pod.Spec.Containers[0], pod); err != nil { t.Errorf("unexpected error: %v", err) } // Assert the container has been stopped. @@ -478,14 +478,14 @@ func TestKillContainerInPodWithPreStop(t *testing.T) { manager.readinessManager.SetReadiness(c.ID, true) } - if err := manager.KillContainerInPod(pod.Spec.Containers[0], pod); err != nil { + if err := manager.KillContainerInPod("", &pod.Spec.Containers[0], pod); err != nil { t.Errorf("unexpected error: %v", err) } // Assert the container has been stopped. if err := fakeDocker.AssertStopped([]string{containerToKill.ID}); err != nil { t.Errorf("container was not stopped correctly: %v", err) } - verifyCalls(t, fakeDocker, []string{"list", "inspect_container", "create_exec", "start_exec", "stop"}) + verifyCalls(t, fakeDocker, []string{"list", "create_exec", "start_exec", "stop"}) if !reflect.DeepEqual(expectedCmd, fakeDocker.execCmd) { t.Errorf("expected: %v, got %v", expectedCmd, fakeDocker.execCmd) } @@ -522,7 +522,7 @@ func TestKillContainerInPodWithError(t *testing.T) { manager.readinessManager.SetReadiness(c.ID, true) } - if err := manager.KillContainerInPod(pod.Spec.Containers[0], pod); err == nil { + if err := manager.KillContainerInPod("", &pod.Spec.Containers[0], pod); err == nil { t.Errorf("expected error, found nil") } @@ -1021,7 +1021,7 @@ func TestSyncPodDeletesWithNoPodInfraContainer(t *testing.T) { verifyCalls(t, fakeDocker, []string{ // Kill the container since pod infra container is not running. - "inspect_container", "stop", + "stop", // Create pod infra container. "create", "start", "inspect_container", // Create container. @@ -1096,7 +1096,7 @@ func TestSyncPodDeletesDuplicate(t *testing.T) { // Check the pod infra container. "inspect_container", // Kill the duplicated container. - "inspect_container", "stop", + "stop", }) // Expect one of the duplicates to be killed. if len(fakeDocker.Stopped) != 1 || (fakeDocker.Stopped[0] != "1234" && fakeDocker.Stopped[0] != "4567") { @@ -1150,7 +1150,7 @@ func TestSyncPodBadHash(t *testing.T) { // Check the pod infra container. "inspect_container", // Kill and restart the bad hash container. - "inspect_container", "stop", "create", "start", "inspect_container", + "stop", "create", "start", }) if err := fakeDocker.AssertStopped([]string{"1234"}); err != nil { @@ -1208,7 +1208,7 @@ func TestSyncPodsUnhealthy(t *testing.T) { // Check the pod infra container. "inspect_container", // Kill the unhealthy container. - "inspect_container", "stop", + "stop", // Restart the unhealthy container. "create", "start", "inspect_container", }) @@ -1443,7 +1443,7 @@ func TestSyncPodWithRestartPolicy(t *testing.T) { // Check the pod infra container. "inspect_container", // Stop the last pod infra container. - "inspect_container", "stop", + "stop", }, []string{}, []string{"9876"}, @@ -1910,7 +1910,7 @@ func TestSyncPodEventHandlerFails(t *testing.T) { // Create the container. "create", "start", // Kill the container since event handler fails. - "inspect_container", "stop", + "stop", }) // TODO(yifan): Check the stopped container's name. diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 3fd7c83f45..5e5a094c24 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -1120,8 +1120,8 @@ func parseResolvConf(reader io.Reader) (nameservers []string, searches []string, } // Kill all running containers in a pod (includes the pod infra container). -func (kl *Kubelet) killPod(pod kubecontainer.Pod) error { - return kl.containerRuntime.KillPod(pod) +func (kl *Kubelet) killPod(pod *api.Pod, runningPod kubecontainer.Pod) error { + return kl.containerRuntime.KillPod(pod, runningPod) } type empty struct{} @@ -1179,7 +1179,7 @@ func (kl *Kubelet) syncPod(pod *api.Pod, mirrorPod *api.Pod, runningPod kubecont // Kill pods we can't run. err := canRunPod(pod) if err != nil { - kl.killPod(runningPod) + kl.killPod(pod, runningPod) return err } @@ -1550,7 +1550,7 @@ func (kl *Kubelet) killUnwantedPods(desiredPods map[types.UID]empty, }() glog.V(1).Infof("Killing unwanted pod %q", pod.Name) // Stop the containers. - err = kl.killPod(*pod) + err = kl.killPod(nil, *pod) if err != nil { glog.Errorf("Failed killing the pod %q: %v", pod.Name, err) return diff --git a/pkg/kubelet/rkt/rkt.go b/pkg/kubelet/rkt/rkt.go index 92d718636b..bb9ef9147b 100644 --- a/pkg/kubelet/rkt/rkt.go +++ b/pkg/kubelet/rkt/rkt.go @@ -676,11 +676,11 @@ func (r *runtime) GetPods(all bool) ([]*kubecontainer.Pod, error) { } // KillPod invokes 'systemctl kill' to kill the unit that runs the pod. -func (r *runtime) KillPod(pod kubecontainer.Pod) error { - glog.V(4).Infof("Rkt is killing pod: name %q.", pod.Name) +func (r *runtime) KillPod(pod *api.Pod, runningPod kubecontainer.Pod) error { + glog.V(4).Infof("Rkt is killing pod: name %q.", runningPod.Name) // TODO(yifan): More graceful stop. Replace with StopUnit and wait for a timeout. - r.systemd.KillUnit(makePodServiceFileName(pod.ID), int32(syscall.SIGKILL)) + r.systemd.KillUnit(makePodServiceFileName(runningPod.ID), int32(syscall.SIGKILL)) return r.systemd.Reload() } @@ -887,7 +887,7 @@ func (r *runtime) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, podStatus if restartPod { // TODO(yifan): Handle network plugin. - if err := r.KillPod(runningPod); err != nil { + if err := r.KillPod(pod, runningPod); err != nil { return err } if err := r.RunPod(pod); err != nil { diff --git a/pkg/kubelet/status_manager.go b/pkg/kubelet/status_manager.go index 9f7f45e1b0..5e9c753f9f 100644 --- a/pkg/kubelet/status_manager.go +++ b/pkg/kubelet/status_manager.go @@ -24,6 +24,7 @@ import ( "github.com/golang/glog" "k8s.io/kubernetes/pkg/api" + "k8s.io/kubernetes/pkg/api/errors" "k8s.io/kubernetes/pkg/client" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" kubeletTypes "k8s.io/kubernetes/pkg/kubelet/types" @@ -161,13 +162,24 @@ func (s *statusManager) syncBatch() error { } // TODO: make me easier to express from client code statusPod, err = s.kubeClient.Pods(statusPod.Namespace).Get(statusPod.Name) + if errors.IsNotFound(err) { + glog.V(3).Infof("Pod %q was deleted on the server", pod.Name) + return nil + } if err == nil { statusPod.Status = status - _, err = s.kubeClient.Pods(pod.Namespace).UpdateStatus(statusPod) // TODO: handle conflict as a retry, make that easier too. + statusPod, err = s.kubeClient.Pods(pod.Namespace).UpdateStatus(statusPod) if err == nil { glog.V(3).Infof("Status for pod %q updated successfully", kubeletUtil.FormatPodName(pod)) - return nil + + if statusPod.DeletionTimestamp == nil || !allTerminated(statusPod.Status.ContainerStatuses) { + return nil + } + if err := s.kubeClient.Pods(statusPod.Namespace).Delete(statusPod.Name, api.NewDeleteOptions(0)); err == nil { + glog.V(3).Infof("Pod %q fully terminated and removed from etcd", statusPod.Name) + return nil + } } } @@ -181,3 +193,14 @@ func (s *statusManager) syncBatch() error { go s.DeletePodStatus(podFullName) return fmt.Errorf("error updating status for pod %q: %v", kubeletUtil.FormatPodName(pod), err) } + +// allTerminated returns true if every status is terminated, or the status list +// is empty. +func allTerminated(statuses []api.ContainerStatus) bool { + for _, status := range statuses { + if status.State.Terminated == nil { + return false + } + } + return true +} diff --git a/pkg/registry/controller/etcd/etcd_test.go b/pkg/registry/controller/etcd/etcd_test.go index c826400d35..fddea31d16 100644 --- a/pkg/registry/controller/etcd/etcd_test.go +++ b/pkg/registry/controller/etcd/etcd_test.go @@ -695,7 +695,7 @@ func TestDelete(t *testing.T) { // If the controller is still around after trying to delete either the delete // failed, or we're deleting it gracefully. if fakeClient.Data[key].R.Node != nil { - return true + return fakeClient.Data[key].R.Node.TTL != 0 } return false } diff --git a/pkg/registry/event/etcd/etcd_test.go b/pkg/registry/event/etcd/etcd_test.go index bf3504303d..6d7403368f 100644 --- a/pkg/registry/event/etcd/etcd_test.go +++ b/pkg/registry/event/etcd/etcd_test.go @@ -37,6 +37,7 @@ var testTTL uint64 = 60 func NewTestEventStorage(t *testing.T) (*tools.FakeEtcdClient, *REST) { f := tools.NewFakeEtcdClient(t) + f.HideExpires = true f.TestIndex = true s := etcdstorage.NewEtcdStorage(f, testapi.Codec(), etcdtest.PathPrefix()) diff --git a/pkg/registry/persistentvolume/etcd/etcd_test.go b/pkg/registry/persistentvolume/etcd/etcd_test.go index 07da7b165b..c7b31488d0 100644 --- a/pkg/registry/persistentvolume/etcd/etcd_test.go +++ b/pkg/registry/persistentvolume/etcd/etcd_test.go @@ -300,7 +300,7 @@ func TestEtcdUpdateStatus(t *testing.T) { key, _ := storage.KeyFunc(ctx, "foo") key = etcdtest.AddPrefix(key) pvStart := validNewPersistentVolume("foo") - fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, pvStart), 1) + fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, pvStart), 0) pvIn := &api.PersistentVolume{ ObjectMeta: api.ObjectMeta{ diff --git a/pkg/registry/persistentvolumeclaim/etcd/etcd_test.go b/pkg/registry/persistentvolumeclaim/etcd/etcd_test.go index e6fecd389e..c36d0e01e2 100644 --- a/pkg/registry/persistentvolumeclaim/etcd/etcd_test.go +++ b/pkg/registry/persistentvolumeclaim/etcd/etcd_test.go @@ -298,7 +298,7 @@ func TestEtcdUpdateStatus(t *testing.T) { key, _ := storage.KeyFunc(ctx, "foo") key = etcdtest.AddPrefix(key) pvcStart := validNewPersistentVolumeClaim("foo", api.NamespaceDefault) - fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, pvcStart), 1) + fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, pvcStart), 0) pvc := &api.PersistentVolumeClaim{ ObjectMeta: api.ObjectMeta{ diff --git a/pkg/registry/pod/etcd/etcd_test.go b/pkg/registry/pod/etcd/etcd_test.go index e05f3065e9..1bd879b201 100644 --- a/pkg/registry/pod/etcd/etcd_test.go +++ b/pkg/registry/pod/etcd/etcd_test.go @@ -56,6 +56,7 @@ func newStorage(t *testing.T) (*REST, *BindingREST, *StatusREST, *tools.FakeEtcd } func validNewPod() *api.Pod { + grace := int64(30) return &api.Pod{ ObjectMeta: api.ObjectMeta{ Name: "foo", @@ -64,6 +65,8 @@ func validNewPod() *api.Pod { Spec: api.PodSpec{ RestartPolicy: api.RestartPolicyAlways, DNSPolicy: api.DNSClusterFirst, + + TerminationGracePeriodSeconds: &grace, Containers: []api.Container{ { Name: "foo", @@ -134,9 +137,9 @@ func TestDelete(t *testing.T) { if fakeEtcdClient.Data[key].R.Node == nil { return false } - return fakeEtcdClient.Data[key].R.Node.TTL == 30 + return fakeEtcdClient.Data[key].R.Node.TTL != 0 } - test.TestDelete(createFn, gracefulSetFn) + test.TestDeleteGraceful(createFn, 30, gracefulSetFn) } func expectPod(t *testing.T, out runtime.Object) (*api.Pod, bool) { @@ -1027,6 +1030,7 @@ func TestEtcdUpdateScheduled(t *testing.T) { }, }), 1) + grace := int64(30) podIn := api.Pod{ ObjectMeta: api.ObjectMeta{ Name: "foo", @@ -1048,6 +1052,8 @@ func TestEtcdUpdateScheduled(t *testing.T) { }, RestartPolicy: api.RestartPolicyAlways, DNSPolicy: api.DNSClusterFirst, + + TerminationGracePeriodSeconds: &grace, }, } _, _, err := registry.Update(ctx, &podIn) @@ -1088,7 +1094,7 @@ func TestEtcdUpdateStatus(t *testing.T) { }, }, } - fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, &podStart), 1) + fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, &podStart), 0) podIn := api.Pod{ ObjectMeta: api.ObjectMeta{ @@ -1117,6 +1123,8 @@ func TestEtcdUpdateStatus(t *testing.T) { expected := podStart expected.ResourceVersion = "2" + grace := int64(30) + expected.Spec.TerminationGracePeriodSeconds = &grace expected.Spec.RestartPolicy = api.RestartPolicyAlways expected.Spec.DNSPolicy = api.DNSClusterFirst expected.Spec.Containers[0].ImagePullPolicy = api.PullIfNotPresent diff --git a/pkg/registry/pod/rest.go b/pkg/registry/pod/rest.go index e62d7ab27a..43135278d5 100644 --- a/pkg/registry/pod/rest.go +++ b/pkg/registry/pod/rest.go @@ -81,13 +81,31 @@ func (podStrategy) ValidateUpdate(ctx api.Context, obj, old runtime.Object) fiel return append(errorList, validation.ValidatePodUpdate(obj.(*api.Pod), old.(*api.Pod))...) } +// AllowUnconditionalUpdate allows pods to be overwritten func (podStrategy) AllowUnconditionalUpdate() bool { return true } -// CheckGracefulDelete allows a pod to be gracefully deleted. +// CheckGracefulDelete allows a pod to be gracefully deleted. It updates the DeleteOptions to +// reflect the desired grace value. func (podStrategy) CheckGracefulDelete(obj runtime.Object, options *api.DeleteOptions) bool { - return false + if options == nil { + return false + } + period := int64(0) + // user has specified a value + if options.GracePeriodSeconds != nil { + period = *options.GracePeriodSeconds + } else { + // use the default value if set, or deletes the pod immediately (0) + pod := obj.(*api.Pod) + if pod.Spec.TerminationGracePeriodSeconds != nil { + period = *pod.Spec.TerminationGracePeriodSeconds + } + } + // ensure the options and the pod are in sync + options.GracePeriodSeconds = &period + return true } type podStatusStrategy struct { @@ -100,6 +118,7 @@ func (podStatusStrategy) PrepareForUpdate(obj, old runtime.Object) { newPod := obj.(*api.Pod) oldPod := old.(*api.Pod) newPod.Spec = oldPod.Spec + newPod.DeletionTimestamp = nil } func (podStatusStrategy) ValidateUpdate(ctx api.Context, obj, old runtime.Object) fielderrors.ValidationErrorList { diff --git a/pkg/registry/resourcequota/etcd/etcd_test.go b/pkg/registry/resourcequota/etcd/etcd_test.go index 05198ebdf9..d99ae1f091 100644 --- a/pkg/registry/resourcequota/etcd/etcd_test.go +++ b/pkg/registry/resourcequota/etcd/etcd_test.go @@ -389,7 +389,7 @@ func TestEtcdUpdateStatus(t *testing.T) { key, _ := registry.KeyFunc(ctx, "foo") key = etcdtest.AddPrefix(key) resourcequotaStart := validNewResourceQuota() - fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, resourcequotaStart), 1) + fakeClient.Set(key, runtime.EncodeOrDie(latest.Codec, resourcequotaStart), 0) resourcequotaIn := &api.ResourceQuota{ ObjectMeta: api.ObjectMeta{ diff --git a/pkg/storage/etcd/etcd_helper.go b/pkg/storage/etcd/etcd_helper.go index 09efa9819c..f4a0747235 100644 --- a/pkg/storage/etcd/etcd_helper.go +++ b/pkg/storage/etcd/etcd_helper.go @@ -394,14 +394,21 @@ func (h *etcdHelper) GuaranteedUpdate(key string, ptrToType runtime.Object, igno ttl := uint64(0) if node != nil { index = node.ModifiedIndex - if node.TTL > 0 { + if node.TTL != 0 { ttl = uint64(node.TTL) } + if node.Expiration != nil && ttl == 0 { + ttl = 1 + } } else if res != nil { index = res.EtcdIndex } if newTTL != nil { + if ttl != 0 && *newTTL == 0 { + // TODO: remove this after we have verified this is no longer an issue + glog.V(4).Infof("GuaranteedUpdate is clearing TTL for %q, may not be intentional", key) + } ttl = *newTTL } diff --git a/pkg/storage/etcd/etcd_helper_test.go b/pkg/storage/etcd/etcd_helper_test.go index 4cb6ef73d3..c84b66f808 100644 --- a/pkg/storage/etcd/etcd_helper_test.go +++ b/pkg/storage/etcd/etcd_helper_test.go @@ -123,28 +123,32 @@ func TestList(t *testing.T) { }, }, } + grace := int64(30) expect := api.PodList{ ListMeta: api.ListMeta{ResourceVersion: "10"}, Items: []api.Pod{ { ObjectMeta: api.ObjectMeta{Name: "bar", ResourceVersion: "2"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, { ObjectMeta: api.ObjectMeta{Name: "baz", ResourceVersion: "3"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, { ObjectMeta: api.ObjectMeta{Name: "foo", ResourceVersion: "1"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, }, @@ -206,6 +210,7 @@ func TestListAcrossDirectories(t *testing.T) { }, }, } + grace := int64(30) expect := api.PodList{ ListMeta: api.ListMeta{ResourceVersion: "10"}, Items: []api.Pod{ @@ -213,22 +218,25 @@ func TestListAcrossDirectories(t *testing.T) { { ObjectMeta: api.ObjectMeta{Name: "baz", ResourceVersion: "3"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, { ObjectMeta: api.ObjectMeta{Name: "foo", ResourceVersion: "1"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, { ObjectMeta: api.ObjectMeta{Name: "bar", ResourceVersion: "2"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, }, @@ -278,28 +286,32 @@ func TestListExcludesDirectories(t *testing.T) { }, }, } + grace := int64(30) expect := api.PodList{ ListMeta: api.ListMeta{ResourceVersion: "10"}, Items: []api.Pod{ { ObjectMeta: api.ObjectMeta{Name: "bar", ResourceVersion: "2"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, { ObjectMeta: api.ObjectMeta{Name: "baz", ResourceVersion: "3"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, { ObjectMeta: api.ObjectMeta{Name: "foo", ResourceVersion: "1"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, }, }, @@ -319,11 +331,13 @@ func TestGet(t *testing.T) { fakeClient := tools.NewFakeEtcdClient(t) helper := newEtcdHelper(fakeClient, testapi.Codec(), etcdtest.PathPrefix()) key := etcdtest.AddPrefix("/some/key") + grace := int64(30) expect := api.Pod{ ObjectMeta: api.ObjectMeta{Name: "foo"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, } fakeClient.Set(key, runtime.EncodeOrDie(testapi.Codec(), &expect), 0) diff --git a/pkg/storage/etcd/etcd_watcher.go b/pkg/storage/etcd/etcd_watcher.go index 93fdd16829..4ab51cabbd 100644 --- a/pkg/storage/etcd/etcd_watcher.go +++ b/pkg/storage/etcd/etcd_watcher.go @@ -38,6 +38,7 @@ const ( EtcdSet = "set" EtcdCAS = "compareAndSwap" EtcdDelete = "delete" + EtcdExpire = "expire" ) // TransformFunc attempts to convert an object to another object for use with a watcher. @@ -353,7 +354,7 @@ func (w *etcdWatcher) sendResult(res *etcd.Response) { w.sendAdd(res) case EtcdSet, EtcdCAS: w.sendModify(res) - case EtcdDelete: + case EtcdDelete, EtcdExpire: w.sendDelete(res) default: glog.Errorf("unknown action: %v", res.Action) diff --git a/pkg/tools/fake_etcd_client.go b/pkg/tools/fake_etcd_client.go index 22b8a59e76..87a30bd912 100644 --- a/pkg/tools/fake_etcd_client.go +++ b/pkg/tools/fake_etcd_client.go @@ -20,6 +20,7 @@ import ( "errors" "sort" "sync" + "time" "github.com/coreos/go-etcd/etcd" ) @@ -52,6 +53,8 @@ type FakeEtcdClient struct { TestIndex bool ChangeIndex uint64 LastSetTTL uint64 + // Will avoid setting the expires header on objects to make comparison easier + HideExpires bool Machines []string // Will become valid after Watch is called; tester may write to it. Tester may @@ -175,6 +178,11 @@ func (f *FakeEtcdClient) setLocked(key, value string, ttl uint64) (*etcd.Respons prevResult := f.Data[key] createdIndex := prevResult.R.Node.CreatedIndex f.t.Logf("updating %v, index %v -> %v (ttl: %d)", key, createdIndex, i, ttl) + var expires *time.Time + if !f.HideExpires && ttl > 0 { + now := time.Now() + expires = &now + } result := EtcdResponseWithError{ R: &etcd.Response{ Node: &etcd.Node{ @@ -182,6 +190,7 @@ func (f *FakeEtcdClient) setLocked(key, value string, ttl uint64) (*etcd.Respons CreatedIndex: createdIndex, ModifiedIndex: i, TTL: int64(ttl), + Expiration: expires, }, }, } diff --git a/plugin/pkg/scheduler/factory/factory_test.go b/plugin/pkg/scheduler/factory/factory_test.go index a5795b4e38..22f05d186d 100644 --- a/plugin/pkg/scheduler/factory/factory_test.go +++ b/plugin/pkg/scheduler/factory/factory_test.go @@ -132,11 +132,13 @@ func PriorityTwo(pod *api.Pod, podLister algorithm.PodLister, minionLister algor } func TestDefaultErrorFunc(t *testing.T) { + grace := int64(30) testPod := &api.Pod{ ObjectMeta: api.ObjectMeta{Name: "foo", Namespace: "bar"}, Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyAlways, - DNSPolicy: api.DNSClusterFirst, + RestartPolicy: api.RestartPolicyAlways, + DNSPolicy: api.DNSClusterFirst, + TerminationGracePeriodSeconds: &grace, }, } handler := util.FakeHandler{ diff --git a/test/e2e/kubectl.go b/test/e2e/kubectl.go index 02d289ba25..a79946bd5e 100644 --- a/test/e2e/kubectl.go +++ b/test/e2e/kubectl.go @@ -791,7 +791,7 @@ func getUDData(jpgExpected string, ns string) func(*client.Client, string) error if strings.Contains(data.Image, jpgExpected) { return nil } else { - return errors.New(fmt.Sprintf("data served up in container is innaccurate, %s didn't contain %s", data, jpgExpected)) + return errors.New(fmt.Sprintf("data served up in container is inaccurate, %s didn't contain %s", data, jpgExpected)) } } } diff --git a/test/e2e/pd.go b/test/e2e/pd.go index 56517247bd..6d22c6e2cd 100644 --- a/test/e2e/pd.go +++ b/test/e2e/pd.go @@ -78,8 +78,8 @@ var _ = Describe("Pod Disks", func() { By("cleaning up PD-RW test environment") // Teardown pods, PD. Ignore errors. // Teardown should do nothing unless test failed. - podClient.Delete(host0Pod.Name, nil) - podClient.Delete(host1Pod.Name, nil) + podClient.Delete(host0Pod.Name, api.NewDeleteOptions(0)) + podClient.Delete(host1Pod.Name, api.NewDeleteOptions(0)) detachPD(host0Name, diskName) detachPD(host1Name, diskName) deletePD(diskName) @@ -98,7 +98,7 @@ var _ = Describe("Pod Disks", func() { Logf("Wrote value: %v", testFileContents) By("deleting host0Pod") - expectNoError(podClient.Delete(host0Pod.Name, nil), "Failed to delete host0Pod") + expectNoError(podClient.Delete(host0Pod.Name, api.NewDeleteOptions(0)), "Failed to delete host0Pod") By("submitting host1Pod to kubernetes") _, err = podClient.Create(host1Pod) @@ -113,7 +113,7 @@ var _ = Describe("Pod Disks", func() { Expect(strings.TrimSpace(v)).To(Equal(strings.TrimSpace(testFileContents))) By("deleting host1Pod") - expectNoError(podClient.Delete(host1Pod.Name, nil), "Failed to delete host1Pod") + expectNoError(podClient.Delete(host1Pod.Name, api.NewDeleteOptions(0)), "Failed to delete host1Pod") By(fmt.Sprintf("deleting PD %q", diskName)) deletePDWithRetry(diskName) @@ -136,9 +136,9 @@ var _ = Describe("Pod Disks", func() { By("cleaning up PD-RO test environment") // Teardown pods, PD. Ignore errors. // Teardown should do nothing unless test failed. - podClient.Delete(rwPod.Name, nil) - podClient.Delete(host0ROPod.Name, nil) - podClient.Delete(host1ROPod.Name, nil) + podClient.Delete(rwPod.Name, api.NewDeleteOptions(0)) + podClient.Delete(host0ROPod.Name, api.NewDeleteOptions(0)) + podClient.Delete(host1ROPod.Name, api.NewDeleteOptions(0)) detachPD(host0Name, diskName) detachPD(host1Name, diskName) @@ -149,7 +149,7 @@ var _ = Describe("Pod Disks", func() { _, err = podClient.Create(rwPod) expectNoError(err, "Failed to create rwPod") expectNoError(framework.WaitForPodRunning(rwPod.Name)) - expectNoError(podClient.Delete(rwPod.Name, nil), "Failed to delete host0Pod") + expectNoError(podClient.Delete(rwPod.Name, api.NewDeleteOptions(0)), "Failed to delete host0Pod") expectNoError(waitForPDDetach(diskName, host0Name)) By("submitting host0ROPod to kubernetes") @@ -165,10 +165,10 @@ var _ = Describe("Pod Disks", func() { expectNoError(framework.WaitForPodRunning(host1ROPod.Name)) By("deleting host0ROPod") - expectNoError(podClient.Delete(host0ROPod.Name, nil), "Failed to delete host0ROPod") + expectNoError(podClient.Delete(host0ROPod.Name, api.NewDeleteOptions(0)), "Failed to delete host0ROPod") By("deleting host1ROPod") - expectNoError(podClient.Delete(host1ROPod.Name, nil), "Failed to delete host1ROPod") + expectNoError(podClient.Delete(host1ROPod.Name, api.NewDeleteOptions(0)), "Failed to delete host1ROPod") By(fmt.Sprintf("deleting PD %q", diskName)) deletePDWithRetry(diskName) diff --git a/test/e2e/pods.go b/test/e2e/pods.go index 4fc7b2d70d..c28758a36f 100644 --- a/test/e2e/pods.go +++ b/test/e2e/pods.go @@ -43,7 +43,7 @@ func runLivenessTest(c *client.Client, ns string, podDescr *api.Pod, expectResta // At the end of the test, clean up by removing the pod. defer func() { By("deleting the pod") - c.Pods(ns).Delete(podDescr.Name, nil) + c.Pods(ns).Delete(podDescr.Name, api.NewDeleteOptions(0)) }() // Wait until the pod is not pending. (Here we need to check for something other than @@ -86,15 +86,14 @@ func runLivenessTest(c *client.Client, ns string, podDescr *api.Pod, expectResta func testHostIP(c *client.Client, ns string, pod *api.Pod) { podClient := c.Pods(ns) By("creating pod") - defer podClient.Delete(pod.Name, nil) - _, err := podClient.Create(pod) - if err != nil { + defer podClient.Delete(pod.Name, api.NewDeleteOptions(0)) + if _, err := podClient.Create(pod); err != nil { Failf("Failed to create pod: %v", err) } By("ensuring that pod is running and has a hostIP") // Wait for the pods to enter the running state. Waiting loops until the pods // are running so non-running pods cause a timeout for this test. - err = waitForPodRunningInNamespace(c, pod.Name, ns) + err := waitForPodRunningInNamespace(c, pod.Name, ns) Expect(err).NotTo(HaveOccurred()) // Try to make sure we get a hostIP for each pod. hostIPTimeout := 2 * time.Minute @@ -222,7 +221,7 @@ var _ = Describe("Pods", func() { // We call defer here in case there is a problem with // the test so we can ensure that we clean up after // ourselves - defer podClient.Delete(pod.Name, nil) + defer podClient.Delete(pod.Name, api.NewDeleteOptions(0)) _, err = podClient.Create(pod) if err != nil { Failf("Failed to create pod: %v", err) @@ -235,7 +234,7 @@ var _ = Describe("Pods", func() { } Expect(len(pods.Items)).To(Equal(1)) - By("veryfying pod creation was observed") + By("verifying pod creation was observed") select { case event, _ := <-w.ResultChan(): if event.Type != watch.Added { @@ -245,7 +244,7 @@ var _ = Describe("Pods", func() { Fail("Timeout while waiting for pod creation") } - By("deleting the pod") + By("deleting the pod gracefully") if err := podClient.Delete(pod.Name, nil); err != nil { Failf("Failed to delete pod: %v", err) } @@ -253,11 +252,13 @@ var _ = Describe("Pods", func() { By("verifying pod deletion was observed") deleted := false timeout := false + var lastPod *api.Pod timer := time.After(podStartTimeout) for !deleted && !timeout { select { case event, _ := <-w.ResultChan(): if event.Type == watch.Deleted { + lastPod = event.Object.(*api.Pod) deleted = true } case <-timer: @@ -268,6 +269,9 @@ var _ = Describe("Pods", func() { Fail("Failed to observe pod deletion") } + Expect(lastPod.DeletionTimestamp).ToNot(BeNil()) + Expect(lastPod.Spec.TerminationGracePeriodSeconds).ToNot(BeZero()) + pods, err = podClient.List(labels.SelectorFromSet(labels.Set(map[string]string{"time": value})), fields.Everything()) if err != nil { Fail(fmt.Sprintf("Failed to list pods to verify deletion: %v", err)) @@ -312,7 +316,7 @@ var _ = Describe("Pods", func() { By("submitting the pod to kubernetes") defer func() { By("deleting the pod") - podClient.Delete(pod.Name, nil) + podClient.Delete(pod.Name, api.NewDeleteOptions(0)) }() pod, err := podClient.Create(pod) if err != nil { @@ -376,7 +380,7 @@ var _ = Describe("Pods", func() { }, }, } - defer framework.Client.Pods(framework.Namespace.Name).Delete(serverPod.Name, nil) + defer framework.Client.Pods(framework.Namespace.Name).Delete(serverPod.Name, api.NewDeleteOptions(0)) _, err := framework.Client.Pods(framework.Namespace.Name).Create(serverPod) if err != nil { Failf("Failed to create serverPod: %v", err) @@ -600,7 +604,7 @@ var _ = Describe("Pods", func() { // We call defer here in case there is a problem with // the test so we can ensure that we clean up after // ourselves - podClient.Delete(pod.Name) + podClient.Delete(pod.Name, api.NewDeleteOptions(0)) }() By("waiting for the pod to start running") @@ -673,7 +677,7 @@ var _ = Describe("Pods", func() { // We call defer here in case there is a problem with // the test so we can ensure that we clean up after // ourselves - podClient.Delete(pod.Name) + podClient.Delete(pod.Name, api.NewDeleteOptions(0)) }() By("waiting for the pod to start running") diff --git a/test/e2e/util.go b/test/e2e/util.go index af69273ce7..16156a88ac 100644 --- a/test/e2e/util.go +++ b/test/e2e/util.go @@ -831,20 +831,24 @@ func expectNoError(err error, explain ...interface{}) { ExpectWithOffset(1, err).NotTo(HaveOccurred(), explain...) } -// Stops everything from filePath from namespace ns and checks if everything maching selectors from the given namespace is correctly stopped. +// Stops everything from filePath from namespace ns and checks if everything matching selectors from the given namespace is correctly stopped. func cleanup(filePath string, ns string, selectors ...string) { - By("using stop to clean up resources") + By("using delete to clean up resources") var nsArg string if ns != "" { nsArg = fmt.Sprintf("--namespace=%s", ns) } - runKubectl("stop", "-f", filePath, nsArg) + runKubectl("stop", "--grace-period=0", "-f", filePath, nsArg) for _, selector := range selectors { - resources := runKubectl("get", "pods,rc,svc", "-l", selector, "--no-headers", nsArg) + resources := runKubectl("get", "rc,se", "-l", selector, "--no-headers", nsArg) if resources != "" { Failf("Resources left running after stop:\n%s", resources) } + pods := runKubectl("get", "pods", "-l", selector, nsArg, "-t", "{{ range .items }}{{ if not .metadata.deletionTimestamp }}{{ .metadata.name }}{{ \"\\n\" }}{{ end }}{{ end }}") + if pods != "" { + Failf("Pods left unterminated after stop:\n%s", pods) + } } } diff --git a/test/images/network-tester/webserver.go b/test/images/network-tester/webserver.go index 8f6e6a999f..9f6e906312 100644 --- a/test/images/network-tester/webserver.go +++ b/test/images/network-tester/webserver.go @@ -39,7 +39,9 @@ import ( "math/rand" "net/http" "os" + "os/signal" "sync" + "syscall" "time" "k8s.io/kubernetes/pkg/client" @@ -47,10 +49,11 @@ import ( ) var ( - port = flag.Int("port", 8080, "Port number to serve at.") - peerCount = flag.Int("peers", 8, "Must find at least this many peers for the test to pass.") - service = flag.String("service", "nettest", "Service to find other network test pods in.") - namespace = flag.String("namespace", "default", "Namespace of this pod. TODO: kubernetes should make this discoverable.") + port = flag.Int("port", 8080, "Port number to serve at.") + peerCount = flag.Int("peers", 8, "Must find at least this many peers for the test to pass.") + service = flag.String("service", "nettest", "Service to find other network test pods in.") + namespace = flag.String("namespace", "default", "Namespace of this pod. TODO: kubernetes should make this discoverable.") + delayShutdown = flag.Int("delay-shutdown", 0, "Number of seconds to delay shutdown when receiving SIGTERM.") ) // State tracks the internal state of our little http server. @@ -178,6 +181,17 @@ func main() { log.Fatalf("Error getting hostname: %v", err) } + if *delayShutdown > 0 { + termCh := make(chan os.Signal) + signal.Notify(termCh, syscall.SIGTERM) + go func() { + <-termCh + log.Printf("Sleeping %d seconds before exit ...", *delayShutdown) + time.Sleep(time.Duration(*delayShutdown) * time.Second) + os.Exit(0) + }() + } + state := State{ Hostname: hostname, StillContactingPeers: true, diff --git a/test/integration/auth_test.go b/test/integration/auth_test.go index 9662724fb6..3c1d931087 100644 --- a/test/integration/auth_test.go +++ b/test/integration/auth_test.go @@ -232,7 +232,7 @@ var deleteNow string = ` { "kind": "DeleteOptions", "apiVersion": "` + testapi.Version() + `", - "gracePeriodSeconds": null%s + "gracePeriodSeconds": 0%s } ` diff --git a/test/integration/etcd_tools_test.go b/test/integration/etcd_tools_test.go index da278a5be6..b3b37bd0bc 100644 --- a/test/integration/etcd_tools_test.go +++ b/test/integration/etcd_tools_test.go @@ -80,6 +80,58 @@ func TestGet(t *testing.T) { }) } +func TestWriteTTL(t *testing.T) { + client := framework.NewEtcdClient() + helper := tools.EtcdHelper{Client: client, Codec: stringCodec{}} + framework.WithEtcdKey(func(key string) { + _, err := client.Set(key, "object", 0) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + s := fakeAPIObject("") + err = helper.GuaranteedUpdate(key, &s, false, func(obj runtime.Object, res tools.ResponseMeta) (runtime.Object, *uint64, error) { + if *(obj.(*fakeAPIObject)) != "object" { + t.Fatalf("unexpected existing object: %v", obj) + } + if res.TTL != 0 { + t.Fatalf("unexpected TTL: %#v", res) + } + ttl := uint64(10) + out := fakeAPIObject("test") + return &out, &ttl, nil + }) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if s != "test" { + t.Errorf("unexpected response: %#v", s) + } + if res, err := client.Get(key, false, false); err != nil || res == nil || res.Node.TTL != 10 { + t.Fatalf("unexpected get: %v %#v", err, res) + } + + err = helper.GuaranteedUpdate(key, &s, false, func(obj runtime.Object, res tools.ResponseMeta) (runtime.Object, *uint64, error) { + if *(obj.(*fakeAPIObject)) != "test" { + t.Fatalf("unexpected existing object: %v", obj) + } + if res.TTL <= 1 { + t.Fatalf("unexpected TTL: %#v", res) + } + out := fakeAPIObject("test2") + return &out, nil, nil + }) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if s != "test2" { + t.Errorf("unexpected response: %#v", s) + } + if res, err := client.Get(key, false, false); err != nil || res == nil || res.Node.TTL <= 1 { + t.Fatalf("unexpected get: %v %#v", err, res) + } + }) +} + func TestWatch(t *testing.T) { client := framework.NewEtcdClient() etcdStorage := etcd.NewEtcdStorage(client, testapi.Codec(), etcdtest.PathPrefix()) diff --git a/test/integration/scheduler_test.go b/test/integration/scheduler_test.go index 796dc4a7a1..0761cbcdf0 100644 --- a/test/integration/scheduler_test.go +++ b/test/integration/scheduler_test.go @@ -277,7 +277,7 @@ func DoTestUnschedulableNodes(t *testing.T, restClient *client.Client, nodeStore t.Logf("Test %d: Pod got scheduled on a schedulable node", i) } - err = restClient.Pods(api.NamespaceDefault).Delete(myPod.Name, nil) + err = restClient.Pods(api.NamespaceDefault).Delete(myPod.Name, api.NewDeleteOptions(0)) if err != nil { t.Errorf("Failed to delete pod: %v", err) }