From 49250c6efcdd57b4798858bbb48e509f62afe84d Mon Sep 17 00:00:00 2001
From: Clayton Coleman
Date: Sun, 10 Feb 2019 16:39:42 -0500
Subject: [PATCH] Make container create, start, and stop events consistent

The messages for container lifecycle events are subtly inconsistent
and should be unified. First, the field format for containers is hard
for a human to parse, so include the container name directly in the
message for create and start, and for kill remove the container
runtime prefix. Second, the pulling image event has inconsistent
capitalization; fix it to be a sentence without punctuation. Third,
the kill container event was unnecessarily wordy and inconsistent
with the create and start events.

Make the following changes:

* Use 'Stopping' instead of 'Killing', since 'kill' is usually reserved
  for when we decide to hard stop a container
* Send the event before we dispatch the prestop hook, since this is an
  "in-progress" style event rather than an "already completed" one
* Remove the 'cri-o://' / 'docker://' prefix by printing the container
  name instead of the id (we already do that replacement at the lower
  level to prevent high-cardinality events)
* Use 'message' instead of 'reason' as the argument name, since this
  field is meant for humans, not machines
* Remove the hash values from the container spec changed event, because
  no human will ever be able to do anything with the hash value
* Use the 'Stopping container %s(, explanation)?' form without periods
  to follow event conventions

The end result is a more pleasant message for humans:

```
35m   Normal   Created   Pod   Created container
35m   Normal   Started   Pod   Started container
10m   Normal   Killing   Pod   Killing container cri-o://installer:Need to kill Pod
10m   Normal   Pulling   Pod   pulling image "registry.svc.ci.openshift.org/openshift/origin-v4.0-2019-02-10-172026@sha256:3da5303d4384d24691721c1cf2333584ba60e8f82c9e782f593623ce8f83ddc5"
```

becomes

```
35m   Normal   Created   Pod   Created container installer
35m   Normal   Started   Pod   Started container installer
10m   Normal   Killing   Pod   Stopping container installer
10m   Normal   Pulling   Pod   Pulling image "registry.svc.ci.openshift.org/openshift/origin-v4.0-2019-02-10-172026@sha256:3da5303d4384d24691721c1cf2333584ba60e8f82c9e782f593623ce8f83ddc5"
```
---
 pkg/kubelet/images/image_manager.go          |  4 ++--
 .../kuberuntime/kuberuntime_container.go     | 22 +++++++++----------
 .../kuberuntime/kuberuntime_manager.go       | 19 ++++++++--------
 .../recyclerclient/recycler_client_test.go   |  6 ++---
 4 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/pkg/kubelet/images/image_manager.go b/pkg/kubelet/images/image_manager.go
index 39c85133cc..dc2f24c615 100644
--- a/pkg/kubelet/images/image_manager.go
+++ b/pkg/kubelet/images/image_manager.go
@@ -20,7 +20,7 @@ import (
 	"fmt"
 
 	dockerref "github.com/docker/distribution/reference"
-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	"k8s.io/client-go/tools/record"
 	"k8s.io/client-go/util/flowcontrol"
 	"k8s.io/klog"
@@ -127,7 +127,7 @@ func (m *imageManager) EnsureImageExists(pod *v1.Pod, container *v1.Container, p
 		m.logIt(ref, v1.EventTypeNormal, events.BackOffPullImage, logPrefix, msg, klog.Info)
 		return "", msg, ErrImagePullBackOff
 	}
-	m.logIt(ref, v1.EventTypeNormal, events.PullingImage, logPrefix, fmt.Sprintf("pulling image %q", container.Image), klog.Info)
+	m.logIt(ref, v1.EventTypeNormal, events.PullingImage, logPrefix, fmt.Sprintf("Pulling image %q", container.Image), klog.Info)
 	pullChan := make(chan pullResult)
 	m.puller.pullImage(spec, pullSecrets, pullChan, podSandboxConfig)
 	imagePullResult := <-pullChan
diff --git a/pkg/kubelet/kuberuntime/kuberuntime_container.go b/pkg/kubelet/kuberuntime/kuberuntime_container.go
index 4ca0980d4d..8b67ad757c 100644
--- a/pkg/kubelet/kuberuntime/kuberuntime_container.go
+++ b/pkg/kubelet/kuberuntime/kuberuntime_container.go
@@ -35,7 +35,7 @@ import (
 	"github.com/armon/circbuf"
 	"k8s.io/klog"
 
-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	kubetypes "k8s.io/apimachinery/pkg/types"
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
@@ -131,7 +131,7 @@ func (m *kubeGenericRuntimeManager) startContainer(podSandboxID string, podSandb
 		m.recordContainerEvent(pod, container, containerID, v1.EventTypeWarning, events.FailedToStartContainer, "Internal PreStartContainer hook failed: %v", grpc.ErrorDesc(err))
 		return grpc.ErrorDesc(err), ErrPreStartHook
 	}
-	m.recordContainerEvent(pod, container, containerID, v1.EventTypeNormal, events.CreatedContainer, "Created container")
+	m.recordContainerEvent(pod, container, containerID, v1.EventTypeNormal, events.CreatedContainer, fmt.Sprintf("Created container %s", container.Name))
 
 	if ref != nil {
 		m.containerRefManager.SetRef(kubecontainer.ContainerID{
@@ -146,7 +146,7 @@ func (m *kubeGenericRuntimeManager) startContainer(podSandboxID string, podSandb
 		m.recordContainerEvent(pod, container, containerID, v1.EventTypeWarning, events.FailedToStartContainer, "Error: %v", grpc.ErrorDesc(err))
 		return grpc.ErrorDesc(err), kubecontainer.ErrRunContainer
 	}
-	m.recordContainerEvent(pod, container, containerID, v1.EventTypeNormal, events.StartedContainer, "Started container")
+	m.recordContainerEvent(pod, container, containerID, v1.EventTypeNormal, events.StartedContainer, fmt.Sprintf("Started container %s", container.Name))
 
 	// Symlink container logs to the legacy container log location for cluster logging
 	// support.
@@ -532,12 +532,12 @@ func (m *kubeGenericRuntimeManager) restoreSpecsFromContainerLabels(containerID
 // killContainer kills a container through the following steps:
 // * Run the pre-stop lifecycle hooks (if applicable).
 // * Stop the container.
-func (m *kubeGenericRuntimeManager) killContainer(pod *v1.Pod, containerID kubecontainer.ContainerID, containerName string, reason string, gracePeriodOverride *int64) error {
+func (m *kubeGenericRuntimeManager) killContainer(pod *v1.Pod, containerID kubecontainer.ContainerID, containerName string, message string, gracePeriodOverride *int64) error {
 	var containerSpec *v1.Container
 	if pod != nil {
 		if containerSpec = kubecontainer.GetContainerSpec(pod, containerName); containerSpec == nil {
 			return fmt.Errorf("failed to get containerSpec %q(id=%q) in pod %q when killing container for reason %q",
-				containerName, containerID.String(), format.Pod(pod), reason)
+				containerName, containerID.String(), format.Pod(pod), message)
 		}
 	} else {
 		// Restore necessary information if one of the specs is nil.
@@ -557,6 +557,11 @@ func (m *kubeGenericRuntimeManager) killContainer(pod *v1.Pod, containerID kubec
 		gracePeriod = *pod.Spec.TerminationGracePeriodSeconds
 	}
 
+	if len(message) == 0 {
+		message = fmt.Sprintf("Stopping container %s", containerSpec.Name)
+	}
+	m.recordContainerEvent(pod, containerSpec, containerID.ID, v1.EventTypeNormal, events.KillingContainer, message)
+
 	// Run internal pre-stop lifecycle hook
 	if err := m.internalLifecycle.PreStopContainer(containerID.ID); err != nil {
 		return err
@@ -584,11 +589,6 @@ func (m *kubeGenericRuntimeManager) killContainer(pod *v1.Pod, containerID kubec
 		klog.V(3).Infof("Container %q exited normally", containerID.String())
 	}
 
-	message := fmt.Sprintf("Killing container %s", containerID.String())
-	if reason != "" {
-		message = fmt.Sprint(message, ":", reason)
-	}
-	m.recordContainerEvent(pod, containerSpec, containerID.ID, v1.EventTypeNormal, events.KillingContainer, message)
 	m.containerRefManager.ClearRef(containerID)
 
 	return err
@@ -606,7 +606,7 @@ func (m *kubeGenericRuntimeManager) killContainersWithSyncResult(pod *v1.Pod, ru
 			defer wg.Done()
 
 			killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, container.Name)
-			if err := m.killContainer(pod, container.ID, container.Name, "Need to kill Pod", gracePeriodOverride); err != nil {
+			if err := m.killContainer(pod, container.ID, container.Name, "", gracePeriodOverride); err != nil {
 				killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error())
 			}
 			containerResults <- killContainerResult
diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/pkg/kubelet/kuberuntime/kuberuntime_manager.go
index 2d5fdf3071..7dbc62b23f 100644
--- a/pkg/kubelet/kuberuntime/kuberuntime_manager.go
+++ b/pkg/kubelet/kuberuntime/kuberuntime_manager.go
@@ -26,7 +26,7 @@ import (
 	cadvisorapi "github.com/google/cadvisor/info/v1"
 	"k8s.io/klog"
 
-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	kubetypes "k8s.io/apimachinery/pkg/types"
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
@@ -544,16 +544,16 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku
 			continue
 		}
 		// The container is running, but kill the container if any of the following condition is met.
-		reason := ""
+		var message string
 		restart := shouldRestartOnFailure(pod)
-		if expectedHash, actualHash, changed := containerChanged(&container, containerStatus); changed {
-			reason = fmt.Sprintf("Container spec hash changed (%d vs %d).", actualHash, expectedHash)
+		if _, _, changed := containerChanged(&container, containerStatus); changed {
+			message = fmt.Sprintf("Container %s definition changed", container.Name)
 			// Restart regardless of the restart policy because the container
 			// spec changed.
 			restart = true
 		} else if liveness, found := m.livenessManager.Get(containerStatus.ID); found && liveness == proberesults.Failure {
 			// If the container failed the liveness probe, we should kill it.
-			reason = "Container failed liveness probe."
+			message = fmt.Sprintf("Container %s failed liveness probe", container.Name)
 		} else {
 			// Keep the container.
 			keepCount++
@@ -563,9 +563,8 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku
 		// We need to kill the container, but if we also want to restart the
 		// container afterwards, make the intent clear in the message. Also do
 		// not kill the entire pod since we expect container to be running eventually.
-		message := reason
 		if restart {
-			message = fmt.Sprintf("%s. Container will be killed and recreated.", message)
+			message = fmt.Sprintf("%s, will be restarted", message)
 			changes.ContainersToStart = append(changes.ContainersToStart, idx)
 		}
 
@@ -610,10 +609,10 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
 
 	// Step 2: Kill the pod if the sandbox has changed.
 	if podContainerChanges.KillPod {
-		if !podContainerChanges.CreateSandbox {
-			klog.V(4).Infof("Stopping PodSandbox for %q because all other containers are dead.", format.Pod(pod))
-		} else {
+		if podContainerChanges.CreateSandbox {
 			klog.V(4).Infof("Stopping PodSandbox for %q, will start new one", format.Pod(pod))
+		} else {
+			klog.V(4).Infof("Stopping PodSandbox for %q because all other containers are dead.", format.Pod(pod))
 		}
 
 		killResult := m.killPodWithSyncResult(pod, kubecontainer.ConvertPodStatusToRunningPod(m.runtimeName, podStatus), nil)
diff --git a/pkg/volume/util/recyclerclient/recycler_client_test.go b/pkg/volume/util/recyclerclient/recycler_client_test.go
index 64e04fbcc2..e4182dd0f3 100644
--- a/pkg/volume/util/recyclerclient/recycler_client_test.go
+++ b/pkg/volume/util/recyclerclient/recycler_client_test.go
@@ -20,7 +20,7 @@ import (
 	"fmt"
 	"testing"
 
-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/watch"
@@ -88,7 +88,7 @@ func TestRecyclerPod(t *testing.T) {
 				// Pod gets Running and Succeeded
 				newPodEvent(watch.Added, "podRecyclerSuccess", v1.PodPending, ""),
 				newEvent(v1.EventTypeNormal, "Successfully assigned recycler-for-podRecyclerSuccess to 127.0.0.1"),
-				newEvent(v1.EventTypeNormal, "pulling image \"k8s.gcr.io/busybox\""),
+				newEvent(v1.EventTypeNormal, "Pulling image \"k8s.gcr.io/busybox\""),
 				newEvent(v1.EventTypeNormal, "Successfully pulled image \"k8s.gcr.io/busybox\""),
 				newEvent(v1.EventTypeNormal, "Created container with docker id 83d929aeac82"),
 				newEvent(v1.EventTypeNormal, "Started container with docker id 83d929aeac82"),
@@ -97,7 +97,7 @@
 			},
 			expectedEvents: []mockEvent{
 				{v1.EventTypeNormal, "Successfully assigned recycler-for-podRecyclerSuccess to 127.0.0.1"},
-				{v1.EventTypeNormal, "pulling image \"k8s.gcr.io/busybox\""},
+				{v1.EventTypeNormal, "Pulling image \"k8s.gcr.io/busybox\""},
 				{v1.EventTypeNormal, "Successfully pulled image \"k8s.gcr.io/busybox\""},
 				{v1.EventTypeNormal, "Created container with docker id 83d929aeac82"},
 				{v1.EventTypeNormal, "Started container with docker id 83d929aeac82"},
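Below is a minimal, hypothetical Go sketch (not part of the diff) that reproduces the new message forms through client-go's FakeRecorder rather than the kubelet's recordContainerEvent helper; the reason strings and the container name "installer" are illustrative stand-ins for the kubelet's events constants.

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/client-go/tools/record"
)

func main() {
	// FakeRecorder buffers emitted events as plain strings on a channel.
	recorder := record.NewFakeRecorder(8)
	pod := &v1.Pod{}    // object the events are recorded against
	name := "installer" // hypothetical container name

	// Message forms after this change: container name inline, no runtime
	// prefix, sentence case, no trailing punctuation.
	recorder.Eventf(pod, v1.EventTypeNormal, "Created", "Created container %s", name)
	recorder.Eventf(pod, v1.EventTypeNormal, "Started", "Started container %s", name)
	recorder.Eventf(pod, v1.EventTypeNormal, "Killing", "Stopping container %s", name)

	for i := 0; i < 3; i++ {
		// Prints e.g. "Normal Created Created container installer".
		fmt.Println(<-recorder.Events)
	}
}
```

Note that the 'Killing' event reason itself is unchanged by the patch; only the human-readable message switches to the 'Stopping container ...' form.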