Make container create, start, and stop events consistent

The messages for container lifecycle events are subtly inconsistent
and should be unified.

First, the field format for containers is hard for a human to parse, so
include the container name directly in the message for create and start,
and drop the container runtime prefix from the kill message.
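
For create and start this boils down to formatting the container name into the
event message string. A minimal standalone sketch of the format (the helper is
illustrative only; the real code hands the formatted string to
recordContainerEvent, as the diff below shows):

```
package main

import "fmt"

// eventMessage mirrors the new create/start message format: the container
// name is embedded directly in the human-readable message body.
// Illustrative only; the kubelet passes the formatted string to
// recordContainerEvent rather than printing it.
func eventMessage(verb, containerName string) string {
    return fmt.Sprintf("%s container %s", verb, containerName)
}

func main() {
    fmt.Println(eventMessage("Created", "installer")) // Created container installer
    fmt.Println(eventMessage("Started", "installer")) // Started container installer
}
```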

Second, the pulling image event has inconsistent capitalization; fix it to be
sentence case without trailing punctuation.

Third, the kill container event was unnecessarily wordy and inconsistent
with the create and start events. Make the following changes (a simplified
sketch follows the list):

* Use 'Stopping' instead of 'Killing' since kill is usually reserved for
  when we decide to hard stop a container
* Send the event before we dispatch the pre-stop hook, since this is an
  "in-progress" style event rather than an "already completed" one
* Remove the 'cri-o://' / 'docker://' prefix by printing the container
  name instead of id (we already do that replacement at the lower level
  to prevent high cardinality events)
* Use 'message' instead of 'reason' as the argument name, since the field
  holds a string meant for humans, not for machines
* Remove the hash values on the container spec changed event because no
  human will ever be able to do anything with the hash value
* Use 'Stopping container %s(, explanation)?' form without periods to
  follow event conventions
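
Taken together, the kill path looks roughly like the following. This is a
minimal standalone sketch with stand-in types and helpers, not the kubelet's
real API; the actual change is in the killContainer hunks below:

```
package main

import "fmt"

// manager is a stand-in for the kubelet runtime manager; the function fields
// approximate recordContainerEvent, the internal pre-stop hook, and the
// runtime stop call.
type manager struct {
    recordEvent func(reason, message string)
    preStop     func(name string) error
    stop        func(name string, gracePeriodSeconds int64) error
}

// killContainer sketches the new ordering: default an empty message to
// "Stopping container <name>", emit the Killing event first (it is an
// "in-progress" event), then run the pre-stop hook and stop the container.
func (m *manager) killContainer(name, message string, gracePeriodSeconds int64) error {
    if len(message) == 0 {
        message = fmt.Sprintf("Stopping container %s", name)
    }
    m.recordEvent("Killing", message)

    if err := m.preStop(name); err != nil {
        return err
    }
    return m.stop(name, gracePeriodSeconds)
}

func main() {
    m := &manager{
        recordEvent: func(reason, message string) { fmt.Println(reason+":", message) },
        preStop:     func(string) error { return nil },
        stop:        func(string, int64) error { return nil },
    }
    _ = m.killContainer("installer", "", 30) // prints "Killing: Stopping container installer"
}
```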

The end result is a more pleasant message for humans:

```
35m         Normal    Created                       Pod    Created container
35m         Normal    Started                       Pod    Started container
10m         Normal    Killing                       Pod    Killing container cri-o://installer:Need to kill Pod
10m         Normal    Pulling                       Pod    pulling image "registry.svc.ci.openshift.org/openshift/origin-v4.0-2019-02-10-172026@sha256:3da5303d4384d24691721c1cf2333584ba60e8f82c9e782f593623ce8f83ddc5"
```

becomes

```
35m         Normal    Created                       Pod    Created container installer
35m         Normal    Started                       Pod    Started container installer
10m         Normal    Killing                       Pod    Stopping container installer
10m         Normal    Pulling                       Pod    Pulling image "registry.svc.ci.openshift.org/openshift/origin-v4.0-2019-02-10-172026@sha256:3da5303d4384d24691721c1cf2333584ba60e8f82c9e782f593623ce8f83ddc5"
```
Clayton Coleman 2019-02-10 16:39:42 -05:00
parent b862590bfc
commit 49250c6efc
4 changed files with 25 additions and 26 deletions

@@ -20,7 +20,7 @@ import (
     "fmt"

     dockerref "github.com/docker/distribution/reference"
-    "k8s.io/api/core/v1"
+    v1 "k8s.io/api/core/v1"
     "k8s.io/client-go/tools/record"
     "k8s.io/client-go/util/flowcontrol"
     "k8s.io/klog"
@@ -127,7 +127,7 @@ func (m *imageManager) EnsureImageExists(pod *v1.Pod, container *v1.Container, p
         m.logIt(ref, v1.EventTypeNormal, events.BackOffPullImage, logPrefix, msg, klog.Info)
         return "", msg, ErrImagePullBackOff
     }
-    m.logIt(ref, v1.EventTypeNormal, events.PullingImage, logPrefix, fmt.Sprintf("pulling image %q", container.Image), klog.Info)
+    m.logIt(ref, v1.EventTypeNormal, events.PullingImage, logPrefix, fmt.Sprintf("Pulling image %q", container.Image), klog.Info)
     pullChan := make(chan pullResult)
     m.puller.pullImage(spec, pullSecrets, pullChan, podSandboxConfig)
     imagePullResult := <-pullChan

@@ -35,7 +35,7 @@ import (
     "github.com/armon/circbuf"
     "k8s.io/klog"
-    "k8s.io/api/core/v1"
+    v1 "k8s.io/api/core/v1"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     kubetypes "k8s.io/apimachinery/pkg/types"
     utilruntime "k8s.io/apimachinery/pkg/util/runtime"
@@ -131,7 +131,7 @@ func (m *kubeGenericRuntimeManager) startContainer(podSandboxID string, podSandb
         m.recordContainerEvent(pod, container, containerID, v1.EventTypeWarning, events.FailedToStartContainer, "Internal PreStartContainer hook failed: %v", grpc.ErrorDesc(err))
         return grpc.ErrorDesc(err), ErrPreStartHook
     }
-    m.recordContainerEvent(pod, container, containerID, v1.EventTypeNormal, events.CreatedContainer, "Created container")
+    m.recordContainerEvent(pod, container, containerID, v1.EventTypeNormal, events.CreatedContainer, fmt.Sprintf("Created container %s", container.Name))

     if ref != nil {
         m.containerRefManager.SetRef(kubecontainer.ContainerID{
@@ -146,7 +146,7 @@ func (m *kubeGenericRuntimeManager) startContainer(podSandboxID string, podSandb
         m.recordContainerEvent(pod, container, containerID, v1.EventTypeWarning, events.FailedToStartContainer, "Error: %v", grpc.ErrorDesc(err))
         return grpc.ErrorDesc(err), kubecontainer.ErrRunContainer
     }
-    m.recordContainerEvent(pod, container, containerID, v1.EventTypeNormal, events.StartedContainer, "Started container")
+    m.recordContainerEvent(pod, container, containerID, v1.EventTypeNormal, events.StartedContainer, fmt.Sprintf("Started container %s", container.Name))

     // Symlink container logs to the legacy container log location for cluster logging
     // support.
@@ -532,12 +532,12 @@ func (m *kubeGenericRuntimeManager) restoreSpecsFromContainerLabels(containerID
 // killContainer kills a container through the following steps:
 // * Run the pre-stop lifecycle hooks (if applicable).
 // * Stop the container.
-func (m *kubeGenericRuntimeManager) killContainer(pod *v1.Pod, containerID kubecontainer.ContainerID, containerName string, reason string, gracePeriodOverride *int64) error {
+func (m *kubeGenericRuntimeManager) killContainer(pod *v1.Pod, containerID kubecontainer.ContainerID, containerName string, message string, gracePeriodOverride *int64) error {
     var containerSpec *v1.Container
     if pod != nil {
         if containerSpec = kubecontainer.GetContainerSpec(pod, containerName); containerSpec == nil {
             return fmt.Errorf("failed to get containerSpec %q(id=%q) in pod %q when killing container for reason %q",
-                containerName, containerID.String(), format.Pod(pod), reason)
+                containerName, containerID.String(), format.Pod(pod), message)
         }
     } else {
         // Restore necessary information if one of the specs is nil.
@@ -557,6 +557,11 @@ func (m *kubeGenericRuntimeManager) killContainer(pod *v1.Pod, containerID kubec
         gracePeriod = *pod.Spec.TerminationGracePeriodSeconds
     }

+    if len(message) == 0 {
+        message = fmt.Sprintf("Stopping container %s", containerSpec.Name)
+    }
+    m.recordContainerEvent(pod, containerSpec, containerID.ID, v1.EventTypeNormal, events.KillingContainer, message)
+
     // Run internal pre-stop lifecycle hook
     if err := m.internalLifecycle.PreStopContainer(containerID.ID); err != nil {
         return err
@@ -584,11 +589,6 @@ func (m *kubeGenericRuntimeManager) killContainer(pod *v1.Pod, containerID kubec
         klog.V(3).Infof("Container %q exited normally", containerID.String())
     }

-    message := fmt.Sprintf("Killing container %s", containerID.String())
-    if reason != "" {
-        message = fmt.Sprint(message, ":", reason)
-    }
-    m.recordContainerEvent(pod, containerSpec, containerID.ID, v1.EventTypeNormal, events.KillingContainer, message)
     m.containerRefManager.ClearRef(containerID)

     return err
@@ -606,7 +606,7 @@ func (m *kubeGenericRuntimeManager) killContainersWithSyncResult(pod *v1.Pod, ru
         defer wg.Done()

         killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, container.Name)
-        if err := m.killContainer(pod, container.ID, container.Name, "Need to kill Pod", gracePeriodOverride); err != nil {
+        if err := m.killContainer(pod, container.ID, container.Name, "", gracePeriodOverride); err != nil {
             killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error())
         }
         containerResults <- killContainerResult

@@ -26,7 +26,7 @@ import (
     cadvisorapi "github.com/google/cadvisor/info/v1"
     "k8s.io/klog"
-    "k8s.io/api/core/v1"
+    v1 "k8s.io/api/core/v1"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     kubetypes "k8s.io/apimachinery/pkg/types"
     utilruntime "k8s.io/apimachinery/pkg/util/runtime"
@@ -544,16 +544,16 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku
             continue
         }
         // The container is running, but kill the container if any of the following condition is met.
-        reason := ""
+        var message string
         restart := shouldRestartOnFailure(pod)
-        if expectedHash, actualHash, changed := containerChanged(&container, containerStatus); changed {
-            reason = fmt.Sprintf("Container spec hash changed (%d vs %d).", actualHash, expectedHash)
+        if _, _, changed := containerChanged(&container, containerStatus); changed {
+            message = fmt.Sprintf("Container %s definition changed", container.Name)
             // Restart regardless of the restart policy because the container
             // spec changed.
             restart = true
         } else if liveness, found := m.livenessManager.Get(containerStatus.ID); found && liveness == proberesults.Failure {
             // If the container failed the liveness probe, we should kill it.
-            reason = "Container failed liveness probe."
+            message = fmt.Sprintf("Container %s failed liveness probe", container.Name)
         } else {
             // Keep the container.
             keepCount++
@@ -563,9 +563,8 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku
         // We need to kill the container, but if we also want to restart the
         // container afterwards, make the intent clear in the message. Also do
         // not kill the entire pod since we expect container to be running eventually.
-        message := reason
         if restart {
-            message = fmt.Sprintf("%s. Container will be killed and recreated.", message)
+            message = fmt.Sprintf("%s, will be restarted", message)
             changes.ContainersToStart = append(changes.ContainersToStart, idx)
         }
@@ -610,10 +609,10 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
     // Step 2: Kill the pod if the sandbox has changed.
     if podContainerChanges.KillPod {
-        if !podContainerChanges.CreateSandbox {
-            klog.V(4).Infof("Stopping PodSandbox for %q because all other containers are dead.", format.Pod(pod))
+        if podContainerChanges.CreateSandbox {
+            klog.V(4).Infof("Stopping PodSandbox for %q, will start new one", format.Pod(pod))
         } else {
-            klog.V(4).Infof("Stopping PodSandbox for %q, will start new one", format.Pod(pod))
+            klog.V(4).Infof("Stopping PodSandbox for %q because all other containers are dead.", format.Pod(pod))
         }
         killResult := m.killPodWithSyncResult(pod, kubecontainer.ConvertPodStatusToRunningPod(m.runtimeName, podStatus), nil)

@@ -20,7 +20,7 @@ import (
     "fmt"
     "testing"

-    "k8s.io/api/core/v1"
+    v1 "k8s.io/api/core/v1"
     "k8s.io/apimachinery/pkg/api/errors"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "k8s.io/apimachinery/pkg/watch"
@@ -88,7 +88,7 @@ func TestRecyclerPod(t *testing.T) {
             // Pod gets Running and Succeeded
             newPodEvent(watch.Added, "podRecyclerSuccess", v1.PodPending, ""),
             newEvent(v1.EventTypeNormal, "Successfully assigned recycler-for-podRecyclerSuccess to 127.0.0.1"),
-            newEvent(v1.EventTypeNormal, "pulling image \"k8s.gcr.io/busybox\""),
+            newEvent(v1.EventTypeNormal, "Pulling image \"k8s.gcr.io/busybox\""),
             newEvent(v1.EventTypeNormal, "Successfully pulled image \"k8s.gcr.io/busybox\""),
             newEvent(v1.EventTypeNormal, "Created container with docker id 83d929aeac82"),
             newEvent(v1.EventTypeNormal, "Started container with docker id 83d929aeac82"),
@@ -97,7 +97,7 @@ func TestRecyclerPod(t *testing.T) {
         },
         expectedEvents: []mockEvent{
             {v1.EventTypeNormal, "Successfully assigned recycler-for-podRecyclerSuccess to 127.0.0.1"},
-            {v1.EventTypeNormal, "pulling image \"k8s.gcr.io/busybox\""},
+            {v1.EventTypeNormal, "Pulling image \"k8s.gcr.io/busybox\""},
             {v1.EventTypeNormal, "Successfully pulled image \"k8s.gcr.io/busybox\""},
             {v1.EventTypeNormal, "Created container with docker id 83d929aeac82"},
             {v1.EventTypeNormal, "Started container with docker id 83d929aeac82"},