Make container create, start, and stop events consistent

The messages for container lifecycle events are subtly inconsistent
and should be unified.

First, the field format for containers is hard for a human to parse, so
include the container name directly in the message for create and start,
and drop the container runtime prefix from the kill message.
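
For create and start this boils down to formatting the container name into the
event message string. A minimal standalone sketch of the format (the helper is
illustrative only; the real code hands the formatted string to
recordContainerEvent, as the diff below shows):

```
package main

import "fmt"

// eventMessage mirrors the new create/start message format: the container
// name is embedded directly in the human-readable message body.
// Illustrative only; the kubelet passes the formatted string to
// recordContainerEvent rather than printing it.
func eventMessage(verb, containerName string) string {
    return fmt.Sprintf("%s container %s", verb, containerName)
}

func main() {
    fmt.Println(eventMessage("Created", "installer")) // Created container installer
    fmt.Println(eventMessage("Started", "installer")) // Started container installer
}
```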

Second, the pulling image event has inconsistent capitalization; fix it to be
sentence case without trailing punctuation.

Third, the kill container event was unnecessarily wordy and inconsistent
with the create and start events. Make the following changes (a simplified
sketch follows the list):

* Use 'Stopping' instead of 'Killing' since kill is usually reserved for
  when we decide to hard stop a container
* Send the event before we dispatch the pre-stop hook, since this is an
  "in-progress" style event rather than an "already completed" one
* Remove the 'cri-o://' / 'docker://' prefix by printing the container
  name instead of id (we already do that replacement at the lower level
  to prevent high cardinality events)
* Use 'message' instead of 'reason' as the argument name, since the field
  holds a string meant for humans, not for machines
* Remove the hash values on the container spec changed event because no
  human will ever be able to do anything with the hash value
* Use 'Stopping container %s(, explanation)?' form without periods to
  follow event conventions
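
Taken together, the kill path looks roughly like the following. This is a
minimal standalone sketch with stand-in types and helpers, not the kubelet's
real API; the actual change is in the killContainer hunks below:

```
package main

import "fmt"

// manager is a stand-in for the kubelet runtime manager; the function fields
// approximate recordContainerEvent, the internal pre-stop hook, and the
// runtime stop call.
type manager struct {
    recordEvent func(reason, message string)
    preStop     func(name string) error
    stop        func(name string, gracePeriodSeconds int64) error
}

// killContainer sketches the new ordering: default an empty message to
// "Stopping container <name>", emit the Killing event first (it is an
// "in-progress" event), then run the pre-stop hook and stop the container.
func (m *manager) killContainer(name, message string, gracePeriodSeconds int64) error {
    if len(message) == 0 {
        message = fmt.Sprintf("Stopping container %s", name)
    }
    m.recordEvent("Killing", message)

    if err := m.preStop(name); err != nil {
        return err
    }
    return m.stop(name, gracePeriodSeconds)
}

func main() {
    m := &manager{
        recordEvent: func(reason, message string) { fmt.Println(reason+":", message) },
        preStop:     func(string) error { return nil },
        stop:        func(string, int64) error { return nil },
    }
    _ = m.killContainer("installer", "", 30) // prints "Killing: Stopping container installer"
}
```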

The end result is a more pleasant message for humans:

```
35m         Normal    Created                       Pod    Created container
35m         Normal    Started                       Pod    Started container
10m         Normal    Killing                       Pod    Killing container cri-o://installer:Need to kill Pod
10m         Normal    Pulling                       Pod    pulling image "registry.svc.ci.openshift.org/openshift/origin-v4.0-2019-02-10-172026@sha256:3da5303d4384d24691721c1cf2333584ba60e8f82c9e782f593623ce8f83ddc5"
```

becomes

```
35m         Normal    Created                       Pod    Created container installer
35m         Normal    Started                       Pod    Started container installer
10m         Normal    Killing                       Pod    Stopping container installer
10m         Normal    Pulling                       Pod    Pulling image "registry.svc.ci.openshift.org/openshift/origin-v4.0-2019-02-10-172026@sha256:3da5303d4384d24691721c1cf2333584ba60e8f82c9e782f593623ce8f83ddc5"
```
Clayton Coleman 2019-02-10 16:39:42 -05:00
parent b862590bfc
commit 49250c6efc
4 changed files with 25 additions and 26 deletions

@@ -20,7 +20,7 @@ import (
     "fmt"

     dockerref "github.com/docker/distribution/reference"
-    "k8s.io/api/core/v1"
+    v1 "k8s.io/api/core/v1"
     "k8s.io/client-go/tools/record"
     "k8s.io/client-go/util/flowcontrol"
     "k8s.io/klog"
@@ -127,7 +127,7 @@ func (m *imageManager) EnsureImageExists(pod *v1.Pod, container *v1.Container, p
         m.logIt(ref, v1.EventTypeNormal, events.BackOffPullImage, logPrefix, msg, klog.Info)
         return "", msg, ErrImagePullBackOff
     }
-    m.logIt(ref, v1.EventTypeNormal, events.PullingImage, logPrefix, fmt.Sprintf("pulling image %q", container.Image), klog.Info)
+    m.logIt(ref, v1.EventTypeNormal, events.PullingImage, logPrefix, fmt.Sprintf("Pulling image %q", container.Image), klog.Info)
     pullChan := make(chan pullResult)
     m.puller.pullImage(spec, pullSecrets, pullChan, podSandboxConfig)
     imagePullResult := <-pullChan

@@ -35,7 +35,7 @@ import (
     "github.com/armon/circbuf"
     "k8s.io/klog"
-    "k8s.io/api/core/v1"
+    v1 "k8s.io/api/core/v1"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     kubetypes "k8s.io/apimachinery/pkg/types"
     utilruntime "k8s.io/apimachinery/pkg/util/runtime"
@@ -131,7 +131,7 @@ func (m *kubeGenericRuntimeManager) startContainer(podSandboxID string, podSandb
         m.recordContainerEvent(pod, container, containerID, v1.EventTypeWarning, events.FailedToStartContainer, "Internal PreStartContainer hook failed: %v", grpc.ErrorDesc(err))
         return grpc.ErrorDesc(err), ErrPreStartHook
     }
-    m.recordContainerEvent(pod, container, containerID, v1.EventTypeNormal, events.CreatedContainer, "Created container")
+    m.recordContainerEvent(pod, container, containerID, v1.EventTypeNormal, events.CreatedContainer, fmt.Sprintf("Created container %s", container.Name))

     if ref != nil {
         m.containerRefManager.SetRef(kubecontainer.ContainerID{
@@ -146,7 +146,7 @@ func (m *kubeGenericRuntimeManager) startContainer(podSandboxID string, podSandb
         m.recordContainerEvent(pod, container, containerID, v1.EventTypeWarning, events.FailedToStartContainer, "Error: %v", grpc.ErrorDesc(err))
         return grpc.ErrorDesc(err), kubecontainer.ErrRunContainer
     }
-    m.recordContainerEvent(pod, container, containerID, v1.EventTypeNormal, events.StartedContainer, "Started container")
+    m.recordContainerEvent(pod, container, containerID, v1.EventTypeNormal, events.StartedContainer, fmt.Sprintf("Started container %s", container.Name))

     // Symlink container logs to the legacy container log location for cluster logging
     // support.
@@ -532,12 +532,12 @@ func (m *kubeGenericRuntimeManager) restoreSpecsFromContainerLabels(containerID
 // killContainer kills a container through the following steps:
 // * Run the pre-stop lifecycle hooks (if applicable).
 // * Stop the container.
-func (m *kubeGenericRuntimeManager) killContainer(pod *v1.Pod, containerID kubecontainer.ContainerID, containerName string, reason string, gracePeriodOverride *int64) error {
+func (m *kubeGenericRuntimeManager) killContainer(pod *v1.Pod, containerID kubecontainer.ContainerID, containerName string, message string, gracePeriodOverride *int64) error {
     var containerSpec *v1.Container
     if pod != nil {
         if containerSpec = kubecontainer.GetContainerSpec(pod, containerName); containerSpec == nil {
             return fmt.Errorf("failed to get containerSpec %q(id=%q) in pod %q when killing container for reason %q",
-                containerName, containerID.String(), format.Pod(pod), reason)
+                containerName, containerID.String(), format.Pod(pod), message)
         }
     } else {
         // Restore necessary information if one of the specs is nil.
@@ -557,6 +557,11 @@ func (m *kubeGenericRuntimeManager) killContainer(pod *v1.Pod, containerID kubec
         gracePeriod = *pod.Spec.TerminationGracePeriodSeconds
     }

+    if len(message) == 0 {
+        message = fmt.Sprintf("Stopping container %s", containerSpec.Name)
+    }
+    m.recordContainerEvent(pod, containerSpec, containerID.ID, v1.EventTypeNormal, events.KillingContainer, message)
+
     // Run internal pre-stop lifecycle hook
     if err := m.internalLifecycle.PreStopContainer(containerID.ID); err != nil {
         return err
@@ -584,11 +589,6 @@ func (m *kubeGenericRuntimeManager) killContainer(pod *v1.Pod, containerID kubec
         klog.V(3).Infof("Container %q exited normally", containerID.String())
     }

-    message := fmt.Sprintf("Killing container %s", containerID.String())
-    if reason != "" {
-        message = fmt.Sprint(message, ":", reason)
-    }
-    m.recordContainerEvent(pod, containerSpec, containerID.ID, v1.EventTypeNormal, events.KillingContainer, message)
     m.containerRefManager.ClearRef(containerID)

     return err
@@ -606,7 +606,7 @@ func (m *kubeGenericRuntimeManager) killContainersWithSyncResult(pod *v1.Pod, ru
         defer wg.Done()

         killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, container.Name)
-        if err := m.killContainer(pod, container.ID, container.Name, "Need to kill Pod", gracePeriodOverride); err != nil {
+        if err := m.killContainer(pod, container.ID, container.Name, "", gracePeriodOverride); err != nil {
             killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error())
         }
         containerResults <- killContainerResult

@@ -26,7 +26,7 @@ import (
     cadvisorapi "github.com/google/cadvisor/info/v1"
     "k8s.io/klog"
-    "k8s.io/api/core/v1"
+    v1 "k8s.io/api/core/v1"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     kubetypes "k8s.io/apimachinery/pkg/types"
     utilruntime "k8s.io/apimachinery/pkg/util/runtime"
@@ -544,16 +544,16 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku
             continue
         }
         // The container is running, but kill the container if any of the following condition is met.
-        reason := ""
+        var message string
         restart := shouldRestartOnFailure(pod)
-        if expectedHash, actualHash, changed := containerChanged(&container, containerStatus); changed {
-            reason = fmt.Sprintf("Container spec hash changed (%d vs %d).", actualHash, expectedHash)
+        if _, _, changed := containerChanged(&container, containerStatus); changed {
+            message = fmt.Sprintf("Container %s definition changed", container.Name)
             // Restart regardless of the restart policy because the container
             // spec changed.
             restart = true
         } else if liveness, found := m.livenessManager.Get(containerStatus.ID); found && liveness == proberesults.Failure {
             // If the container failed the liveness probe, we should kill it.
-            reason = "Container failed liveness probe."
+            message = fmt.Sprintf("Container %s failed liveness probe", container.Name)
         } else {
             // Keep the container.
             keepCount++
@@ -563,9 +563,8 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku
         // We need to kill the container, but if we also want to restart the
         // container afterwards, make the intent clear in the message. Also do
         // not kill the entire pod since we expect container to be running eventually.
-        message := reason
         if restart {
-            message = fmt.Sprintf("%s. Container will be killed and recreated.", message)
+            message = fmt.Sprintf("%s, will be restarted", message)
             changes.ContainersToStart = append(changes.ContainersToStart, idx)
         }
@@ -610,10 +609,10 @@ func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, podStatus *kubecontaine
     // Step 2: Kill the pod if the sandbox has changed.
     if podContainerChanges.KillPod {
-        if !podContainerChanges.CreateSandbox {
-            klog.V(4).Infof("Stopping PodSandbox for %q because all other containers are dead.", format.Pod(pod))
+        if podContainerChanges.CreateSandbox {
+            klog.V(4).Infof("Stopping PodSandbox for %q, will start new one", format.Pod(pod))
         } else {
-            klog.V(4).Infof("Stopping PodSandbox for %q, will start new one", format.Pod(pod))
+            klog.V(4).Infof("Stopping PodSandbox for %q because all other containers are dead.", format.Pod(pod))
         }
         killResult := m.killPodWithSyncResult(pod, kubecontainer.ConvertPodStatusToRunningPod(m.runtimeName, podStatus), nil)

@@ -20,7 +20,7 @@ import (
     "fmt"
     "testing"

-    "k8s.io/api/core/v1"
+    v1 "k8s.io/api/core/v1"
     "k8s.io/apimachinery/pkg/api/errors"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "k8s.io/apimachinery/pkg/watch"
@@ -88,7 +88,7 @@ func TestRecyclerPod(t *testing.T) {
             // Pod gets Running and Succeeded
             newPodEvent(watch.Added, "podRecyclerSuccess", v1.PodPending, ""),
             newEvent(v1.EventTypeNormal, "Successfully assigned recycler-for-podRecyclerSuccess to 127.0.0.1"),
-            newEvent(v1.EventTypeNormal, "pulling image \"k8s.gcr.io/busybox\""),
+            newEvent(v1.EventTypeNormal, "Pulling image \"k8s.gcr.io/busybox\""),
             newEvent(v1.EventTypeNormal, "Successfully pulled image \"k8s.gcr.io/busybox\""),
             newEvent(v1.EventTypeNormal, "Created container with docker id 83d929aeac82"),
             newEvent(v1.EventTypeNormal, "Started container with docker id 83d929aeac82"),
@@ -97,7 +97,7 @@ func TestRecyclerPod(t *testing.T) {
         },
         expectedEvents: []mockEvent{
             {v1.EventTypeNormal, "Successfully assigned recycler-for-podRecyclerSuccess to 127.0.0.1"},
-            {v1.EventTypeNormal, "pulling image \"k8s.gcr.io/busybox\""},
+            {v1.EventTypeNormal, "Pulling image \"k8s.gcr.io/busybox\""},
             {v1.EventTypeNormal, "Successfully pulled image \"k8s.gcr.io/busybox\""},
             {v1.EventTypeNormal, "Created container with docker id 83d929aeac82"},
             {v1.EventTypeNormal, "Started container with docker id 83d929aeac82"},