Merge pull request #6711 from yujuhong/container_status

Kubelet: surface the container creation/start failure reason
pull/6/head
Dawn Chen 2015-04-10 17:19:32 -07:00
commit fff6cebb5f
5 changed files with 231 additions and 96 deletions

View File

@ -243,7 +243,7 @@ func TestPull(t *testing.T) {
func TestDockerKeyringLookupFails(t *testing.T) {
fakeKeyring := &credentialprovider.FakeKeyring{}
fakeClient := &FakeDockerClient{
Err: fmt.Errorf("test error"),
Errors: map[string]error{"pull": fmt.Errorf("test error")},
}
dp := dockerPuller{
@ -394,7 +394,7 @@ func TestIsImagePresent(t *testing.T) {
}
func TestGetRunningContainers(t *testing.T) {
fakeDocker := &FakeDockerClient{}
fakeDocker := &FakeDockerClient{Errors: make(map[string]error)}
fakeRecorder := &record.FakeRecorder{}
containerManager := NewDockerManager(fakeDocker, fakeRecorder, PodInfraContainerImage)
tests := []struct {
@ -478,14 +478,16 @@ func TestGetRunningContainers(t *testing.T) {
}
for _, test := range tests {
fakeDocker.ContainerMap = test.containers
fakeDocker.Err = test.err
if test.err != nil {
fakeDocker.Errors["inspect_container"] = test.err
}
if results, err := containerManager.GetRunningContainers(test.inputIDs); err == nil {
resultIDs := []string{}
for _, result := range results {
resultIDs = append(resultIDs, result.ID)
}
if !reflect.DeepEqual(resultIDs, test.expectedIDs) {
t.Errorf("expected: %v, saw: %v", test.expectedIDs, resultIDs)
t.Errorf("expected: %#v, saw: %#v", test.expectedIDs, resultIDs)
}
if err != nil {
t.Errorf("unexpected error: %v", err)

View File

@ -38,7 +38,7 @@ type FakeDockerClient struct {
ContainerMap map[string]*docker.Container
Image *docker.Image
Images []docker.APIImages
Err error
Errors map[string]error
called []string
Stopped []string
pulled []string
@ -118,17 +118,30 @@ func (f *FakeDockerClient) AssertUnorderedCalls(calls []string) (err error) {
return
}
func (f *FakeDockerClient) popError(op string) error {
if f.Errors == nil {
return nil
}
err, ok := f.Errors[op]
if ok {
delete(f.Errors, op)
return err
} else {
return nil
}
}
// ListContainers is a test-spy implementation of DockerInterface.ListContainers.
// It adds an entry "list" to the internal method call record.
func (f *FakeDockerClient) ListContainers(options docker.ListContainersOptions) ([]docker.APIContainers, error) {
f.Lock()
defer f.Unlock()
f.called = append(f.called, "list")
err := f.popError("list")
if options.All {
return append(f.ContainerList, f.ExitedContainerList...), f.Err
return append(f.ContainerList, f.ExitedContainerList...), err
}
return f.ContainerList, f.Err
return f.ContainerList, err
}
// InspectContainer is a test-spy implementation of DockerInterface.InspectContainer.
@ -137,12 +150,13 @@ func (f *FakeDockerClient) InspectContainer(id string) (*docker.Container, error
f.Lock()
defer f.Unlock()
f.called = append(f.called, "inspect_container")
err := f.popError("inspect_container")
if f.ContainerMap != nil {
if container, ok := f.ContainerMap[id]; ok {
return container, f.Err
return container, err
}
}
return f.Container, f.Err
return f.Container, err
}
// InspectImage is a test-spy implementation of DockerInterface.InspectImage.
@ -151,7 +165,8 @@ func (f *FakeDockerClient) InspectImage(name string) (*docker.Image, error) {
f.Lock()
defer f.Unlock()
f.called = append(f.called, "inspect_image")
return f.Image, f.Err
err := f.popError("inspect_image")
return f.Image, err
}
// CreateContainer is a test-spy implementation of DockerInterface.CreateContainer.
@ -160,6 +175,8 @@ func (f *FakeDockerClient) CreateContainer(c docker.CreateContainerOptions) (*do
f.Lock()
defer f.Unlock()
f.called = append(f.called, "create")
err := f.popError("create")
if err == nil {
f.Created = append(f.Created, c.Name)
// This is not a very good fake. We'll just add this container's name to the list.
// Docker likes to add a '/', so copy that behavior.
@ -167,6 +184,8 @@ func (f *FakeDockerClient) CreateContainer(c docker.CreateContainerOptions) (*do
f.ContainerList = append(f.ContainerList, docker.APIContainers{ID: name, Names: []string{name}, Image: c.Config.Image})
return &docker.Container{ID: name}, nil
}
return nil, err
}
// StartContainer is a test-spy implementation of DockerInterface.StartContainer.
// It adds an entry "start" to the internal method call record.
@ -174,6 +193,9 @@ func (f *FakeDockerClient) StartContainer(id string, hostConfig *docker.HostConf
f.Lock()
defer f.Unlock()
f.called = append(f.called, "start")
err := f.popError("start")
if err == nil {
f.Container = &docker.Container{
ID: id,
Name: id, // For testing purpose, we set name to id
@ -185,7 +207,8 @@ func (f *FakeDockerClient) StartContainer(id string, hostConfig *docker.HostConf
},
NetworkSettings: &docker.NetworkSettings{IPAddress: "1.2.3.4"},
}
return f.Err
}
return err
}
// StopContainer is a test-spy implementation of DockerInterface.StopContainer.
@ -194,6 +217,8 @@ func (f *FakeDockerClient) StopContainer(id string, timeout uint) error {
f.Lock()
defer f.Unlock()
f.called = append(f.called, "stop")
err := f.popError("stop")
if err == nil {
f.Stopped = append(f.Stopped, id)
var newList []docker.APIContainers
for _, container := range f.ContainerList {
@ -202,15 +227,19 @@ func (f *FakeDockerClient) StopContainer(id string, timeout uint) error {
}
}
f.ContainerList = newList
return f.Err
}
return err
}
func (f *FakeDockerClient) RemoveContainer(opts docker.RemoveContainerOptions) error {
f.Lock()
defer f.Unlock()
f.called = append(f.called, "remove")
err := f.popError("remove")
if err == nil {
f.Removed = append(f.Removed, opts.ID)
return f.Err
}
return err
}
// Logs is a test-spy implementation of DockerInterface.Logs.
@ -219,7 +248,7 @@ func (f *FakeDockerClient) Logs(opts docker.LogsOptions) error {
f.Lock()
defer f.Unlock()
f.called = append(f.called, "logs")
return f.Err
return f.popError("logs")
}
// PullImage is a test-spy implementation of DockerInterface.PullImage.
@ -228,12 +257,15 @@ func (f *FakeDockerClient) PullImage(opts docker.PullImageOptions, auth docker.A
f.Lock()
defer f.Unlock()
f.called = append(f.called, "pull")
err := f.popError("pull")
if err == nil {
registry := opts.Registry
if len(registry) != 0 {
registry = registry + "/"
}
f.pulled = append(f.pulled, fmt.Sprintf("%s%s:%s", registry, opts.Repository, opts.Tag))
return f.Err
}
return err
}
func (f *FakeDockerClient) Version() (*docker.Env, error) {
@ -249,12 +281,16 @@ func (f *FakeDockerClient) StartExec(_ string, _ docker.StartExecOptions) error
}
func (f *FakeDockerClient) ListImages(opts docker.ListImagesOptions) ([]docker.APIImages, error) {
return f.Images, f.Err
err := f.popError("list_images")
return f.Images, err
}
func (f *FakeDockerClient) RemoveImage(image string) error {
err := f.popError("remove_image")
if err == nil {
f.RemovedImages.Insert(image)
return f.Err
}
return err
}
// FakeDockerPuller is a stub implementation of DockerPuller.

View File

@ -25,14 +25,21 @@ import (
"path"
"strconv"
"strings"
"sync"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/capabilities"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/record"
kubecontainer "github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/container"
"github.com/GoogleCloudPlatform/kubernetes/pkg/types"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
"github.com/fsouza/go-dockerclient"
"github.com/golang/glog"
"github.com/golang/groupcache/lru"
)
const (
maxReasonCacheEntries = 200
)
// Implements kubecontainer.ContainerRunner.
@ -44,13 +51,60 @@ type DockerManager struct {
// TODO(yifan): PodInfraContainerImage can be unexported once
// we move createPodInfraContainer into dockertools.
PodInfraContainerImage string
// reasonCache stores the failure reason of the last container creation
// and/or start in a string, keyed by <pod_UID>_<container_name>. The goal
// is to propagate this reason to the container status. This endeavor is
// "best-effort" for two reasons:
// 1. The cache is not persisted.
// 2. We use an LRU cache to avoid extra garbage collection work. This
// means that some entries may be recycled before a pod has been
// deleted.
reasonCache stringCache
}
// Ensures DockerManager implements ContainerRunner.
var _ kubecontainer.ContainerRunner = new(DockerManager)
func NewDockerManager(client DockerInterface, recorder record.EventRecorder, podInfraContainerImage string) *DockerManager {
return &DockerManager{client: client, recorder: recorder, PodInfraContainerImage: podInfraContainerImage}
reasonCache := stringCache{cache: lru.New(maxReasonCacheEntries)}
return &DockerManager{
client: client,
recorder: recorder,
PodInfraContainerImage: podInfraContainerImage,
reasonCache: reasonCache}
}
// A cache which stores strings keyed by <pod_UID>_<container_name>.
type stringCache struct {
lock sync.RWMutex
cache *lru.Cache
}
func (self *stringCache) composeKey(uid types.UID, name string) string {
return fmt.Sprintf("%s_%s", uid, name)
}
func (self *stringCache) Add(uid types.UID, name string, value string) {
self.lock.Lock()
defer self.lock.Unlock()
self.cache.Add(self.composeKey(uid, name), value)
}
func (self *stringCache) Remove(uid types.UID, name string) {
self.lock.Lock()
defer self.lock.Unlock()
self.cache.Remove(self.composeKey(uid, name))
}
func (self *stringCache) Get(uid types.UID, name string) (string, bool) {
self.lock.RLock()
defer self.lock.RUnlock()
value, ok := self.cache.Get(self.composeKey(uid, name))
if ok {
return value.(string), ok
} else {
return "", ok
}
}
// GetKubeletDockerContainerLogs returns logs of a specific container. By
@ -118,7 +172,6 @@ func (self *DockerManager) inspectContainer(dockerID, containerName, tPath strin
ContainerID: DockerPrefix + dockerID,
}
waiting := true
if inspectResult.State.Running {
result.status.State.Running = &api.ContainerStateRunning{
StartedAt: util.NewTime(inspectResult.State.StartedAt),
@ -126,7 +179,6 @@ func (self *DockerManager) inspectContainer(dockerID, containerName, tPath strin
if containerName == PodInfraContainerName && inspectResult.NetworkSettings != nil {
result.ip = inspectResult.NetworkSettings.IPAddress
}
waiting = false
} else if !inspectResult.State.FinishedAt.IsZero() {
reason := ""
// Note: An application might handle OOMKilled gracefully.
@ -155,13 +207,8 @@ func (self *DockerManager) inspectContainer(dockerID, containerName, tPath strin
}
}
}
waiting = false
}
if waiting {
} else {
// TODO(dchen1107): Separate issue docker/docker#8294 was filed
// TODO(dchen1107): Need to figure out why we are still waiting
// Check any issue to run container
result.status.State.Waiting = &api.ContainerStateWaiting{
Reason: ErrContainerCannotRun.Error(),
}
@ -233,31 +280,27 @@ func (self *DockerManager) GetPodStatus(pod *api.Pod) (*api.PodStatus, error) {
return nil, result.err
}
// Add user container information
if dockerContainerName == PodInfraContainerName &&
result.status.State.Running != nil {
if dockerContainerName == PodInfraContainerName {
// Found network container
if result.status.State.Running != nil {
podStatus.PodIP = result.ip
}
} else {
// Add user container information.
statuses[dockerContainerName] = result.status
}
}
if len(statuses) == 0 && podStatus.PodIP == "" {
return nil, ErrNoContainersInPod
}
// Not all containers expected are created, check if there are
// image related issues
if len(statuses) < len(manifest.Containers) {
var containerStatus api.ContainerStatus
for _, container := range manifest.Containers {
if _, found := statuses[container.Name]; found {
continue
}
var containerStatus api.ContainerStatus
if status, found := statuses[container.Name]; found {
containerStatus = status
} else {
// The container has not been created yet. Check image is ready on
// the node or not.
// TODO: If we integrate DockerPuller into DockerManager, we can
// record the pull failure and eliminate the image checking below.
image := container.Image
// Check image is ready on the node or not
// TODO(dchen1107): docker/docker/issues/8365 to figure out if the image exists
_, err := self.client.InspectImage(image)
if err == nil {
@ -268,15 +311,16 @@ func (self *DockerManager) GetPodStatus(pod *api.Pod) (*api.PodStatus, error) {
containerStatus.State.Waiting = &api.ContainerStateWaiting{
Reason: fmt.Sprintf("Image: %s is not ready on the node", image),
}
} else {
containerStatus.State.Waiting = &api.ContainerStateWaiting{
Reason: "",
}
}
if containerStatus.State.Waiting != nil {
// For containers in the waiting state, fill in a specific reason if it is recorded.
if reason, ok := self.reasonCache.Get(uid, container.Name); ok {
containerStatus.State.Waiting.Reason = reason
}
}
statuses[container.Name] = containerStatus
}
}
podStatus.ContainerStatuses = make([]api.ContainerStatus, 0)
for _, status := range statuses {
@ -304,6 +348,19 @@ func (self *DockerManager) GetRunningContainers(ids []string) ([]*docker.Contain
}
func (self *DockerManager) RunContainer(pod *api.Pod, container *api.Container, opts *kubecontainer.RunContainerOptions) (string, error) {
dockerID, err := self.runContainer(pod, container, opts)
if err != nil {
errString := err.Error()
if errString != "" {
self.reasonCache.Add(pod.UID, container.Name, errString)
} else {
self.reasonCache.Remove(pod.UID, container.Name)
}
}
return dockerID, err
}
func (self *DockerManager) runContainer(pod *api.Pod, container *api.Container, opts *kubecontainer.RunContainerOptions) (string, error) {
ref, err := kubecontainer.GenerateContainerRef(pod, container)
if err != nil {
glog.Errorf("Couldn't make a ref to pod %v, container %v: '%v'", pod.Name, container.Name, err)

View File

@ -1698,18 +1698,16 @@ func (kl *Kubelet) validateContainerStatus(podStatus *api.PodStatus, containerNa
func (kl *Kubelet) GetKubeletContainerLogs(podFullName, containerName, tail string, follow bool, stdout, stderr io.Writer) error {
podStatus, err := kl.GetPodStatus(podFullName)
if err != nil {
if err == dockertools.ErrNoContainersInPod {
return fmt.Errorf("pod %q not found\n", podFullName)
} else {
return fmt.Errorf("failed to get status for pod %q - %v", podFullName, err)
}
}
if err := kl.validatePodPhase(&podStatus); err != nil {
// No log is available if pod is not in a "known" phase (e.g. Unknown).
return err
}
dockerContainerID, err := kl.validateContainerStatus(&podStatus, containerName)
if err != nil {
// No log is available if the container status is missing or is in the
// waiting state.
return err
}
return kl.containerManager.GetKubeletDockerContainerLogs(dockerContainerID, tail, follow, stdout, stderr)

View File

@ -69,7 +69,7 @@ type TestKubelet struct {
}
func newTestKubelet(t *testing.T) *TestKubelet {
fakeDocker := &dockertools.FakeDockerClient{RemovedImages: util.StringSet{}}
fakeDocker := &dockertools.FakeDockerClient{Errors: make(map[string]error), RemovedImages: util.StringSet{}}
fakeDockerCache := dockertools.NewFakeDockerCache(fakeDocker)
fakeRecorder := &record.FakeRecorder{}
fakeKubeClient := &testclient.Fake{}
@ -366,7 +366,7 @@ func TestKillContainerWithError(t *testing.T) {
},
}
fakeDocker := &dockertools.FakeDockerClient{
Err: fmt.Errorf("sample error"),
Errors: make(map[string]error),
ContainerList: append([]docker.APIContainers{}, containers...),
}
testKubelet := newTestKubelet(t)
@ -376,6 +376,7 @@ func TestKillContainerWithError(t *testing.T) {
}
kubelet.dockerClient = fakeDocker
c := apiContainerToContainer(fakeDocker.ContainerList[0])
fakeDocker.Errors["stop"] = fmt.Errorf("sample error")
err := kubelet.killContainer(&c)
if err == nil {
t.Errorf("expected error, found nil")
@ -512,7 +513,7 @@ func TestSyncPodsWithTerminationLog(t *testing.T) {
}
waitGroup.Wait()
verifyCalls(t, fakeDocker, []string{
"list", "list", "list", "create", "start", "inspect_container", "create", "start", "list", "inspect_container", "inspect_container"})
"list", "list", "list", "inspect_image", "create", "start", "inspect_container", "create", "start", "list", "inspect_container", "inspect_container"})
fakeDocker.Lock()
parts := strings.Split(fakeDocker.Container.HostConfig.Binds[0], ":")
@ -564,7 +565,7 @@ func TestSyncPodsCreatesNetAndContainer(t *testing.T) {
waitGroup.Wait()
verifyCalls(t, fakeDocker, []string{
"list", "list", "list", "create", "start", "inspect_container", "create", "start", "list", "inspect_container", "inspect_container"})
"list", "list", "list", "inspect_image", "create", "start", "inspect_container", "create", "start", "list", "inspect_container", "inspect_container"})
fakeDocker.Lock()
@ -619,7 +620,7 @@ func TestSyncPodsCreatesNetAndContainerPullsImage(t *testing.T) {
waitGroup.Wait()
verifyCalls(t, fakeDocker, []string{
"list", "list", "list", "create", "start", "inspect_container", "create", "start", "list", "inspect_container", "inspect_container"})
"list", "list", "list", "inspect_image", "create", "start", "inspect_container", "create", "start", "list", "inspect_container", "inspect_container"})
fakeDocker.Lock()
@ -671,7 +672,7 @@ func TestSyncPodsWithPodInfraCreatesContainer(t *testing.T) {
waitGroup.Wait()
verifyCalls(t, fakeDocker, []string{
"list", "list", "list", "inspect_container", "create", "start", "list", "inspect_container", "inspect_container"})
"list", "list", "list", "inspect_container", "inspect_image", "create", "start", "list", "inspect_container", "inspect_container"})
fakeDocker.Lock()
if len(fakeDocker.Created) != 1 ||
@ -730,7 +731,7 @@ func TestSyncPodsWithPodInfraCreatesContainerCallsHandler(t *testing.T) {
waitGroup.Wait()
verifyCalls(t, fakeDocker, []string{
"list", "list", "list", "inspect_container", "create", "start", "list", "inspect_container", "inspect_container"})
"list", "list", "list", "inspect_container", "inspect_image", "create", "start", "list", "inspect_container", "inspect_container"})
fakeDocker.Lock()
if len(fakeDocker.Created) != 1 ||
@ -948,7 +949,7 @@ func TestSyncPodDeletesDuplicate(t *testing.T) {
t.Errorf("unexpected error: %v", err)
}
verifyCalls(t, fakeDocker, []string{"list", "stop", "list"})
verifyCalls(t, fakeDocker, []string{"list", "inspect_image", "stop", "list", "inspect_image"})
// Expect one of the duplicates to be killed.
if len(fakeDocker.Stopped) != 1 || (fakeDocker.Stopped[0] != "1234" && fakeDocker.Stopped[0] != "4567") {
t.Errorf("Wrong containers were stopped: %v", fakeDocker.Stopped)
@ -990,8 +991,8 @@ func TestSyncPodBadHash(t *testing.T) {
t.Errorf("unexpected error: %v", err)
}
//verifyCalls(t, fakeDocker, []string{"list", "stop", "list", "create", "start", "stop", "create", "start", "inspect_container"})
verifyCalls(t, fakeDocker, []string{"list", "stop", "stop", "create", "start", "inspect_container", "create", "start", "list", "inspect_container", "inspect_container"})
verifyCalls(t, fakeDocker, []string{"list", "inspect_image", "stop", "stop", "create", "start",
"inspect_container", "create", "start", "list", "inspect_container", "inspect_container"})
// A map iteration is used to delete containers, so must not depend on
// order here.
@ -1045,7 +1046,7 @@ func TestSyncPodUnhealthy(t *testing.T) {
t.Errorf("unexpected error: %v", err)
}
verifyCalls(t, fakeDocker, []string{"list", "stop", "create", "start", "list", "inspect_container"})
verifyCalls(t, fakeDocker, []string{"list", "inspect_image", "stop", "create", "start", "list", "inspect_container"})
// A map iteration is used to delete containers, so must not depend on
// order here.
@ -1638,7 +1639,7 @@ func TestSyncPodEventHandlerFails(t *testing.T) {
t.Errorf("unexpected error: %v", err)
}
verifyCalls(t, fakeDocker, []string{"list", "create", "start", "stop", "list"})
verifyCalls(t, fakeDocker, []string{"list", "inspect_image", "create", "start", "stop", "list", "inspect_image"})
if len(fakeDocker.Stopped) != 1 {
t.Errorf("Wrong containers were stopped: %v", fakeDocker.Stopped)
@ -3792,3 +3793,44 @@ func TestGetPodStatusWithLastTermination(t *testing.T) {
}
}
}
func TestGetPodCreationFailureReason(t *testing.T) {
testKubelet := newTestKubelet(t)
kubelet := testKubelet.kubelet
fakeDocker := testKubelet.fakeDocker
failureReason := "creation failure"
fakeDocker.Errors["create"] = fmt.Errorf("%s", failureReason)
fakeDocker.ContainerList = []docker.APIContainers{}
pod := api.Pod{
ObjectMeta: api.ObjectMeta{
UID: "12345678",
Name: "bar",
Namespace: "new",
},
Spec: api.PodSpec{
Containers: []api.Container{
{Name: "foo"},
},
},
}
pods := []api.Pod{pod}
kubelet.podManager.SetPods(pods)
_, err := kubelet.runContainer(&pod, &pod.Spec.Containers[0], make(map[string]volume.Volume), "", "")
if err == nil {
t.Errorf("expected error, found nil")
}
status, err := kubelet.GetPodStatus(kubecontainer.GetPodFullName(&pod))
if err != nil {
t.Errorf("unexpected error %v", err)
}
if len(status.ContainerStatuses) < 1 {
t.Errorf("expected 1 container status, got %d", len(status.ContainerStatuses))
} else {
state := status.ContainerStatuses[0].State
if state.Waiting == nil {
t.Errorf("expected waiting state, got %#v", state)
} else if state.Waiting.Reason != failureReason {
t.Errorf("expected reason %q, got %q", state.Waiting.Reason)
}
}
}