diff --git a/pkg/util/oom/oom_linux.go b/pkg/util/oom/oom_linux.go index 90a753c169..c054682bdd 100644 --- a/pkg/util/oom/oom_linux.go +++ b/pkg/util/oom/oom_linux.go @@ -20,7 +20,6 @@ package oom import ( "fmt" - "io/ioutil" "os" "path" "strconv" @@ -49,16 +48,6 @@ func getPids(cgroupName string) ([]int, error) { return fsManager.GetPids() } -func syscallNotExists(err error) bool { - if err == nil { - return false - } - if e, ok := err.(*os.SyscallError); ok && os.IsNotExist(e) { - return true - } - return false -} - // Writes 'value' to /proc//oom_score_adj. PID = 0 means self // Returns os.ErrNotExist if the `pid` does not exist. func applyOOMScoreAdj(pid int, oomScoreAdj int) error { @@ -78,12 +67,19 @@ func applyOOMScoreAdj(pid int, oomScoreAdj int) error { value := strconv.Itoa(oomScoreAdj) var err error for i := 0; i < maxTries; i++ { - if err = ioutil.WriteFile(oomScoreAdjPath, []byte(value), 0700); err != nil { - if syscallNotExists(err) { + f, err := os.Open(oomScoreAdjPath) + if err != nil { + if os.IsNotExist(err) { return os.ErrNotExist } err = fmt.Errorf("failed to apply oom-score-adj to pid %d (%v)", pid, err) + continue } + if _, err := f.Write([]byte(value)); err != nil { + err = fmt.Errorf("failed to apply oom-score-adj to pid %d (%v)", pid, err) + continue + } + return nil } return err } @@ -96,20 +92,26 @@ func (oomAdjuster *OOMAdjuster) applyOOMScoreAdjContainer(cgroupName string, oom continueAdjusting := false pidList, err := oomAdjuster.pidLister(cgroupName) if err != nil { - if syscallNotExists(err) { + if os.IsNotExist(err) { // Nothing to do since the container doesn't exist anymore. return os.ErrNotExist } continueAdjusting = true - glog.Errorf("Error getting process list for cgroup %s: %+v", cgroupName, err) + glog.V(10).Infof("Error getting process list for cgroup %s: %+v", cgroupName, err) } else if len(pidList) == 0 { + glog.V(10).Infof("Pid list is empty") continueAdjusting = true } else { for _, pid := range pidList { if !adjustedProcessSet[pid] { - continueAdjusting = true + glog.V(10).Infof("pid %d needs to be set", pid) if err = oomAdjuster.ApplyOOMScoreAdj(pid, oomScoreAdj); err == nil { adjustedProcessSet[pid] = true + } else if err == os.ErrNotExist { + continue + } else { + glog.V(10).Infof("cannot adjust oom score for pid %d - %v", pid, err) + continueAdjusting = true } // Processes can come and go while we try to apply oom score adjust value. So ignore errors here. } diff --git a/pkg/util/oom/oom_linux_test.go b/pkg/util/oom/oom_linux_test.go index c3a850d91b..10e9f3d9ec 100644 --- a/pkg/util/oom/oom_linux_test.go +++ b/pkg/util/oom/oom_linux_test.go @@ -19,7 +19,10 @@ limitations under the License. package oom import ( + "os" "testing" + + "github.com/stretchr/testify/assert" ) // Converts a sequence of PID lists into a PID lister. @@ -62,10 +65,9 @@ func applyOOMScoreAdjContainerTester(pidListSequence [][]int, maxTries int, appl } else if err != nil { return } - // Check that OOM scores were applied to the right processes. if len(appliedPids) != len(pidOOMs) { - t.Errorf("Applied OOM scores to incorrect number of processes") + t.Errorf("Applied OOM scores to incorrect number of processes - %+v vs %v", appliedPids, pidOOMs) return } for _, pid := range appliedPids { @@ -82,29 +84,21 @@ func TestOOMScoreAdjContainer(t *testing.T) { pidListSequence1 := [][]int{ {1, 2}, } - applyOOMScoreAdjContainerTester(pidListSequence1, 1, nil, true, t) - applyOOMScoreAdjContainerTester(pidListSequence1, 2, []int{1, 2}, false, t) - applyOOMScoreAdjContainerTester(pidListSequence1, 3, []int{1, 2}, false, t) - - pidListSequence3 := [][]int{ - {1, 2}, - {1, 2, 4, 5}, - {2, 1, 4, 5, 3}, - } - applyOOMScoreAdjContainerTester(pidListSequence3, 1, nil, true, t) - applyOOMScoreAdjContainerTester(pidListSequence3, 2, nil, true, t) - applyOOMScoreAdjContainerTester(pidListSequence3, 3, nil, true, t) - applyOOMScoreAdjContainerTester(pidListSequence3, 4, []int{1, 2, 3, 4, 5}, false, t) + applyOOMScoreAdjContainerTester(pidListSequence1, 1, []int{1, 2}, false, t) pidListSequenceLag := [][]int{ {}, {}, {}, {1, 2, 4}, - {1, 2, 4, 5}, } - for i := 1; i < 5; i++ { + for i := 1; i < 4; i++ { applyOOMScoreAdjContainerTester(pidListSequenceLag, i, nil, true, t) } - applyOOMScoreAdjContainerTester(pidListSequenceLag, 6, []int{1, 2, 4, 5}, false, t) + applyOOMScoreAdjContainerTester(pidListSequenceLag, 4, []int{1, 2, 4}, false, t) +} + +func TestPidListerFailure(t *testing.T) { + _, err := getPids("/does/not/exist") + assert.True(t, os.IsNotExist(err), "expected getPids to return not exists error. Got %v", err) } diff --git a/pkg/util/procfs/procfs.go b/pkg/util/procfs/procfs.go index cc432255fb..d0c38bf21e 100644 --- a/pkg/util/procfs/procfs.go +++ b/pkg/util/procfs/procfs.go @@ -49,7 +49,7 @@ func (pfs *ProcFS) GetFullContainerName(pid int) (string, error) { filePath := path.Join("/proc", strconv.Itoa(pid), "cgroup") content, err := ioutil.ReadFile(filePath) if err != nil { - if e, ok := err.(*os.SyscallError); ok && os.IsNotExist(e) { + if os.IsNotExist(err) { return "", os.ErrNotExist } return "", err diff --git a/test/e2e_node/container_manager_test.go b/test/e2e_node/container_manager_test.go new file mode 100644 index 0000000000..0ae0887556 --- /dev/null +++ b/test/e2e_node/container_manager_test.go @@ -0,0 +1,92 @@ +/* +Copyright 2016 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e_node + +import ( + "fmt" + "time" + + "k8s.io/kubernetes/pkg/api" + "k8s.io/kubernetes/pkg/client/restclient" + client "k8s.io/kubernetes/pkg/client/unversioned" + + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" +) + +var _ = Describe("Kubelet Container Manager", func() { + var cl *client.Client + BeforeEach(func() { + // Setup the apiserver client + cl = client.NewOrDie(&restclient.Config{Host: *apiServerAddress}) + }) + Describe("oom score adjusting", func() { + namespace := "oom-adj" + Context("when scheduling a busybox command that always fails in a pod", func() { + podName := "bin-false" + It("it should return succes", func() { + pod := &api.Pod{ + ObjectMeta: api.ObjectMeta{ + Name: podName, + Namespace: namespace, + }, + Spec: api.PodSpec{ + // Force the Pod to schedule to the node without a scheduler running + NodeName: *nodeName, + // Don't restart the Pod since it is expected to exit + RestartPolicy: api.RestartPolicyNever, + Containers: []api.Container{ + { + Image: "gcr.io/google_containers/busybox:1.24", + Name: podName, + Command: []string{"/bin/false"}, + }, + }, + }, + } + _, err := cl.Pods(namespace).Create(pod) + Expect(err).To(BeNil(), fmt.Sprintf("Error creating Pod %v", err)) + }) + + It("it should have an error terminated reason", func() { + Eventually(func() error { + podData, err := cl.Pods(namespace).Get(podName) + if err != nil { + return err + } + if len(podData.Status.ContainerStatuses) != 1 { + return fmt.Errorf("expected only one container in the pod %q", podName) + } + contTerminatedState := podData.Status.ContainerStatuses[0].State.Terminated + if contTerminatedState == nil { + return fmt.Errorf("expected state to be terminated. Got pod status: %+v", podData.Status) + } + if contTerminatedState.Reason != "Error" { + return fmt.Errorf("expected terminated state reason to be error. Got %+v", contTerminatedState) + } + return nil + }, time.Minute, time.Second*4).Should(BeNil()) + }) + + It("it should be possible to delete", func() { + err := cl.Pods(namespace).Delete(podName, &api.DeleteOptions{}) + Expect(err).To(BeNil(), fmt.Sprintf("Error deleting Pod %v", err)) + }) + }) + }) + +})