mirror of https://github.com/k3s-io/k3s
Merge pull request #23894 from vishh/oom-score-adj-error
Automatic merge from submit-queue Do not throw creation errors for containers that fail immediately after being started Fixes (hopefully) #23607 cc @dchen1107pull/6/head
commit
2382fec956
|
@ -20,7 +20,6 @@ package oom
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"io/ioutil"
|
|
||||||
"os"
|
"os"
|
||||||
"path"
|
"path"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
@ -49,16 +48,6 @@ func getPids(cgroupName string) ([]int, error) {
|
||||||
return fsManager.GetPids()
|
return fsManager.GetPids()
|
||||||
}
|
}
|
||||||
|
|
||||||
func syscallNotExists(err error) bool {
|
|
||||||
if err == nil {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
if e, ok := err.(*os.SyscallError); ok && os.IsNotExist(e) {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// Writes 'value' to /proc/<pid>/oom_score_adj. PID = 0 means self
|
// Writes 'value' to /proc/<pid>/oom_score_adj. PID = 0 means self
|
||||||
// Returns os.ErrNotExist if the `pid` does not exist.
|
// Returns os.ErrNotExist if the `pid` does not exist.
|
||||||
func applyOOMScoreAdj(pid int, oomScoreAdj int) error {
|
func applyOOMScoreAdj(pid int, oomScoreAdj int) error {
|
||||||
|
@ -78,12 +67,19 @@ func applyOOMScoreAdj(pid int, oomScoreAdj int) error {
|
||||||
value := strconv.Itoa(oomScoreAdj)
|
value := strconv.Itoa(oomScoreAdj)
|
||||||
var err error
|
var err error
|
||||||
for i := 0; i < maxTries; i++ {
|
for i := 0; i < maxTries; i++ {
|
||||||
if err = ioutil.WriteFile(oomScoreAdjPath, []byte(value), 0700); err != nil {
|
f, err := os.Open(oomScoreAdjPath)
|
||||||
if syscallNotExists(err) {
|
if err != nil {
|
||||||
|
if os.IsNotExist(err) {
|
||||||
return os.ErrNotExist
|
return os.ErrNotExist
|
||||||
}
|
}
|
||||||
err = fmt.Errorf("failed to apply oom-score-adj to pid %d (%v)", pid, err)
|
err = fmt.Errorf("failed to apply oom-score-adj to pid %d (%v)", pid, err)
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
|
if _, err := f.Write([]byte(value)); err != nil {
|
||||||
|
err = fmt.Errorf("failed to apply oom-score-adj to pid %d (%v)", pid, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -96,20 +92,26 @@ func (oomAdjuster *OOMAdjuster) applyOOMScoreAdjContainer(cgroupName string, oom
|
||||||
continueAdjusting := false
|
continueAdjusting := false
|
||||||
pidList, err := oomAdjuster.pidLister(cgroupName)
|
pidList, err := oomAdjuster.pidLister(cgroupName)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if syscallNotExists(err) {
|
if os.IsNotExist(err) {
|
||||||
// Nothing to do since the container doesn't exist anymore.
|
// Nothing to do since the container doesn't exist anymore.
|
||||||
return os.ErrNotExist
|
return os.ErrNotExist
|
||||||
}
|
}
|
||||||
continueAdjusting = true
|
continueAdjusting = true
|
||||||
glog.Errorf("Error getting process list for cgroup %s: %+v", cgroupName, err)
|
glog.V(10).Infof("Error getting process list for cgroup %s: %+v", cgroupName, err)
|
||||||
} else if len(pidList) == 0 {
|
} else if len(pidList) == 0 {
|
||||||
|
glog.V(10).Infof("Pid list is empty")
|
||||||
continueAdjusting = true
|
continueAdjusting = true
|
||||||
} else {
|
} else {
|
||||||
for _, pid := range pidList {
|
for _, pid := range pidList {
|
||||||
if !adjustedProcessSet[pid] {
|
if !adjustedProcessSet[pid] {
|
||||||
continueAdjusting = true
|
glog.V(10).Infof("pid %d needs to be set", pid)
|
||||||
if err = oomAdjuster.ApplyOOMScoreAdj(pid, oomScoreAdj); err == nil {
|
if err = oomAdjuster.ApplyOOMScoreAdj(pid, oomScoreAdj); err == nil {
|
||||||
adjustedProcessSet[pid] = true
|
adjustedProcessSet[pid] = true
|
||||||
|
} else if err == os.ErrNotExist {
|
||||||
|
continue
|
||||||
|
} else {
|
||||||
|
glog.V(10).Infof("cannot adjust oom score for pid %d - %v", pid, err)
|
||||||
|
continueAdjusting = true
|
||||||
}
|
}
|
||||||
// Processes can come and go while we try to apply oom score adjust value. So ignore errors here.
|
// Processes can come and go while we try to apply oom score adjust value. So ignore errors here.
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,7 +19,10 @@ limitations under the License.
|
||||||
package oom
|
package oom
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"os"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Converts a sequence of PID lists into a PID lister.
|
// Converts a sequence of PID lists into a PID lister.
|
||||||
|
@ -62,10 +65,9 @@ func applyOOMScoreAdjContainerTester(pidListSequence [][]int, maxTries int, appl
|
||||||
} else if err != nil {
|
} else if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check that OOM scores were applied to the right processes.
|
// Check that OOM scores were applied to the right processes.
|
||||||
if len(appliedPids) != len(pidOOMs) {
|
if len(appliedPids) != len(pidOOMs) {
|
||||||
t.Errorf("Applied OOM scores to incorrect number of processes")
|
t.Errorf("Applied OOM scores to incorrect number of processes - %+v vs %v", appliedPids, pidOOMs)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
for _, pid := range appliedPids {
|
for _, pid := range appliedPids {
|
||||||
|
@ -82,29 +84,21 @@ func TestOOMScoreAdjContainer(t *testing.T) {
|
||||||
pidListSequence1 := [][]int{
|
pidListSequence1 := [][]int{
|
||||||
{1, 2},
|
{1, 2},
|
||||||
}
|
}
|
||||||
applyOOMScoreAdjContainerTester(pidListSequence1, 1, nil, true, t)
|
applyOOMScoreAdjContainerTester(pidListSequence1, 1, []int{1, 2}, false, t)
|
||||||
applyOOMScoreAdjContainerTester(pidListSequence1, 2, []int{1, 2}, false, t)
|
|
||||||
applyOOMScoreAdjContainerTester(pidListSequence1, 3, []int{1, 2}, false, t)
|
|
||||||
|
|
||||||
pidListSequence3 := [][]int{
|
|
||||||
{1, 2},
|
|
||||||
{1, 2, 4, 5},
|
|
||||||
{2, 1, 4, 5, 3},
|
|
||||||
}
|
|
||||||
applyOOMScoreAdjContainerTester(pidListSequence3, 1, nil, true, t)
|
|
||||||
applyOOMScoreAdjContainerTester(pidListSequence3, 2, nil, true, t)
|
|
||||||
applyOOMScoreAdjContainerTester(pidListSequence3, 3, nil, true, t)
|
|
||||||
applyOOMScoreAdjContainerTester(pidListSequence3, 4, []int{1, 2, 3, 4, 5}, false, t)
|
|
||||||
|
|
||||||
pidListSequenceLag := [][]int{
|
pidListSequenceLag := [][]int{
|
||||||
{},
|
{},
|
||||||
{},
|
{},
|
||||||
{},
|
{},
|
||||||
{1, 2, 4},
|
{1, 2, 4},
|
||||||
{1, 2, 4, 5},
|
|
||||||
}
|
}
|
||||||
for i := 1; i < 5; i++ {
|
for i := 1; i < 4; i++ {
|
||||||
applyOOMScoreAdjContainerTester(pidListSequenceLag, i, nil, true, t)
|
applyOOMScoreAdjContainerTester(pidListSequenceLag, i, nil, true, t)
|
||||||
}
|
}
|
||||||
applyOOMScoreAdjContainerTester(pidListSequenceLag, 6, []int{1, 2, 4, 5}, false, t)
|
applyOOMScoreAdjContainerTester(pidListSequenceLag, 4, []int{1, 2, 4}, false, t)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPidListerFailure(t *testing.T) {
|
||||||
|
_, err := getPids("/does/not/exist")
|
||||||
|
assert.True(t, os.IsNotExist(err), "expected getPids to return not exists error. Got %v", err)
|
||||||
}
|
}
|
||||||
|
|
|
@ -49,7 +49,7 @@ func (pfs *ProcFS) GetFullContainerName(pid int) (string, error) {
|
||||||
filePath := path.Join("/proc", strconv.Itoa(pid), "cgroup")
|
filePath := path.Join("/proc", strconv.Itoa(pid), "cgroup")
|
||||||
content, err := ioutil.ReadFile(filePath)
|
content, err := ioutil.ReadFile(filePath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if e, ok := err.(*os.SyscallError); ok && os.IsNotExist(e) {
|
if os.IsNotExist(err) {
|
||||||
return "", os.ErrNotExist
|
return "", os.ErrNotExist
|
||||||
}
|
}
|
||||||
return "", err
|
return "", err
|
||||||
|
|
|
@ -0,0 +1,92 @@
|
||||||
|
/*
|
||||||
|
Copyright 2016 The Kubernetes Authors All rights reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package e2e_node
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"k8s.io/kubernetes/pkg/api"
|
||||||
|
"k8s.io/kubernetes/pkg/client/restclient"
|
||||||
|
client "k8s.io/kubernetes/pkg/client/unversioned"
|
||||||
|
|
||||||
|
. "github.com/onsi/ginkgo"
|
||||||
|
. "github.com/onsi/gomega"
|
||||||
|
)
|
||||||
|
|
||||||
|
var _ = Describe("Kubelet Container Manager", func() {
|
||||||
|
var cl *client.Client
|
||||||
|
BeforeEach(func() {
|
||||||
|
// Setup the apiserver client
|
||||||
|
cl = client.NewOrDie(&restclient.Config{Host: *apiServerAddress})
|
||||||
|
})
|
||||||
|
Describe("oom score adjusting", func() {
|
||||||
|
namespace := "oom-adj"
|
||||||
|
Context("when scheduling a busybox command that always fails in a pod", func() {
|
||||||
|
podName := "bin-false"
|
||||||
|
It("it should return succes", func() {
|
||||||
|
pod := &api.Pod{
|
||||||
|
ObjectMeta: api.ObjectMeta{
|
||||||
|
Name: podName,
|
||||||
|
Namespace: namespace,
|
||||||
|
},
|
||||||
|
Spec: api.PodSpec{
|
||||||
|
// Force the Pod to schedule to the node without a scheduler running
|
||||||
|
NodeName: *nodeName,
|
||||||
|
// Don't restart the Pod since it is expected to exit
|
||||||
|
RestartPolicy: api.RestartPolicyNever,
|
||||||
|
Containers: []api.Container{
|
||||||
|
{
|
||||||
|
Image: "gcr.io/google_containers/busybox:1.24",
|
||||||
|
Name: podName,
|
||||||
|
Command: []string{"/bin/false"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
_, err := cl.Pods(namespace).Create(pod)
|
||||||
|
Expect(err).To(BeNil(), fmt.Sprintf("Error creating Pod %v", err))
|
||||||
|
})
|
||||||
|
|
||||||
|
It("it should have an error terminated reason", func() {
|
||||||
|
Eventually(func() error {
|
||||||
|
podData, err := cl.Pods(namespace).Get(podName)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if len(podData.Status.ContainerStatuses) != 1 {
|
||||||
|
return fmt.Errorf("expected only one container in the pod %q", podName)
|
||||||
|
}
|
||||||
|
contTerminatedState := podData.Status.ContainerStatuses[0].State.Terminated
|
||||||
|
if contTerminatedState == nil {
|
||||||
|
return fmt.Errorf("expected state to be terminated. Got pod status: %+v", podData.Status)
|
||||||
|
}
|
||||||
|
if contTerminatedState.Reason != "Error" {
|
||||||
|
return fmt.Errorf("expected terminated state reason to be error. Got %+v", contTerminatedState)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}, time.Minute, time.Second*4).Should(BeNil())
|
||||||
|
})
|
||||||
|
|
||||||
|
It("it should be possible to delete", func() {
|
||||||
|
err := cl.Pods(namespace).Delete(podName, &api.DeleteOptions{})
|
||||||
|
Expect(err).To(BeNil(), fmt.Sprintf("Error deleting Pod %v", err))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
})
|
Loading…
Reference in New Issue