mirror of https://github.com/k3s-io/k3s

Revert "[kubelet] Fix oom-score-adj policy in kubelet"

parent 0d8db69660
commit 492ca3bc9c

Makefile
@@ -156,7 +156,7 @@ test-e2e: ginkgo generated_files
 # DELETE_INSTANCES: For REMOTE=true only. Delete any instances created as
 # part of this test run. Defaults to false.
 # ARTIFACTS: For REMOTE=true only. Local directory to scp test artifacts into
-# from the remote hosts. Defaults to "/tmp/_artifacts".
+# from the remote hosts. Defaults to ""/tmp/_artifacts".
 # REPORT: For REMOTE=false only. Local directory to write juntil xml results
 # to. Defaults to "/tmp/".
 # CLEANUP: For REMOTE=true only. If false, do not stop processes or delete
@@ -226,7 +226,6 @@ Pod OOM score configuration

 *Pod infra containers* or *Special Pod init process*
 - OOM_SCORE_ADJ: -998
-
 *Kubelet, Docker*
 - OOM_SCORE_ADJ: -999 (won’t be OOM killed)
 - Hack, because these critical tasks might die if they conflict with guaranteed containers. In the future, we should place all user-pods into a separate cgroup, and set a limit on the memory they can consume.
@@ -365,7 +365,7 @@ The `kubelet` will set the following:
 The `kubelet` at bootstrapping will set the `oom_score_adj` value for Kubernetes
 daemons, and any dependent container-runtime daemons.

-If `container-runtime` is set to `docker`, then set its `oom_score_adj=-999`
+If `container-runtime` is set to `docker`, then set its `oom_score_adj=-900`

 ## Implementation concerns

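Background for the documentation change above: `oom_score_adj` is a per-process value in the range -1000 to 1000 that the kernel adds to a process's OOM badness score, so lower values make the OOM killer less likely to pick that process. The sketch below is not part of this commit; it only reads the two related proc files for a PID (the helper name and the choice of inspecting the current process are illustrative).

```go
// Minimal sketch (not from this commit): inspect a process's OOM settings
// by reading the proc files the kubelet manipulates. Assumes Linux.
package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strconv"
	"strings"
)

func readProcInt(pid int, name string) (int, error) {
	b, err := os.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), name))
	if err != nil {
		return 0, err
	}
	return strconv.Atoi(strings.TrimSpace(string(b)))
}

func main() {
	pid := os.Getpid() // illustrative: inspect ourselves
	adj, err := readProcInt(pid, "oom_score_adj")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	score, err := readProcInt(pid, "oom_score")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	fmt.Printf("pid %d: oom_score_adj=%d oom_score=%d\n", pid, adj, score)
}
```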
@@ -100,8 +100,6 @@ pkg/kubelet/api/v1alpha1/runtime/api.pb.go: ContainerPort *int32 `protobuf:"vari
 pkg/kubelet/api/v1alpha1/runtime/api.pb.go: OomScoreAdj *int64 `protobuf:"varint,5,opt,name=oom_score_adj,json=oomScoreAdj" json:"oom_score_adj,omitempty"`
 pkg/kubelet/api/v1alpha1/runtime/api.proto: optional int32 container_port = 3;
 pkg/kubelet/api/v1alpha1/runtime/api.proto: optional int64 oom_score_adj = 5;
-pkg/kubelet/cm/container_manager_linux.go: glog.V(3).Infof("Failed to apply oom_score_adj %d for pid %d: %v", oomScoreAdj, pid, err)
-pkg/kubelet/cm/container_manager_linux.go: glog.V(5).Infof("attempting to apply oom_score_adj of %d to pid %d", oomScoreAdj, pid)
 pkg/kubelet/network/hairpin/hairpin.go: hairpinModeRelativePath = "hairpin_mode"
 pkg/kubelet/qos/policy_test.go: t.Errorf("oom_score_adj should be between %d and %d, but was %d", test.lowOOMScoreAdj, test.highOOMScoreAdj, oomScoreAdj)
 pkg/kubelet/qos/policy_test.go: highOOMScoreAdj int // The min oom_score_adj score the container should be assigned.
@@ -119,12 +117,6 @@ test/e2e/common/host_path.go: fmt.Sprintf("--retry_time=%d", retryDuration),
 test/e2e/es_cluster_logging.go: framework.Failf("No cluster_name field in Elasticsearch response: %v", esResponse)
 test/e2e/es_cluster_logging.go: // Check to see if have a cluster_name field.
 test/e2e/es_cluster_logging.go: clusterName, ok := esResponse["cluster_name"]
-test/e2e_node/container_manager_test.go: return fmt.Errorf("expected pid %d's oom_score_adj to be %d; found %d", pid, expectedOOMScoreAdj, oomScore)
-test/e2e_node/container_manager_test.go: return fmt.Errorf("expected pid %d's oom_score_adj to be < %d; found %d", pid, expectedMaxOOMScoreAdj, oomScore)
-test/e2e_node/container_manager_test.go: return fmt.Errorf("expected pid %d's oom_score_adj to be >= %d; found %d", pid, expectedMinOOMScoreAdj, oomScore)
-test/e2e_node/container_manager_test.go: return fmt.Errorf("failed to get oom_score_adj for %d", pid)
-test/e2e_node/container_manager_test.go: return fmt.Errorf("failed to get oom_score_adj for %d: %v", pid, err)
-test/e2e_node/container_manager_test.go: procfsPath := path.Join("/proc", strconv.Itoa(pid), "oom_score_adj")
 test/images/mount-tester/mt.go: flag.BoolVar(&breakOnExpectedContent, "break_on_expected_content", true, "Break out of loop on expected content, (use with --file_content_in_loop flag only)")
 test/images/mount-tester/mt.go: flag.IntVar(&retryDuration, "retry_time", 180, "Retry time during the loop")
 test/images/mount-tester/mt.go: flag.StringVar(&readFileContentInLoopPath, "file_content_in_loop", "", "Path to read the file content in loop from")
@@ -326,7 +326,6 @@ func (cm *containerManagerImpl) setupNode() error {

 systemContainers := []*systemContainer{}
 if cm.ContainerRuntime == "docker" {
-dockerVersion := getDockerVersion(cm.cadvisorInterface)
 if cm.RuntimeCgroupsName != "" {
 cont := newSystemCgroups(cm.RuntimeCgroupsName)
 var capacity = api.ResourceList{}
@@ -352,17 +351,13 @@ func (cm *containerManagerImpl) setupNode() error {
 },
 },
 }
+dockerVersion := getDockerVersion(cm.cadvisorInterface)
 cont.ensureStateFunc = func(manager *fs.Manager) error {
-return ensureDockerInContainer(dockerVersion, qos.DockerOOMScoreAdj, dockerContainer)
+return ensureDockerInContainer(dockerVersion, -900, dockerContainer)
 }
 systemContainers = append(systemContainers, cont)
 } else {
 cm.periodicTasks = append(cm.periodicTasks, func() {
-glog.V(10).Infof("Adding docker daemon periodic tasks")
-if err := ensureDockerInContainer(dockerVersion, qos.DockerOOMScoreAdj, nil); err != nil {
-glog.Error(err)
-return
-}
 cont, err := getContainerNameForProcess(dockerProcessName, dockerPidFile)
 if err != nil {
 glog.Error(err)
@ -406,15 +401,11 @@ func (cm *containerManagerImpl) setupNode() error {
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
cont.ensureStateFunc = func(_ *fs.Manager) error {
|
cont.ensureStateFunc = func(_ *fs.Manager) error {
|
||||||
return ensureProcessInContainerWithOOMScore(os.Getpid(), qos.KubeletOOMScoreAdj, &manager)
|
return manager.Apply(os.Getpid())
|
||||||
}
|
}
|
||||||
systemContainers = append(systemContainers, cont)
|
systemContainers = append(systemContainers, cont)
|
||||||
} else {
|
} else {
|
||||||
cm.periodicTasks = append(cm.periodicTasks, func() {
|
cm.periodicTasks = append(cm.periodicTasks, func() {
|
||||||
if err := ensureProcessInContainerWithOOMScore(os.Getpid(), qos.KubeletOOMScoreAdj, nil); err != nil {
|
|
||||||
glog.Error(err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
cont, err := getContainer(os.Getpid())
|
cont, err := getContainer(os.Getpid())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
glog.Errorf("failed to find cgroups of kubelet - %v", err)
|
glog.Errorf("failed to find cgroups of kubelet - %v", err)
|
||||||
|
@@ -525,18 +516,16 @@ func (cm *containerManagerImpl) SystemCgroupsLimit() api.ResourceList {
 }

 func isProcessRunningInHost(pid int) (bool, error) {
-// Get init pid namespace.
-initPidNs, err := os.Readlink("/proc/1/ns/pid")
+// Get init mount namespace. Mount namespace is unique for all containers.
+initMntNs, err := os.Readlink("/proc/1/ns/mnt")
 if err != nil {
-return false, fmt.Errorf("failed to find pid namespace of init process")
+return false, fmt.Errorf("failed to find mount namespace of init process")
 }
-glog.V(10).Infof("init pid ns is %q", initPidNs)
-processPidNs, err := os.Readlink(fmt.Sprintf("/proc/%d/ns/pid", pid))
+processMntNs, err := os.Readlink(fmt.Sprintf("/proc/%d/ns/mnt", pid))
 if err != nil {
-return false, fmt.Errorf("failed to find pid namespace of process %q", pid)
+return false, fmt.Errorf("failed to find mount namespace of process %q", pid)
 }
-glog.V(10).Infof("Pid %d pid ns is %q", pid, processPidNs)
-return initPidNs == processPidNs, nil
+return initMntNs == processMntNs, nil
 }

 func getPidFromPidFile(pidFile string) (int, error) {
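The hunk above switches `isProcessRunningInHost` back from comparing PID namespaces to comparing mount namespaces: on Linux, `/proc/<pid>/ns/<type>` is a symlink whose target encodes the namespace identity, so two processes share a namespace exactly when the link targets match. A standalone sketch of the same check follows (assumptions: Linux, permission to read the target process's ns links; this is not the kubelet code itself).

```go
// Sketch: decide whether a pid runs in the host's mount namespace by
// comparing /proc/<pid>/ns/mnt against /proc/1/ns/mnt (pid 1 = init).
package main

import (
	"fmt"
	"os"
)

func sameMountNamespaceAsInit(pid int) (bool, error) {
	initNs, err := os.Readlink("/proc/1/ns/mnt")
	if err != nil {
		return false, fmt.Errorf("reading init mount namespace: %v", err)
	}
	procNs, err := os.Readlink(fmt.Sprintf("/proc/%d/ns/mnt", pid))
	if err != nil {
		return false, fmt.Errorf("reading mount namespace of pid %d: %v", pid, err)
	}
	// The link target looks like "mnt:[4026531840]"; equal targets mean
	// the two processes share the namespace.
	return initNs == procNs, nil
}

func main() {
	inHost, err := sameMountNamespaceAsInit(os.Getpid())
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	fmt.Println("running in host mount namespace:", inHost)
}
```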
@@ -578,6 +567,7 @@ func ensureDockerInContainer(dockerVersion semver.Version, oomScoreAdj int, mana
 if dockerVersion.GTE(containerdVersion) {
 dockerProcs = append(dockerProcs, process{containerdProcessName, containerdPidFile})
 }
+
 var errs []error
 for _, proc := range dockerProcs {
 pids, err := getPidsForProcess(proc.name, proc.file)
@@ -588,7 +578,7 @@ func ensureDockerInContainer(dockerVersion semver.Version, oomScoreAdj int, mana

 // Move if the pid is not already in the desired container.
 for _, pid := range pids {
-if err := ensureProcessInContainerWithOOMScore(pid, oomScoreAdj, manager); err != nil {
+if err := ensureProcessInContainer(pid, oomScoreAdj, manager); err != nil {
 errs = append(errs, fmt.Errorf("errors moving %q pid: %v", proc.name, err))
 }
 }
@@ -596,37 +586,32 @@ func ensureDockerInContainer(dockerVersion semver.Version, oomScoreAdj int, mana
 return utilerrors.NewAggregate(errs)
 }

-func ensureProcessInContainerWithOOMScore(pid int, oomScoreAdj int, manager *fs.Manager) error {
+func ensureProcessInContainer(pid int, oomScoreAdj int, manager *fs.Manager) error {
 if runningInHost, err := isProcessRunningInHost(pid); err != nil {
 // Err on the side of caution. Avoid moving the docker daemon unless we are able to identify its context.
 return err
 } else if !runningInHost {
 // Process is running inside a container. Don't touch that.
-glog.V(2).Infof("pid %d is not running in the host namespaces", pid)
 return nil
 }

 var errs []error
-if manager != nil {
-cont, err := getContainer(pid)
-if err != nil {
-errs = append(errs, fmt.Errorf("failed to find container of PID %d: %v", pid, err))
-}
+cont, err := getContainer(pid)
+if err != nil {
+errs = append(errs, fmt.Errorf("failed to find container of PID %d: %v", pid, err))
+}

 if cont != manager.Cgroups.Name {
 err = manager.Apply(pid)
 if err != nil {
-errs = append(errs, fmt.Errorf("failed to move PID %d (in %q) to %q: %v", pid, cont, manager.Cgroups.Name, err))
+errs = append(errs, fmt.Errorf("failed to move PID %d (in %q) to %q", pid, cont, manager.Cgroups.Name))
 }
-}
 }

 // Also apply oom-score-adj to processes
 oomAdjuster := oom.NewOOMAdjuster()
-glog.V(5).Infof("attempting to apply oom_score_adj of %d to pid %d", oomScoreAdj, pid)
 if err := oomAdjuster.ApplyOOMScoreAdj(pid, oomScoreAdj); err != nil {
-glog.V(3).Infof("Failed to apply oom_score_adj %d for pid %d: %v", oomScoreAdj, pid, err)
-errs = append(errs, fmt.Errorf("failed to apply oom score %d to PID %d: %v", oomScoreAdj, pid, err))
+errs = append(errs, fmt.Errorf("failed to apply oom score %d to PID %d", oomScoreAdj, pid))
 }
 return utilerrors.NewAggregate(errs)
 }
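`ensureProcessInContainer` combines two host-level operations: moving a PID into a target cgroup (via the libcontainer `fs.Manager`) and writing its `oom_score_adj` (via the OOM adjuster). A minimal, dependency-free sketch of the same two steps using raw cgroupfs and procfs writes is shown below; the cgroup v1 mount path, hierarchy, and function names are illustrative assumptions, and this is not the libcontainer code path the kubelet uses.

```go
// Sketch (assumptions: cgroup v1 mounted at /sys/fs/cgroup, root privileges).
// Moves a pid into a memory cgroup and sets its oom_score_adj, roughly the
// two effects ensureProcessInContainer is responsible for.
package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strconv"
)

func moveToCgroup(pid int, cgroupPath string) error {
	// Writing the pid into cgroup.procs migrates the process into the cgroup.
	procs := filepath.Join("/sys/fs/cgroup/memory", cgroupPath, "cgroup.procs")
	return os.WriteFile(procs, []byte(strconv.Itoa(pid)), 0644)
}

func setOOMScoreAdj(pid, value int) error {
	path := filepath.Join("/proc", strconv.Itoa(pid), "oom_score_adj")
	return os.WriteFile(path, []byte(strconv.Itoa(value)), 0644)
}

func main() {
	pid := os.Getpid() // illustrative target
	if err := moveToCgroup(pid, "system/example"); err != nil {
		fmt.Fprintln(os.Stderr, "cgroup move:", err)
	}
	if err := setOOMScoreAdj(pid, -900); err != nil {
		fmt.Fprintln(os.Stderr, "oom_score_adj:", err)
	}
}
```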
@@ -21,9 +21,8 @@ import (
 )

 const (
-PodInfraOOMAdj int = -998
+PodInfraOOMAdj int = -999
 KubeletOOMScoreAdj int = -999
-DockerOOMScoreAdj int = -999
 KubeProxyOOMScoreAdj int = -999
 guaranteedOOMScoreAdj int = -998
 besteffortOOMScoreAdj int = 1000
@@ -54,10 +53,10 @@ func GetContainerOOMScoreAdjust(pod *api.Pod, container *api.Container, memoryCa
 // Note that this is a heuristic, it won't work if a container has many small processes.
 memoryRequest := container.Resources.Requests.Memory().Value()
 oomScoreAdjust := 1000 - (1000*memoryRequest)/memoryCapacity
-// A guaranteed pod using 100% of memory can have an OOM score of 10. Ensure
+// A guaranteed pod using 100% of memory can have an OOM score of 1. Ensure
 // that burstable pods have a higher OOM score adjustment.
-if int(oomScoreAdjust) < (1000 + guaranteedOOMScoreAdj) {
-return (1000 + guaranteedOOMScoreAdj)
+if oomScoreAdjust < 2 {
+return 2
 }
 // Give burstable pods a higher chance of survival over besteffort pods.
 if int(oomScoreAdjust) == besteffortOOMScoreAdj {
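The burstable heuristic above maps a container's memory request to an OOM score adjustment, `oomScoreAdjust = 1000 - (1000*memoryRequest)/memoryCapacity`, so the more memory a container requests relative to node capacity, the lower (safer) its score. For example, a 512Mi request on a 4Gi node gives 1000 - 1000*512/4096 = 875. The sketch below reproduces the arithmetic with the clamping visible in this hunk; the final besteffort clamp (returning 999 when the result collides with 1000) is an assumption based on the truncated tail of the function, not something shown in the diff.

```go
// Sketch of the burstable oom_score_adj heuristic from pkg/kubelet/qos.
// Constants mirror the reverted values; the besteffort clamp at the end is
// an assumption, since the diff cuts off before the function's return.
package main

import "fmt"

const besteffortOOMScoreAdj = 1000

func burstableOOMScoreAdjust(memoryRequest, memoryCapacity int64) int {
	adj := 1000 - (1000*memoryRequest)/memoryCapacity
	// Keep burstable pods above guaranteed pods (-998) and pod infra containers.
	if adj < 2 {
		return 2
	}
	// Keep burstable pods below besteffort pods (1000).
	if int(adj) == besteffortOOMScoreAdj {
		return int(adj) - 1
	}
	return int(adj)
}

func main() {
	capacity := int64(4 * 1024 * 1024 * 1024) // 4Gi node
	requests := []int64{512 * 1024 * 1024, 2 * 1024 * 1024 * 1024, 4 * 1024 * 1024 * 1024}
	for _, req := range requests {
		fmt.Printf("request=%d -> oom_score_adj=%d\n", req, burstableOOMScoreAdjust(req, capacity))
	}
}
```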
@@ -20,7 +20,6 @@ package oom

 import (
 "fmt"
-"io/ioutil"
 "os"
 "path"
 "strconv"
@ -66,24 +65,28 @@ func applyOOMScoreAdj(pid int, oomScoreAdj int) error {
|
||||||
maxTries := 2
|
maxTries := 2
|
||||||
oomScoreAdjPath := path.Join("/proc", pidStr, "oom_score_adj")
|
oomScoreAdjPath := path.Join("/proc", pidStr, "oom_score_adj")
|
||||||
value := strconv.Itoa(oomScoreAdj)
|
value := strconv.Itoa(oomScoreAdj)
|
||||||
glog.V(4).Infof("attempting to set %q to %q", oomScoreAdjPath, value)
|
|
||||||
var err error
|
var err error
|
||||||
for i := 0; i < maxTries; i++ {
|
for i := 0; i < maxTries; i++ {
|
||||||
err = ioutil.WriteFile(oomScoreAdjPath, []byte(value), 0700)
|
f, err := os.Open(oomScoreAdjPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if os.IsNotExist(err) {
|
if os.IsNotExist(err) {
|
||||||
glog.V(2).Infof("%q does not exist", oomScoreAdjPath)
|
|
||||||
return os.ErrNotExist
|
return os.ErrNotExist
|
||||||
}
|
}
|
||||||
|
err = fmt.Errorf("failed to apply oom-score-adj to pid %d (%v)", pid, err)
|
||||||
glog.V(3).Info(err)
|
continue
|
||||||
|
}
|
||||||
|
if _, err := f.Write([]byte(value)); err != nil {
|
||||||
|
// we can ignore the return value of f.Close() here.
|
||||||
|
f.Close()
|
||||||
|
err = fmt.Errorf("failed to apply oom-score-adj to pid %d (%v)", pid, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if err = f.Close(); err != nil {
|
||||||
|
err = fmt.Errorf("failed to apply oom-score-adj to pid %d (%v)", pid, err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
if err != nil {
|
|
||||||
glog.V(2).Infof("failed to set %q to %q: %v", oomScoreAdjPath, value, err)
|
|
||||||
}
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
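The restored version of `applyOOMScoreAdj` opens `/proc/<pid>/oom_score_adj`, writes the value, closes the file, and retries up to `maxTries` times, wrapping any failure into a single error. A compact standalone sketch of the same retry-and-write pattern, using `os.WriteFile` instead of explicit Open/Write/Close, is below; the helper name and retry count are illustrative, not the kubelet's implementation.

```go
// Sketch: write an oom_score_adj value with a bounded retry, similar in
// spirit to applyOOMScoreAdj. Names and the retry count are illustrative.
package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strconv"
)

func writeOOMScoreAdj(pid, value, maxTries int) error {
	p := filepath.Join("/proc", strconv.Itoa(pid), "oom_score_adj")
	data := []byte(strconv.Itoa(value))
	var err error
	for i := 0; i < maxTries; i++ {
		if err = os.WriteFile(p, data, 0644); err == nil {
			return nil
		}
		if os.IsNotExist(err) {
			// The process is gone; retrying will not help.
			return os.ErrNotExist
		}
	}
	return fmt.Errorf("failed to apply oom-score-adj to pid %d: %v", pid, err)
}

func main() {
	// Raising our own oom_score_adj does not require extra privileges.
	if err := writeOOMScoreAdj(os.Getpid(), 500, 2); err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	fmt.Println("oom_score_adj updated")
}
```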
@@ -18,14 +18,9 @@ package e2e_node

 import (
 "fmt"
-"os/exec"
-"path"
-"strconv"
-"strings"
 "time"

 "k8s.io/kubernetes/pkg/api"
-"k8s.io/kubernetes/pkg/api/resource"
 "k8s.io/kubernetes/pkg/util/uuid"
 "k8s.io/kubernetes/test/e2e/framework"

@@ -33,193 +28,63 @@ import (
 . "github.com/onsi/gomega"
 )

-const (
-kubeletProcessname = "kubelet"
-)
-
-func getOOMScoreForPid(pid int) (int, error) {
-procfsPath := path.Join("/proc", strconv.Itoa(pid), "oom_score_adj")
-out, err := exec.Command("sudo", "cat", procfsPath).CombinedOutput()
-if err != nil {
-return 0, err
-}
-return strconv.Atoi(strings.TrimSpace(string(out)))
-}
-
-func validateOOMScoreAdjSetting(pid int, expectedOOMScoreAdj int) error {
-oomScore, err := getOOMScoreForPid(pid)
-if err != nil {
-return fmt.Errorf("failed to get oom_score_adj for %d: %v", pid, err)
-}
-if expectedOOMScoreAdj != oomScore {
-return fmt.Errorf("expected pid %d's oom_score_adj to be %d; found %d", pid, expectedOOMScoreAdj, oomScore)
-}
-return nil
-}
-
-func validateOOMScoreAdjSettingIsInRange(pid int, expectedMinOOMScoreAdj, expectedMaxOOMScoreAdj int) error {
-oomScore, err := getOOMScoreForPid(pid)
-if err != nil {
-return fmt.Errorf("failed to get oom_score_adj for %d", pid)
-}
-if oomScore < expectedMinOOMScoreAdj {
-return fmt.Errorf("expected pid %d's oom_score_adj to be >= %d; found %d", pid, expectedMinOOMScoreAdj, oomScore)
-}
-if oomScore < expectedMaxOOMScoreAdj {
-return fmt.Errorf("expected pid %d's oom_score_adj to be < %d; found %d", pid, expectedMaxOOMScoreAdj, oomScore)
-}
-return nil
-}
-
-var _ = framework.KubeDescribe("Kubelet Container Manager [Serial]", func() {
+var _ = framework.KubeDescribe("Kubelet Container Manager", func() {
 f := framework.NewDefaultFramework("kubelet-container-manager")
+var podClient *framework.PodClient

-Describe("Validate OOM score adjustments", func() {
-Context("once the node is setup", func() {
-It("docker daemon's oom-score-adj should be -999", func() {
-dockerPids, err := getPidsForProcess(dockerProcessName, dockerPidFile)
-Expect(err).To(BeNil(), "failed to get list of docker daemon pids")
-for _, pid := range dockerPids {
-Eventually(func() error {
-return validateOOMScoreAdjSetting(pid, -999)
-}, 5*time.Minute, 30*time.Second).Should(BeNil())
-}
-})
-It("Kubelet's oom-score-adj should be -999", func() {
-kubeletPids, err := getPidsForProcess(kubeletProcessName, "")
-Expect(err).To(BeNil(), "failed to get list of kubelet pids")
-Expect(len(kubeletPids)).To(Equal(1), "expected only one kubelet process; found %d", len(kubeletPids))
-Eventually(func() error {
-return validateOOMScoreAdjSetting(kubeletPids[0], -999)
-}, 5*time.Minute, 30*time.Second).Should(BeNil())
-})
-It("pod infra containers oom-score-adj should be -998 and best effort container's should be 1000", func() {
-var err error
-podClient := f.PodClient()
-podName := "besteffort" + string(uuid.NewUUID())
+BeforeEach(func() {
+podClient = f.PodClient()
+})
+Describe("oom score adjusting", func() {
+Context("when scheduling a busybox command that always fails in a pod", func() {
+var podName string
+BeforeEach(func() {
+podName = "bin-false" + string(uuid.NewUUID())
 podClient.Create(&api.Pod{
 ObjectMeta: api.ObjectMeta{
 Name: podName,
 },
 Spec: api.PodSpec{
+// Don't restart the Pod since it is expected to exit
+RestartPolicy: api.RestartPolicyNever,
 Containers: []api.Container{
 {
-Image: ImageRegistry[serveHostnameImage],
+Image: ImageRegistry[busyBoxImage],
 Name: podName,
+Command: []string{"/bin/false"},
 },
 },
 },
 })
-var pausePids []int
-By("checking infra container's oom-score-adj")
-Eventually(func() error {
-pausePids, err = getPidsForProcess("pause", "")
-if err != nil {
-return fmt.Errorf("failed to get list of pause pids: %v", err)
-}
-for _, pid := range pausePids {
-if err := validateOOMScoreAdjSetting(pid, -998); err != nil {
-return err
-}
-}
-return nil
-}, 2*time.Minute, time.Second*4).Should(BeNil())
-var shPids []int
-By("checking besteffort container's oom-score-adj")
-Eventually(func() error {
-shPids, err = getPidsForProcess("serve_hostname", "")
-if err != nil {
-return fmt.Errorf("failed to get list of serve hostname process pids: %v", err)
-}
-if len(shPids) != 1 {
-return fmt.Errorf("expected only one serve_hostname process; found %d", len(shPids))
-}
-return validateOOMScoreAdjSetting(shPids[0], 1000)
-}, 2*time.Minute, time.Second*4).Should(BeNil())
-
 })
-It("guaranteed container's oom-score-adj should be -998", func() {
-podClient := f.PodClient()
-podName := "guaranteed" + string(uuid.NewUUID())
-podClient.Create(&api.Pod{
-ObjectMeta: api.ObjectMeta{
-Name: podName,
-},
-Spec: api.PodSpec{
-Containers: []api.Container{
-{
-Image: ImageRegistry[nginxImage],
-Name: podName,
-Resources: api.ResourceRequirements{
-Limits: api.ResourceList{
-"cpu": resource.MustParse("100m"),
-"memory": resource.MustParse("50Mi"),
-},
-},
-},
-},
-},
-})
-var (
-ngPids []int
-err error
-)
+It("should have an error terminated reason", func() {
 Eventually(func() error {
-ngPids, err = getPidsForProcess("nginx", "")
+podData, err := podClient.Get(podName)
 if err != nil {
-return fmt.Errorf("failed to get list of nginx process pids: %v", err)
+return err
 }
-for _, pid := range ngPids {
-if err := validateOOMScoreAdjSetting(pid, -998); err != nil {
-return err
-}
+if len(podData.Status.ContainerStatuses) != 1 {
+return fmt.Errorf("expected only one container in the pod %q", podName)
+}
+contTerminatedState := podData.Status.ContainerStatuses[0].State.Terminated
+if contTerminatedState == nil {
+return fmt.Errorf("expected state to be terminated. Got pod status: %+v", podData.Status)
+}
+if contTerminatedState.Reason != "Error" {
+return fmt.Errorf("expected terminated state reason to be error. Got %+v", contTerminatedState)
 }

 return nil
-}, 2*time.Minute, time.Second*4).Should(BeNil())
+}, time.Minute, time.Second*4).Should(BeNil())

 })
-It("burstable container's oom-score-adj should be between [2, 1000)", func() {
-podClient := f.PodClient()
-podName := "burstable" + string(uuid.NewUUID())
-podClient.Create(&api.Pod{
-ObjectMeta: api.ObjectMeta{
-Name: podName,
-},
-Spec: api.PodSpec{
-Containers: []api.Container{
-{
-Image: ImageRegistry[testWebServer],
-Name: podName,
-Resources: api.ResourceRequirements{
-Requests: api.ResourceList{
-"cpu": resource.MustParse("100m"),
-"memory": resource.MustParse("50Mi"),
-},
-},
-},
-},
-},
-})
-var (
-wsPids []int
-err error
-)
-Eventually(func() error {
-wsPids, err = getPidsForProcess("test-webserver", "")
-if err != nil {
-return fmt.Errorf("failed to get list of test-webserver process pids: %v", err)
-}
-for _, pid := range wsPids {
-if err := validateOOMScoreAdjSettingIsInRange(pid, 2, 1000); err != nil {
-return err
-}
-}
-return nil
-}, 2*time.Minute, time.Second*4).Should(BeNil())
-
-// TODO: Test the oom-score-adj logic for burstable more accurately.
+It("should be possible to delete", func() {
+err := podClient.Delete(podName, &api.DeleteOptions{})
+Expect(err).To(BeNil(), fmt.Sprintf("Error deleting Pod %v", err))
 })
 })
 })

 })
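The removed e2e helpers above (`getOOMScoreForPid`, `validateOOMScoreAdjSetting`, `validateOOMScoreAdjSettingIsInRange`) simply read `/proc/<pid>/oom_score_adj` (via `sudo cat`, since the test may not own the process) and compare it with an expected value or range. A standalone sketch of that check, reading the file directly instead of shelling out, is below; the names are illustrative and this is not the test code itself.

```go
// Sketch: read a process's oom_score_adj and check it against an expected
// range, similar to the removed e2e_node helpers. Assumes read permission
// on /proc/<pid>/oom_score_adj (the real test used "sudo cat" instead).
package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strconv"
	"strings"
)

func oomScoreAdjForPid(pid int) (int, error) {
	b, err := os.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "oom_score_adj"))
	if err != nil {
		return 0, fmt.Errorf("failed to get oom_score_adj for %d: %v", pid, err)
	}
	return strconv.Atoi(strings.TrimSpace(string(b)))
}

func validateInRange(pid, min, max int) error {
	adj, err := oomScoreAdjForPid(pid)
	if err != nil {
		return err
	}
	if adj < min || adj >= max {
		return fmt.Errorf("expected pid %d's oom_score_adj in [%d, %d); found %d", pid, min, max, adj)
	}
	return nil
}

func main() {
	// Illustrative bounds; burstable containers are expected to land in [2, 1000).
	if err := validateInRange(os.Getpid(), -1000, 1001); err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	fmt.Println("oom_score_adj within expected range")
}
```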
@@ -75,54 +75,7 @@ var _ = framework.KubeDescribe("Kubelet", func() {
 }, time.Minute, time.Second*4).Should(Equal("Hello World\n"))
 })
 })
-Context("when scheduling a busybox command that always fails in a pod", func() {
-var podName string
-
-BeforeEach(func() {
-podName = "bin-false" + string(uuid.NewUUID())
-podClient.Create(&api.Pod{
-ObjectMeta: api.ObjectMeta{
-Name: podName,
-},
-Spec: api.PodSpec{
-// Don't restart the Pod since it is expected to exit
-RestartPolicy: api.RestartPolicyNever,
-Containers: []api.Container{
-{
-Image: ImageRegistry[busyBoxImage],
-Name: podName,
-Command: []string{"/bin/false"},
-},
-},
-},
-})
-})
-
-It("should have an error terminated reason", func() {
-Eventually(func() error {
-podData, err := podClient.Get(podName)
-if err != nil {
-return err
-}
-if len(podData.Status.ContainerStatuses) != 1 {
-return fmt.Errorf("expected only one container in the pod %q", podName)
-}
-contTerminatedState := podData.Status.ContainerStatuses[0].State.Terminated
-if contTerminatedState == nil {
-return fmt.Errorf("expected state to be terminated. Got pod status: %+v", podData.Status)
-}
-if contTerminatedState.Reason != "Error" {
-return fmt.Errorf("expected terminated state reason to be error. Got %+v", contTerminatedState)
-}
-return nil
-}, time.Minute, time.Second*4).Should(BeNil())
-})
-
-It("should be possible to delete", func() {
-err := podClient.Delete(podName, &api.DeleteOptions{})
-Expect(err).To(BeNil(), fmt.Sprintf("Error deleting Pod %v", err))
-})
-})
 Context("when scheduling a read only busybox container", func() {
 podName := "busybox-readonly-fs" + string(uuid.NewUUID())
 It("it should not write to root filesystem", func() {
@@ -24,6 +24,7 @@ import (
 "io/ioutil"
 "log"
 "os"
+"os/exec"
 "sort"
 "strconv"
 "strings"
@@ -37,7 +38,6 @@ import (
 "k8s.io/kubernetes/pkg/api"
 "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats"
 "k8s.io/kubernetes/pkg/labels"
-"k8s.io/kubernetes/pkg/util/procfs"
 "k8s.io/kubernetes/pkg/util/runtime"
 "k8s.io/kubernetes/pkg/util/uuid"
 "k8s.io/kubernetes/pkg/util/wait"
@@ -450,16 +450,19 @@ const (
 containerdPidFile = "/run/docker/libcontainerd/docker-containerd.pid"
 )

-func getPidsForProcess(name, pidFile string) ([]int, error) {
-if len(pidFile) > 0 {
-if pid, err := getPidFromPidFile(pidFile); err == nil {
-return []int{pid}, nil
-} else {
-// log the error and fall back to pidof
-runtime.HandleError(err)
-}
+func getContainerNameForProcess(name, pidFile string) (string, error) {
+pids, err := getPidsForProcess(name, pidFile)
+if err != nil {
+return "", fmt.Errorf("failed to detect process id for %q - %v", name, err)
 }
-return procfs.PidOf(name)
+if len(pids) == 0 {
+return "", nil
+}
+cont, err := getContainer(pids[0])
+if err != nil {
+return "", err
+}
+return cont, nil
 }

 func getPidFromPidFile(pidFile string) (int, error) {
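`getContainerNameForProcess` resolves a process name to the cgroup its first pid lives in (via `getContainer`). On Linux the cgroup of a pid can be read from `/proc/<pid>/cgroup`; a standalone sketch of that lookup (cgroup v1 format, picking the memory hierarchy) follows. This is an illustration, not the kubelet's `getContainer` implementation.

```go
// Sketch: find the cgroup path of a pid by parsing /proc/<pid>/cgroup.
// Assumes the cgroup v1 line format "hierarchy-ID:controller-list:cgroup-path"
// and picks the memory controller entry.
package main

import (
	"bufio"
	"fmt"
	"os"
	"strconv"
	"strings"
)

func memoryCgroupOf(pid int) (string, error) {
	f, err := os.Open("/proc/" + strconv.Itoa(pid) + "/cgroup")
	if err != nil {
		return "", err
	}
	defer f.Close()
	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		parts := strings.SplitN(scanner.Text(), ":", 3)
		if len(parts) != 3 {
			continue
		}
		for _, ctrl := range strings.Split(parts[1], ",") {
			if ctrl == "memory" {
				return parts[2], nil
			}
		}
	}
	if err := scanner.Err(); err != nil {
		return "", err
	}
	return "", fmt.Errorf("memory cgroup not found for pid %d", pid)
}

func main() {
	cg, err := memoryCgroupOf(os.Getpid())
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	fmt.Println("memory cgroup:", cg)
}
```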
@@ -482,19 +485,31 @@ func getPidFromPidFile(pidFile string) (int, error) {
 return pid, nil
 }

-func getContainerNameForProcess(name, pidFile string) (string, error) {
-pids, err := getPidsForProcess(name, pidFile)
+func getPidsForProcess(name, pidFile string) ([]int, error) {
+if len(pidFile) > 0 {
+if pid, err := getPidFromPidFile(pidFile); err == nil {
+return []int{pid}, nil
+} else {
+// log the error and fall back to pidof
+runtime.HandleError(err)
+}
+}
+
+out, err := exec.Command("pidof", name).Output()
 if err != nil {
-return "", fmt.Errorf("failed to detect process id for %q - %v", name, err)
+return []int{}, fmt.Errorf("failed to find pid of %q: %v", name, err)
 }
-if len(pids) == 0 {
-return "", nil
+// The output of pidof is a list of pids.
+pids := []int{}
+for _, pidStr := range strings.Split(strings.TrimSpace(string(out)), " ") {
+pid, err := strconv.Atoi(pidStr)
+if err != nil {
+continue
+}
+pids = append(pids, pid)
 }
-cont, err := getContainer(pids[0])
-if err != nil {
-return "", err
-}
-return cont, nil
+return pids, nil
 }

 // getContainer returns the cgroup associated with the specified pid.
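The restored `getPidsForProcess` prefers a pid file when one is configured and otherwise shells out to `pidof`, whose output is a single line of space-separated pids. A standalone sketch of that fallback path is below; the helper name is illustrative, and the real code also consults the pid file first.

```go
// Sketch: resolve pids for a process name by parsing `pidof` output,
// the fallback used when no pid file is available. Assumes pidof is on PATH.
package main

import (
	"fmt"
	"os/exec"
	"strconv"
	"strings"
)

func pidsOf(name string) ([]int, error) {
	out, err := exec.Command("pidof", name).Output()
	if err != nil {
		return nil, fmt.Errorf("failed to find pid of %q: %v", name, err)
	}
	// pidof prints space-separated pids on a single line, e.g. "1423 987".
	var pids []int
	for _, field := range strings.Fields(strings.TrimSpace(string(out))) {
		pid, err := strconv.Atoi(field)
		if err != nil {
			continue // skip anything that is not a number
		}
		pids = append(pids, pid)
	}
	return pids, nil
}

func main() {
	pids, err := pidsOf("dockerd") // illustrative process name
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println("pids:", pids)
}
```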