Merge pull request #62541 from filbranden/cgroupname1

Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

Use a []string for CgroupName, which is a more accurate internal representation

**What this PR does / why we need it**:

This is purely a refactoring and should bring no essential change in behavior.

It does clarify the cgroup handling code quite a bit.

It is preparation for further changes we might want to make to the cgroup hierarchy. (It's useful on its own, though, so it should be considered even if we never make those changes.)

**Special notes for your reviewer**:

The slice of strings more precisely captures the hierarchic nature of the cgroup paths we use to represent pods and their groupings.

It also reduces the chances of passing an incorrectly formatted path to a cgroup driver that requires a different naming scheme, since explicit conversions are now always needed.

The new constructor `NewCgroupName` starts from an existing `CgroupName`, which enforces a hierarchy where a root is always needed. It also validates the component names, rejecting the invalid characters "/" and "_".

A `RootCgroupName` for the top of the cgroup hierarchy tree is introduced.
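For illustration, here is a minimal, self-contained sketch of the two conversions. This is not the PR's code: it inlines a simplified version of libcontainer's `ExpandSlice`, and the component values are made up.

```go
package main

import (
	"fmt"
	"path"
	"strings"
)

// CgroupName mirrors the new internal representation: one string per
// hierarchy component.
type CgroupName []string

// toSystemd escapes "-" as "_" in each component, joins the components
// with "-", and expands the resulting slice name into its full cgroupfs
// path (the real code delegates the expansion to libcontainer's ExpandSlice).
func (c CgroupName) toSystemd() string {
	if len(c) == 0 {
		return "/"
	}
	escaped := make([]string, 0, len(c))
	for _, part := range c {
		escaped = append(escaped, strings.Replace(part, "-", "_", -1))
	}
	// Expand "a-b.slice" into "/a.slice/a-b.slice", one level per component.
	result := ""
	for i := range escaped {
		result += "/" + strings.Join(escaped[:i+1], "-") + ".slice"
	}
	return result
}

// toCgroupfs simply joins the components as path segments.
func (c CgroupName) toCgroupfs() string {
	return "/" + path.Join(c...)
}

func main() {
	name := CgroupName{"kubepods", "burstable", "pod1234-abcd"}
	fmt.Println(name.toSystemd())  // /kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod1234_abcd.slice
	fmt.Println(name.toCgroupfs()) // /kubepods/burstable/pod1234-abcd
}
```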

This refactor results in a net reduction of around 30 lines of code, mainly from the demise of `ConvertCgroupNameToSystemd`, which had fairly complicated logic and was doing too many things at once.

There's a small TODO in a helper `updateSystemdCgroupInfo` that was introduced to make this commit possible. That logic really belongs in libcontainer; I'm planning to send a PR there to move it. (The API already takes a field with that information, but that field is only processed by the cgroupfs driver and not the systemd one; we should fix that.)
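For reference, a small runnable sketch of the Parent/Name split that helper performs (the path value is illustrative; the real helper writes the results into libcontainer's `Cgroup` config):

```go
package main

import (
	"fmt"
	"path"
)

func main() {
	// Expanded systemd path for {"kubepods", "burstable"} (illustrative value).
	full := "/kubepods.slice/kubepods-burstable.slice"

	dir, base := path.Split(full) // "/kubepods.slice/", "kubepods-burstable.slice"
	parent := path.Base(dir)      // immediate parent slice: "kubepods.slice"
	if dir == "/" {
		parent = "-.slice" // systemd's name for the root slice
	}
	fmt.Println(parent, base) // kubepods.slice kubepods-burstable.slice
}
```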

Tested: by running the e2e-node tests on both Ubuntu 16.04 (with the cgroupfs driver) and CentOS 7 (with the systemd driver).

**NOTE**: I only tested this with dockershim; we should double-check that this works with the CRI endpoints too, in both cgroupfs and systemd modes.

/assign @derekwaynecarr 
/assign @dashpole 
/assign @Random-Liu 

**Release note**:

```release-note
NONE
```
Kubernetes Submit Queue 2018-05-03 08:16:45 -07:00 committed by GitHub
commit 592c39bccc
16 changed files with 252 additions and 272 deletions

View File

@@ -53,74 +53,79 @@ const (
 // which is what is expected when interacting with libcontainer
 var hugePageSizeList = []string{"B", "kB", "MB", "GB", "TB", "PB"}
 
-// ConvertCgroupNameToSystemd converts the internal cgroup name to a systemd name.
-// For example, the name /Burstable/pod_123-456 becomes Burstable-pod_123_456.slice
-// If outputToCgroupFs is true, it expands the systemd name into the cgroupfs form.
-// For example, it will return /Burstable.slice/Burstable-pod_123_456.slice in above scenario.
-func ConvertCgroupNameToSystemd(cgroupName CgroupName, outputToCgroupFs bool) string {
-	name := string(cgroupName)
-	result := ""
-	if name != "" && name != "/" {
-		parts := strings.Split(name, "/")
-		results := []string{}
-		for _, part := range parts {
-			// ignore leading stuff
-			if part == "" {
-				continue
-			}
-			// detect if we are given a systemd style name.
-			// if so, we do not want to do double encoding.
-			if IsSystemdStyleName(part) {
-				part = strings.TrimSuffix(part, systemdSuffix)
-				separatorIndex := strings.LastIndex(part, "-")
-				if separatorIndex >= 0 && separatorIndex < len(part) {
-					part = part[separatorIndex+1:]
-				}
-			} else {
-				// systemd treats - as a step in the hierarchy, we convert all - to _
-				part = strings.Replace(part, "-", "_", -1)
-			}
-			results = append(results, part)
-		}
-		// each part is appended with systemd style -
-		result = strings.Join(results, "-")
-	} else {
-		// root converts to -
-		result = "-"
-	}
-	// always have a .slice suffix
-	if !IsSystemdStyleName(result) {
-		result = result + systemdSuffix
-	}
-	// if the caller desired the result in cgroupfs format...
-	if outputToCgroupFs {
-		var err error
-		result, err = cgroupsystemd.ExpandSlice(result)
-		if err != nil {
-			panic(fmt.Errorf("error adapting cgroup name, input: %v, err: %v", name, err))
-		}
-	}
-	return result
-}
-
-// ConvertCgroupFsNameToSystemd converts an expanded cgroupfs name to its systemd name.
-// For example, it will convert test.slice/test-a.slice/test-a-b.slice to become test-a-b.slice
-// NOTE: this is public right now to allow its usage in dockermanager and dockershim, ideally both those
-// code areas could use something from libcontainer if we get this style function upstream.
-func ConvertCgroupFsNameToSystemd(cgroupfsName string) (string, error) {
-	// TODO: see if libcontainer systemd implementation could use something similar, and if so, move
-	// this function up to that library. At that time, it would most likely do validation specific to systemd
-	// above and beyond the simple assumption here that the base of the path encodes the hierarchy
-	// per systemd convention.
-	return path.Base(cgroupfsName), nil
-}
+var RootCgroupName = CgroupName([]string{})
+
+// NewCgroupName composes a new cgroup name.
+// Use RootCgroupName as base to start at the root.
+// This function does some basic check for invalid characters at the name.
+func NewCgroupName(base CgroupName, components ...string) CgroupName {
+	for _, component := range components {
+		// Forbid using "_" in internal names. When remapping internal
+		// names to systemd cgroup driver, we want to remap "-" => "_",
+		// so we forbid "_" so that we can always reverse the mapping.
+		if strings.Contains(component, "/") || strings.Contains(component, "_") {
+			panic(fmt.Errorf("invalid character in component [%q] of CgroupName", component))
+		}
+	}
+	return CgroupName(append(base, components...))
+}
+
+func escapeSystemdCgroupName(part string) string {
+	return strings.Replace(part, "-", "_", -1)
+}
+
+func unescapeSystemdCgroupName(part string) string {
+	return strings.Replace(part, "_", "-", -1)
+}
+
+// cgroupName.ToSystemd converts the internal cgroup name to a systemd name.
+// For example, the name {"kubepods", "burstable", "pod1234-abcd-5678-efgh"} becomes
+// "/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod1234_abcd_5678_efgh.slice"
+// This function always expands the systemd name into the cgroupfs form. If only
+// the last part is needed, use path.Base(...) on it to discard the rest.
+func (cgroupName CgroupName) ToSystemd() string {
+	if len(cgroupName) == 0 || (len(cgroupName) == 1 && cgroupName[0] == "") {
+		return "/"
+	}
+	newparts := []string{}
+	for _, part := range cgroupName {
+		part = escapeSystemdCgroupName(part)
+		newparts = append(newparts, part)
+	}
+
+	result, err := cgroupsystemd.ExpandSlice(strings.Join(newparts, "-") + systemdSuffix)
+	if err != nil {
+		// Should never happen...
+		panic(fmt.Errorf("error converting cgroup name [%v] to systemd format: %v", cgroupName, err))
+	}
+	return result
+}
+
+func ParseSystemdToCgroupName(name string) CgroupName {
+	driverName := path.Base(name)
+	driverName = strings.TrimSuffix(driverName, systemdSuffix)
+	parts := strings.Split(driverName, "-")
+	result := []string{}
+	for _, part := range parts {
+		result = append(result, unescapeSystemdCgroupName(part))
+	}
+	return CgroupName(result)
+}
+
+func (cgroupName CgroupName) ToCgroupfs() string {
+	return "/" + path.Join(cgroupName...)
+}
+
+func ParseCgroupfsToCgroupName(name string) CgroupName {
+	components := strings.Split(strings.TrimPrefix(name, "/"), "/")
+	if len(components) == 1 && components[0] == "" {
+		components = []string{}
+	}
+	return CgroupName(components)
+}
 
 func IsSystemdStyleName(name string) bool {
-	if strings.HasSuffix(name, systemdSuffix) {
-		return true
-	}
-	return false
+	return strings.HasSuffix(name, systemdSuffix)
 }
 
 // libcontainerAdapter provides a simplified interface to libcontainer based on libcontainer type.
@@ -156,34 +161,6 @@ func (l *libcontainerAdapter) newManager(cgroups *libcontainerconfigs.Cgroup, pa
 	return nil, fmt.Errorf("invalid cgroup manager configuration")
 }
 
-func (l *libcontainerAdapter) revertName(name string) CgroupName {
-	if l.cgroupManagerType != libcontainerSystemd {
-		return CgroupName(name)
-	}
-	return CgroupName(RevertFromSystemdToCgroupStyleName(name))
-}
-
-func RevertFromSystemdToCgroupStyleName(name string) string {
-	driverName, err := ConvertCgroupFsNameToSystemd(name)
-	if err != nil {
-		panic(err)
-	}
-	driverName = strings.TrimSuffix(driverName, systemdSuffix)
-	driverName = strings.Replace(driverName, "-", "/", -1)
-	driverName = strings.Replace(driverName, "_", "-", -1)
-	return driverName
-}
-
-// adaptName converts a CgroupName identifier to a driver specific conversion value.
-// if outputToCgroupFs is true, the result is returned in the cgroupfs format rather than the driver specific form.
-func (l *libcontainerAdapter) adaptName(cgroupName CgroupName, outputToCgroupFs bool) string {
-	if l.cgroupManagerType != libcontainerSystemd {
-		name := string(cgroupName)
-		return name
-	}
-	return ConvertCgroupNameToSystemd(cgroupName, outputToCgroupFs)
-}
-
 // CgroupSubsystems holds information about the mounted cgroup subsystems
 type CgroupSubsystems struct {
 	// Cgroup subsystem mounts.
@@ -223,13 +200,22 @@ func NewCgroupManager(cs *CgroupSubsystems, cgroupDriver string) CgroupManager {
 }
 
 // Name converts the cgroup to the driver specific value in cgroupfs form.
+// This always returns a valid cgroupfs path even when systemd driver is in use!
 func (m *cgroupManagerImpl) Name(name CgroupName) string {
-	return m.adapter.adaptName(name, true)
+	if m.adapter.cgroupManagerType == libcontainerSystemd {
+		return name.ToSystemd()
+	} else {
+		return name.ToCgroupfs()
+	}
 }
 
 // CgroupName converts the literal cgroupfs name on the host to an internal identifier.
 func (m *cgroupManagerImpl) CgroupName(name string) CgroupName {
-	return m.adapter.revertName(name)
+	if m.adapter.cgroupManagerType == libcontainerSystemd {
+		return ParseSystemdToCgroupName(name)
+	} else {
+		return ParseCgroupfsToCgroupName(name)
+	}
 }
 
 // buildCgroupPaths builds a path to each cgroup subsystem for the specified name.
@@ -242,6 +228,22 @@ func (m *cgroupManagerImpl) buildCgroupPaths(name CgroupName) map[string]string
 	return cgroupPaths
 }
 
+// TODO(filbranden): This logic belongs in libcontainer/cgroup/systemd instead.
+// It should take a libcontainerconfigs.Cgroup.Path field (rather than Name and Parent)
+// and split it appropriately, using essentially the logic below.
+// This was done for cgroupfs in opencontainers/runc#497 but a counterpart
+// for systemd was never introduced.
+func updateSystemdCgroupInfo(cgroupConfig *libcontainerconfigs.Cgroup, cgroupName CgroupName) {
+	dir, base := path.Split(cgroupName.ToSystemd())
+	if dir == "/" {
+		dir = "-.slice"
+	} else {
+		dir = path.Base(dir)
+	}
+	cgroupConfig.Parent = dir
+	cgroupConfig.Name = base
+}
+
 // Exists checks if all subsystem cgroups already exist
 func (m *cgroupManagerImpl) Exists(name CgroupName) bool {
 	// Get map of all cgroup paths on the system for the particular cgroup
@@ -278,23 +280,13 @@ func (m *cgroupManagerImpl) Destroy(cgroupConfig *CgroupConfig) error {
 	cgroupPaths := m.buildCgroupPaths(cgroupConfig.Name)
 
-	// we take the location in traditional cgroupfs format.
-	abstractCgroupFsName := string(cgroupConfig.Name)
-	abstractParent := CgroupName(path.Dir(abstractCgroupFsName))
-	abstractName := CgroupName(path.Base(abstractCgroupFsName))
-
-	driverParent := m.adapter.adaptName(abstractParent, false)
-	driverName := m.adapter.adaptName(abstractName, false)
-
-	// this is an ugly abstraction bleed, but systemd cgroup driver requires full paths...
-	if m.adapter.cgroupManagerType == libcontainerSystemd {
-		driverName = m.adapter.adaptName(cgroupConfig.Name, false)
-	}
-
-	// Initialize libcontainer's cgroup config with driver specific naming.
-	libcontainerCgroupConfig := &libcontainerconfigs.Cgroup{
-		Name:   driverName,
-		Parent: driverParent,
-	}
+	libcontainerCgroupConfig := &libcontainerconfigs.Cgroup{}
+	// libcontainer consumes a different field and expects a different syntax
+	// depending on the cgroup driver in use, so we need this conditional here.
+	if m.adapter.cgroupManagerType == libcontainerSystemd {
+		updateSystemdCgroupInfo(libcontainerCgroupConfig, cgroupConfig.Name)
+	} else {
+		libcontainerCgroupConfig.Path = cgroupConfig.Name.ToCgroupfs()
+	}
 
 	manager, err := m.adapter.newManager(libcontainerCgroupConfig, cgroupPaths)
@@ -418,26 +410,17 @@ func (m *cgroupManagerImpl) Update(cgroupConfig *CgroupConfig) error {
 	cgroupPaths := m.buildCgroupPaths(cgroupConfig.Name)
 
-	// we take the location in traditional cgroupfs format.
-	abstractCgroupFsName := string(cgroupConfig.Name)
-	abstractParent := CgroupName(path.Dir(abstractCgroupFsName))
-	abstractName := CgroupName(path.Base(abstractCgroupFsName))
-
-	driverParent := m.adapter.adaptName(abstractParent, false)
-	driverName := m.adapter.adaptName(abstractName, false)
-
-	// this is an ugly abstraction bleed, but systemd cgroup driver requires full paths...
-	if m.adapter.cgroupManagerType == libcontainerSystemd {
-		driverName = m.adapter.adaptName(cgroupConfig.Name, false)
-	}
-
 	// Initialize libcontainer's cgroup config
 	libcontainerCgroupConfig := &libcontainerconfigs.Cgroup{
-		Name:      driverName,
-		Parent:    driverParent,
 		Resources: resources,
 		Paths:     cgroupPaths,
 	}
 
+	// libcontainer consumes a different field and expects a different syntax
+	// depending on the cgroup driver in use, so we need this conditional here.
+	if m.adapter.cgroupManagerType == libcontainerSystemd {
+		updateSystemdCgroupInfo(libcontainerCgroupConfig, cgroupConfig.Name)
+	} else {
+		libcontainerCgroupConfig.Path = cgroupConfig.Name.ToCgroupfs()
+	}
+
 	if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) && cgroupConfig.ResourceParameters.PodPidsLimit != nil {
 		libcontainerCgroupConfig.PidsLimit = *cgroupConfig.ResourceParameters.PodPidsLimit
@@ -456,25 +439,18 @@ func (m *cgroupManagerImpl) Create(cgroupConfig *CgroupConfig) error {
 		metrics.CgroupManagerLatency.WithLabelValues("create").Observe(metrics.SinceInMicroseconds(start))
 	}()
 
-	// we take the location in traditional cgroupfs format.
-	abstractCgroupFsName := string(cgroupConfig.Name)
-	abstractParent := CgroupName(path.Dir(abstractCgroupFsName))
-	abstractName := CgroupName(path.Base(abstractCgroupFsName))
-
-	driverParent := m.adapter.adaptName(abstractParent, false)
-	driverName := m.adapter.adaptName(abstractName, false)
-
-	// this is an ugly abstraction bleed, but systemd cgroup driver requires full paths...
-	if m.adapter.cgroupManagerType == libcontainerSystemd {
-		driverName = m.adapter.adaptName(cgroupConfig.Name, false)
-	}
-
 	resources := m.toResources(cgroupConfig.ResourceParameters)
+
 	// Initialize libcontainer's cgroup config with driver specific naming.
 	libcontainerCgroupConfig := &libcontainerconfigs.Cgroup{
-		Name:      driverName,
-		Parent:    driverParent,
 		Resources: resources,
 	}
+
+	// libcontainer consumes a different field and expects a different syntax
+	// depending on the cgroup driver in use, so we need this conditional here.
+	if m.adapter.cgroupManagerType == libcontainerSystemd {
+		updateSystemdCgroupInfo(libcontainerCgroupConfig, cgroupConfig.Name)
+	} else {
+		libcontainerCgroupConfig.Path = cgroupConfig.Name.ToCgroupfs()
+	}
+
 	if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) && cgroupConfig.ResourceParameters.PodPidsLimit != nil {
 		libcontainerCgroupConfig.PidsLimit = *cgroupConfig.ResourceParameters.PodPidsLimit

View File

@@ -18,119 +18,105 @@ limitations under the License.
 
 package cm
 
-import "testing"
+import (
+	"path"
+	"testing"
+)
 
-func TestLibcontainerAdapterAdaptToSystemd(t *testing.T) {
+func TestCgroupNameToSystemdBasename(t *testing.T) {
 	testCases := []struct {
-		input    string
+		input    CgroupName
 		expected string
 	}{
 		{
-			input:    "/",
-			expected: "-.slice",
+			input:    RootCgroupName,
+			expected: "/",
 		},
 		{
-			input:    "/system.slice",
+			input:    NewCgroupName(RootCgroupName, "system"),
 			expected: "system.slice",
 		},
 		{
-			input:    "/system.slice/Burstable",
+			input:    NewCgroupName(RootCgroupName, "system", "Burstable"),
 			expected: "system-Burstable.slice",
 		},
 		{
-			input:    "/Burstable.slice/Burstable-pod_123.slice",
+			input:    NewCgroupName(RootCgroupName, "Burstable", "pod-123"),
 			expected: "Burstable-pod_123.slice",
 		},
 		{
-			input:    "/test.slice/test-a.slice/test-a-b.slice",
+			input:    NewCgroupName(RootCgroupName, "test", "a", "b"),
 			expected: "test-a-b.slice",
 		},
 		{
-			input:    "/test.slice/test-a.slice/test-a-b.slice/Burstable",
+			input:    NewCgroupName(RootCgroupName, "test", "a", "b", "Burstable"),
 			expected: "test-a-b-Burstable.slice",
 		},
 		{
-			input:    "/Burstable",
+			input:    NewCgroupName(RootCgroupName, "Burstable"),
 			expected: "Burstable.slice",
 		},
 		{
-			input:    "/Burstable/pod_123",
-			expected: "Burstable-pod_123.slice",
-		},
-		{
-			input:    "/BestEffort/pod_6c1a4e95-6bb6-11e6-bc26-28d2444e470d",
+			input:    NewCgroupName(RootCgroupName, "BestEffort", "pod-6c1a4e95-6bb6-11e6-bc26-28d2444e470d"),
 			expected: "BestEffort-pod_6c1a4e95_6bb6_11e6_bc26_28d2444e470d.slice",
 		},
 	}
 	for _, testCase := range testCases {
-		f := newLibcontainerAdapter(libcontainerSystemd)
-		if actual := f.adaptName(CgroupName(testCase.input), false); actual != testCase.expected {
+		if actual := path.Base(testCase.input.ToSystemd()); actual != testCase.expected {
 			t.Errorf("Unexpected result, input: %v, expected: %v, actual: %v", testCase.input, testCase.expected, actual)
 		}
 	}
 }
 
-func TestLibcontainerAdapterAdaptToSystemdAsCgroupFs(t *testing.T) {
+func TestCgroupNameToSystemd(t *testing.T) {
 	testCases := []struct {
-		input    string
+		input    CgroupName
 		expected string
 	}{
 		{
-			input:    "/",
+			input:    RootCgroupName,
 			expected: "/",
 		},
 		{
-			input:    "/Burstable",
+			input:    NewCgroupName(RootCgroupName, "Burstable"),
 			expected: "/Burstable.slice",
 		},
 		{
-			input:    "/Burstable/pod_123",
+			input:    NewCgroupName(RootCgroupName, "Burstable", "pod-123"),
 			expected: "/Burstable.slice/Burstable-pod_123.slice",
 		},
 		{
-			input:    "/BestEffort/pod_6c1a4e95-6bb6-11e6-bc26-28d2444e470d",
+			input:    NewCgroupName(RootCgroupName, "BestEffort", "pod-6c1a4e95-6bb6-11e6-bc26-28d2444e470d"),
 			expected: "/BestEffort.slice/BestEffort-pod_6c1a4e95_6bb6_11e6_bc26_28d2444e470d.slice",
 		},
 		{
-			input:    "/kubepods",
+			input:    NewCgroupName(RootCgroupName, "kubepods"),
 			expected: "/kubepods.slice",
 		},
 	}
 	for _, testCase := range testCases {
-		f := newLibcontainerAdapter(libcontainerSystemd)
-		if actual := f.adaptName(CgroupName(testCase.input), true); actual != testCase.expected {
+		if actual := testCase.input.ToSystemd(); actual != testCase.expected {
 			t.Errorf("Unexpected result, input: %v, expected: %v, actual: %v", testCase.input, testCase.expected, actual)
 		}
 	}
 }
 
-func TestLibcontainerAdapterNotAdaptToSystemd(t *testing.T) {
-	cgroupfs := newLibcontainerAdapter(libcontainerCgroupfs)
-	otherAdatper := newLibcontainerAdapter(libcontainerCgroupManagerType("test"))
+func TestCgroupNameToCgroupfs(t *testing.T) {
 	testCases := []struct {
-		input    string
+		input    CgroupName
 		expected string
 	}{
 		{
-			input:    "/",
+			input:    RootCgroupName,
 			expected: "/",
 		},
 		{
-			input:    "/Burstable",
+			input:    NewCgroupName(RootCgroupName, "Burstable"),
 			expected: "/Burstable",
 		},
-		{
-			input:    "",
-			expected: "",
-		},
 	}
 	for _, testCase := range testCases {
-		if actual := cgroupfs.adaptName(CgroupName(testCase.input), true); actual != testCase.expected {
-			t.Errorf("Unexpected result, input: %v, expected: %v, actual: %v", testCase.input, testCase.expected, actual)
-		}
-		if actual := otherAdatper.adaptName(CgroupName(testCase.input), true); actual != testCase.expected {
+		if actual := testCase.input.ToCgroupfs(); actual != testCase.expected {
 			t.Errorf("Unexpected result, input: %v, expected: %v, actual: %v", testCase.input, testCase.expected, actual)
 		}
 	}
 }

View File

@@ -63,25 +63,35 @@ func (m *unsupportedCgroupManager) Pids(_ CgroupName) []int {
 }
 
 func (m *unsupportedCgroupManager) CgroupName(name string) CgroupName {
-	return ""
+	return CgroupName([]string{})
 }
 
 func (m *unsupportedCgroupManager) ReduceCPULimits(cgroupName CgroupName) error {
 	return nil
 }
 
-func ConvertCgroupFsNameToSystemd(cgroupfsName string) (string, error) {
-	return "", nil
+var RootCgroupName = CgroupName([]string{})
+
+func NewCgroupName(base CgroupName, components ...string) CgroupName {
+	return CgroupName(append(base, components...))
 }
 
-func ConvertCgroupNameToSystemd(cgroupName CgroupName, outputToCgroupFs bool) string {
+func (cgroupName CgroupName) ToSystemd() string {
 	return ""
 }
 
-func RevertFromSystemdToCgroupStyleName(name string) string {
-	return ""
+func ParseSystemdToCgroupName(name string) CgroupName {
+	return nil
+}
+
+func (cgroupName CgroupName) ToCgroupfs() string {
+	return ""
+}
+
+func ParseCgroupfsToCgroupName(name string) CgroupName {
+	return nil
 }
 
 func IsSystemdStyleName(name string) bool {
 	return false
 }

View File

@@ -123,7 +123,7 @@ type containerManagerImpl struct {
 	capacity v1.ResourceList
 	// Absolute cgroupfs path to a cgroup that Kubelet needs to place all pods under.
 	// This path include a top level container for enforcing Node Allocatable.
-	cgroupRoot string
+	cgroupRoot CgroupName
 	// Event recorder interface.
 	recorder record.EventRecorder
 	// Interface for QoS cgroup management
@@ -223,7 +223,8 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
 	}
 	capacity = cadvisor.CapacityFromMachineInfo(machineInfo)
 
-	cgroupRoot := nodeConfig.CgroupRoot
+	// Turn CgroupRoot from a string (in cgroupfs path format) to internal CgroupName
+	cgroupRoot := ParseCgroupfsToCgroupName(nodeConfig.CgroupRoot)
 	cgroupManager := NewCgroupManager(subsystems, nodeConfig.CgroupDriver)
 	// Check if Cgroup-root actually exists on the node
 	if nodeConfig.CgroupsPerQOS {
@@ -236,13 +237,13 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
 		// of note, we always use the cgroupfs driver when performing this check since
 		// the input is provided in that format.
 		// this is important because we do not want any name conversion to occur.
-		if !cgroupManager.Exists(CgroupName(cgroupRoot)) {
+		if !cgroupManager.Exists(cgroupRoot) {
 			return nil, fmt.Errorf("invalid configuration: cgroup-root %q doesn't exist: %v", cgroupRoot, err)
 		}
 		glog.Infof("container manager verified user specified cgroup-root exists: %v", cgroupRoot)
 		// Include the top level cgroup for enforcing node allocatable into cgroup-root.
 		// This way, all sub modules can avoid having to understand the concept of node allocatable.
-		cgroupRoot = path.Join(cgroupRoot, defaultNodeAllocatableCgroupName)
+		cgroupRoot = NewCgroupName(cgroupRoot, defaultNodeAllocatableCgroupName)
 	}
 	glog.Infof("Creating Container Manager object based on Node Config: %+v", nodeConfig)
 
@@ -305,7 +306,7 @@ func (cm *containerManagerImpl) NewPodContainerManager() PodContainerManager {
 		}
 	}
 	return &podContainerManagerNoop{
-		cgroupRoot: CgroupName(cm.cgroupRoot),
+		cgroupRoot: cm.cgroupRoot,
 	}
 }
 
@@ -503,7 +504,7 @@ func (cm *containerManagerImpl) GetNodeConfig() NodeConfig {
 
 // GetPodCgroupRoot returns the literal cgroupfs value for the cgroup containing all pods.
 func (cm *containerManagerImpl) GetPodCgroupRoot() string {
-	return cm.cgroupManager.Name(CgroupName(cm.cgroupRoot))
+	return cm.cgroupManager.Name(cm.cgroupRoot)
 }
 
 func (cm *containerManagerImpl) GetMountedSubsystems() *CgroupSubsystems {

View File

@@ -42,7 +42,7 @@ const (
 //createNodeAllocatableCgroups creates Node Allocatable Cgroup when CgroupsPerQOS flag is specified as true
 func (cm *containerManagerImpl) createNodeAllocatableCgroups() error {
 	cgroupConfig := &CgroupConfig{
-		Name: CgroupName(cm.cgroupRoot),
+		Name: cm.cgroupRoot,
 		// The default limits for cpu shares can be very low which can lead to CPU starvation for pods.
 		ResourceParameters: getCgroupConfig(cm.capacity),
 	}
@@ -71,7 +71,7 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error {
 	glog.V(4).Infof("Attempting to enforce Node Allocatable with config: %+v", nc)
 
 	cgroupConfig := &CgroupConfig{
-		Name:               CgroupName(cm.cgroupRoot),
+		Name:               cm.cgroupRoot,
 		ResourceParameters: getCgroupConfig(nodeAllocatable),
 	}
@@ -88,7 +88,8 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error {
 	// Pod Evictions are expected to bring down memory usage to below Node Allocatable limits.
 	// Until evictions happen retry cgroup updates.
 	// Update limits on non root cgroup-root to be safe since the default limits for CPU can be too low.
-	if cm.cgroupRoot != "/" {
+	// Check if cgroupRoot is set to a non-empty value (empty would be the root container)
+	if len(cm.cgroupRoot) > 0 {
 		go func() {
 			for {
 				err := cm.cgroupManager.Update(cgroupConfig)
@@ -105,7 +106,7 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error {
 	// Now apply kube reserved and system reserved limits if required.
 	if nc.EnforceNodeAllocatable.Has(kubetypes.SystemReservedEnforcementKey) {
 		glog.V(2).Infof("Enforcing System reserved on cgroup %q with limits: %+v", nc.SystemReservedCgroupName, nc.SystemReserved)
-		if err := enforceExistingCgroup(cm.cgroupManager, nc.SystemReservedCgroupName, nc.SystemReserved); err != nil {
+		if err := enforceExistingCgroup(cm.cgroupManager, ParseCgroupfsToCgroupName(nc.SystemReservedCgroupName), nc.SystemReserved); err != nil {
 			message := fmt.Sprintf("Failed to enforce System Reserved Cgroup Limits on %q: %v", nc.SystemReservedCgroupName, err)
 			cm.recorder.Event(nodeRef, v1.EventTypeWarning, events.FailedNodeAllocatableEnforcement, message)
 			return fmt.Errorf(message)
@@ -114,7 +115,7 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error {
 	}
 	if nc.EnforceNodeAllocatable.Has(kubetypes.KubeReservedEnforcementKey) {
 		glog.V(2).Infof("Enforcing kube reserved on cgroup %q with limits: %+v", nc.KubeReservedCgroupName, nc.KubeReserved)
-		if err := enforceExistingCgroup(cm.cgroupManager, nc.KubeReservedCgroupName, nc.KubeReserved); err != nil {
+		if err := enforceExistingCgroup(cm.cgroupManager, ParseCgroupfsToCgroupName(nc.KubeReservedCgroupName), nc.KubeReserved); err != nil {
 			message := fmt.Sprintf("Failed to enforce Kube Reserved Cgroup Limits on %q: %v", nc.KubeReservedCgroupName, err)
 			cm.recorder.Event(nodeRef, v1.EventTypeWarning, events.FailedNodeAllocatableEnforcement, message)
 			return fmt.Errorf(message)
@@ -125,9 +126,9 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error {
 }
 
 // enforceExistingCgroup updates the limits `rl` on existing cgroup `cName` using `cgroupManager` interface.
-func enforceExistingCgroup(cgroupManager CgroupManager, cName string, rl v1.ResourceList) error {
+func enforceExistingCgroup(cgroupManager CgroupManager, cName CgroupName, rl v1.ResourceList) error {
 	cgroupConfig := &CgroupConfig{
-		Name:               CgroupName(cName),
+		Name:               cName,
 		ResourceParameters: getCgroupConfig(rl),
 	}
 	glog.V(4).Infof("Enforcing limits on cgroup %q with %d cpu shares and %d bytes of memory", cName, cgroupConfig.ResourceParameters.CpuShares, cgroupConfig.ResourceParameters.Memory)

View File

@@ -104,7 +104,7 @@ func (m *podContainerManagerImpl) EnsureExists(pod *v1.Pod) error {
 func (m *podContainerManagerImpl) GetPodContainerName(pod *v1.Pod) (CgroupName, string) {
 	podQOS := v1qos.GetPodQOS(pod)
 	// Get the parent QOS container name
-	var parentContainer string
+	var parentContainer CgroupName
 	switch podQOS {
 	case v1.PodQOSGuaranteed:
 		parentContainer = m.qosContainersInfo.Guaranteed
@@ -116,7 +116,7 @@ func (m *podContainerManagerImpl) GetPodContainerName(pod *v1.Pod) (CgroupName,
 	podContainer := GetPodCgroupNameSuffix(pod.UID)
 
 	// Get the absolute path of the cgroup
-	cgroupName := (CgroupName)(path.Join(parentContainer, podContainer))
+	cgroupName := NewCgroupName(parentContainer, podContainer)
 	// Get the literal cgroupfs name
 	cgroupfsName := m.cgroupManager.Name(cgroupName)
 
@@ -189,7 +189,7 @@ func (m *podContainerManagerImpl) ReduceCPULimits(podCgroup CgroupName) error {
 func (m *podContainerManagerImpl) GetAllPodsFromCgroups() (map[types.UID]CgroupName, error) {
 	// Map for storing all the found pods on the disk
 	foundPods := make(map[types.UID]CgroupName)
-	qosContainersList := [3]string{m.qosContainersInfo.BestEffort, m.qosContainersInfo.Burstable, m.qosContainersInfo.Guaranteed}
+	qosContainersList := [3]CgroupName{m.qosContainersInfo.BestEffort, m.qosContainersInfo.Burstable, m.qosContainersInfo.Guaranteed}
 	// Scan through all the subsystem mounts
 	// and through each QoS cgroup directory for each subsystem mount
 	// If a pod cgroup exists in even a single subsystem mount
@@ -197,7 +197,7 @@ func (m *podContainerManagerImpl) GetAllPodsFromCgroups() (map[types.UID]CgroupN
 	for _, val := range m.subsystems.MountPoints {
 		for _, qosContainerName := range qosContainersList {
 			// get the subsystems QoS cgroup absolute name
-			qcConversion := m.cgroupManager.Name(CgroupName(qosContainerName))
+			qcConversion := m.cgroupManager.Name(qosContainerName)
 			qc := path.Join(val, qcConversion)
 			dirInfo, err := ioutil.ReadDir(qc)
 			if err != nil {
@@ -219,7 +219,7 @@ func (m *podContainerManagerImpl) GetAllPodsFromCgroups() (map[types.UID]CgroupN
 				internalPath := m.cgroupManager.CgroupName(cgroupfsPath)
 				// we only care about base segment of the converted path since that
 				// is what we are reading currently to know if it is a pod or not.
-				basePath := path.Base(string(internalPath))
+				basePath := internalPath[len(internalPath)-1]
 				if !strings.Contains(basePath, podCgroupNamePrefix) {
 					continue
 				}
@@ -259,7 +259,7 @@ func (m *podContainerManagerNoop) EnsureExists(_ *v1.Pod) error {
 }
 
 func (m *podContainerManagerNoop) GetPodContainerName(_ *v1.Pod) (CgroupName, string) {
-	return m.cgroupRoot, string(m.cgroupRoot)
+	return m.cgroupRoot, m.cgroupRoot.ToCgroupfs()
 }
 
 func (m *podContainerManagerNoop) GetPodContainerNameForDriver(_ *v1.Pod) string {

View File

@@ -35,7 +35,7 @@ func (m *podContainerManagerStub) EnsureExists(_ *v1.Pod) error {
 }
 
 func (m *podContainerManagerStub) GetPodContainerName(_ *v1.Pod) (CgroupName, string) {
-	return "", ""
+	return nil, ""
 }
 
 func (m *podContainerManagerStub) Destroy(_ CgroupName) error {

View File

@@ -18,7 +18,6 @@ package cm
 import (
 	"fmt"
-	"path"
 	"strings"
 	"sync"
 	"time"
@@ -60,17 +59,17 @@ type qosContainerManagerImpl struct {
 	qosReserved map[v1.ResourceName]int64
 }
 
-func NewQOSContainerManager(subsystems *CgroupSubsystems, cgroupRoot string, nodeConfig NodeConfig, cgroupManager CgroupManager) (QOSContainerManager, error) {
+func NewQOSContainerManager(subsystems *CgroupSubsystems, cgroupRoot CgroupName, nodeConfig NodeConfig, cgroupManager CgroupManager) (QOSContainerManager, error) {
 	if !nodeConfig.CgroupsPerQOS {
 		return &qosContainerManagerNoop{
-			cgroupRoot: CgroupName(cgroupRoot),
+			cgroupRoot: cgroupRoot,
 		}, nil
 	}
 
 	return &qosContainerManagerImpl{
 		subsystems:    subsystems,
 		cgroupManager: cgroupManager,
-		cgroupRoot:    CgroupName(cgroupRoot),
+		cgroupRoot:    cgroupRoot,
 		qosReserved:   nodeConfig.QOSReserved,
 	}, nil
 }
@@ -81,23 +80,20 @@ func (m *qosContainerManagerImpl) GetQOSContainersInfo() QOSContainersInfo {
 func (m *qosContainerManagerImpl) Start(getNodeAllocatable func() v1.ResourceList, activePods ActivePodsFunc) error {
 	cm := m.cgroupManager
-	rootContainer := string(m.cgroupRoot)
-	if !cm.Exists(CgroupName(rootContainer)) {
-		return fmt.Errorf("root container %s doesn't exist", rootContainer)
+	rootContainer := m.cgroupRoot
+	if !cm.Exists(rootContainer) {
+		return fmt.Errorf("root container %v doesn't exist", rootContainer)
 	}
 
 	// Top level for Qos containers are created only for Burstable
 	// and Best Effort classes
-	qosClasses := map[v1.PodQOSClass]string{
-		v1.PodQOSBurstable:  path.Join(rootContainer, strings.ToLower(string(v1.PodQOSBurstable))),
-		v1.PodQOSBestEffort: path.Join(rootContainer, strings.ToLower(string(v1.PodQOSBestEffort))),
+	qosClasses := map[v1.PodQOSClass]CgroupName{
+		v1.PodQOSBurstable:  NewCgroupName(rootContainer, strings.ToLower(string(v1.PodQOSBurstable))),
+		v1.PodQOSBestEffort: NewCgroupName(rootContainer, strings.ToLower(string(v1.PodQOSBestEffort))),
 	}
 
 	// Create containers for both qos classes
 	for qosClass, containerName := range qosClasses {
-		// get the container's absolute name
-		absoluteContainerName := CgroupName(containerName)
-
 		resourceParameters := &ResourceConfig{}
 		// the BestEffort QoS class has a statically configured minShares value
 		if qosClass == v1.PodQOSBestEffort {
@@ -107,7 +103,7 @@ func (m *qosContainerManagerImpl) Start(getNodeAllocatable func() v1.ResourceLis
 
 		// containerConfig object stores the cgroup specifications
 		containerConfig := &CgroupConfig{
-			Name:               absoluteContainerName,
+			Name:               containerName,
 			ResourceParameters: resourceParameters,
 		}
 
@@ -117,7 +113,7 @@ func (m *qosContainerManagerImpl) Start(getNodeAllocatable func() v1.ResourceLis
 		}
 
 		// check if it exists
-		if !cm.Exists(absoluteContainerName) {
+		if !cm.Exists(containerName) {
 			if err := cm.Create(containerConfig); err != nil {
 				return fmt.Errorf("failed to create top level %v QOS cgroup : %v", qosClass, err)
 			}
@@ -279,11 +275,11 @@ func (m *qosContainerManagerImpl) UpdateCgroups() error {
 
 	qosConfigs := map[v1.PodQOSClass]*CgroupConfig{
 		v1.PodQOSBurstable: {
-			Name:               CgroupName(m.qosContainersInfo.Burstable),
+			Name:               m.qosContainersInfo.Burstable,
 			ResourceParameters: &ResourceConfig{},
 		},
 		v1.PodQOSBestEffort: {
-			Name:               CgroupName(m.qosContainersInfo.BestEffort),
+			Name:               m.qosContainersInfo.BestEffort,
 			ResourceParameters: &ResourceConfig{},
 		},
 	}

View File

@@ -38,7 +38,9 @@ type ResourceConfig struct {
 }
 
 // CgroupName is the abstract name of a cgroup prior to any driver specific conversion.
-type CgroupName string
+// It is specified as a list of strings from its individual components, such as:
+// {"kubepods", "burstable", "pod1234-abcd-5678-efgh"}
+type CgroupName []string
 
 // CgroupConfig holds the cgroup configuration information.
 // This is common object which is used to specify
@@ -78,7 +80,7 @@ type CgroupManager interface {
 	Exists(name CgroupName) bool
 	// Name returns the literal cgroupfs name on the host after any driver specific conversions.
 	// We would expect systemd implementation to make appropriate name conversion.
-	// For example, if we pass /foo/bar
+	// For example, if we pass {"foo", "bar"}
 	// then systemd should convert the name to something like
 	// foo.slice/foo-bar.slice
 	Name(name CgroupName) string
@@ -94,9 +96,9 @@ type CgroupManager interface {
 
 // QOSContainersInfo stores the names of containers per qos
 type QOSContainersInfo struct {
-	Guaranteed string
-	BestEffort string
-	Burstable  string
+	Guaranteed CgroupName
+	BestEffort CgroupName
+	Burstable  CgroupName
 }
 
 // PodContainerManager stores and manages pod level containers

View File

@@ -85,7 +85,6 @@ go_library(
         "//pkg/kubelet/checkpointmanager:go_default_library",
         "//pkg/kubelet/checkpointmanager/checksum:go_default_library",
         "//pkg/kubelet/checkpointmanager/errors:go_default_library",
-        "//pkg/kubelet/cm:go_default_library",
         "//pkg/kubelet/container:go_default_library",
         "//pkg/kubelet/dockershim/cm:go_default_library",
         "//pkg/kubelet/dockershim/libdocker:go_default_library",

View File

@@ -20,6 +20,7 @@ import (
 	"context"
 	"fmt"
 	"net/http"
+	"path"
 	"path/filepath"
 	"sync"
 	"time"
@@ -32,7 +33,6 @@ import (
 	runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2"
 	"k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig"
 	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
-	kubecm "k8s.io/kubernetes/pkg/kubelet/cm"
 	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
 	"k8s.io/kubernetes/pkg/kubelet/dockershim/cm"
 	"k8s.io/kubernetes/pkg/kubelet/dockershim/network"
@@ -432,17 +432,14 @@ func (ds *dockerService) ServeHTTP(w http.ResponseWriter, r *http.Request) {
 
 // GenerateExpectedCgroupParent returns cgroup parent in syntax expected by cgroup driver
 func (ds *dockerService) GenerateExpectedCgroupParent(cgroupParent string) (string, error) {
-	if len(cgroupParent) > 0 {
+	if cgroupParent != "" {
 		// if docker uses the systemd cgroup driver, it expects *.slice style names for cgroup parent.
 		// if we configured kubelet to use --cgroup-driver=cgroupfs, and docker is configured to use systemd driver
 		// docker will fail to launch the container because the name we provide will not be a valid slice.
 		// this is a very good thing.
 		if ds.cgroupDriver == "systemd" {
-			systemdCgroupParent, err := kubecm.ConvertCgroupFsNameToSystemd(cgroupParent)
-			if err != nil {
-				return "", err
-			}
-			cgroupParent = systemdCgroupParent
+			// Pass only the last component of the cgroup path to systemd.
+			cgroupParent = path.Base(cgroupParent)
 		}
 	}
 	glog.V(3).Infof("Setting cgroup parent to: %q", cgroupParent)

View File

@@ -255,9 +255,14 @@ func isPodManagedContainer(cinfo *cadvisorapiv2.ContainerInfo) bool {
 func getCadvisorPodInfoFromPodUID(podUID types.UID, infos map[string]cadvisorapiv2.ContainerInfo) *cadvisorapiv2.ContainerInfo {
 	for key, info := range infos {
 		if cm.IsSystemdStyleName(key) {
-			key = cm.RevertFromSystemdToCgroupStyleName(key)
+			// Convert to internal cgroup name and take the last component only.
+			internalCgroupName := cm.ParseSystemdToCgroupName(key)
+			key = internalCgroupName[len(internalCgroupName)-1]
+		} else {
+			// Take last component only.
+			key = path.Base(key)
 		}
-		if cm.GetPodCgroupNameSuffix(podUID) == path.Base(key) {
+		if cm.GetPodCgroupNameSuffix(podUID) == key {
 			return &info
 		}
 	}

View File

@@ -19,7 +19,6 @@ package e2e_node
 import (
 	"fmt"
 	"os/exec"
-	"path"
 	"strconv"
 	"strings"
 	"time"
@@ -38,14 +37,14 @@ import (
 )
 
 // makePodToVerifyHugePages returns a pod that verifies specified cgroup with hugetlb
-func makePodToVerifyHugePages(cgroupName cm.CgroupName, hugePagesLimit resource.Quantity) *apiv1.Pod {
+func makePodToVerifyHugePages(baseName string, hugePagesLimit resource.Quantity) *apiv1.Pod {
 	// convert the cgroup name to its literal form
 	cgroupFsName := ""
-	cgroupName = cm.CgroupName(path.Join(defaultNodeAllocatableCgroup, string(cgroupName)))
+	cgroupName := cm.NewCgroupName(cm.RootCgroupName, defaultNodeAllocatableCgroup, baseName)
 	if framework.TestContext.KubeletConfig.CgroupDriver == "systemd" {
-		cgroupFsName = cm.ConvertCgroupNameToSystemd(cgroupName, true)
+		cgroupFsName = cgroupName.ToSystemd()
 	} else {
-		cgroupFsName = string(cgroupName)
+		cgroupFsName = cgroupName.ToCgroupfs()
 	}
 
 	// this command takes the expected value and compares it against the actual value for the pod cgroup hugetlb.2MB.limit_in_bytes
@@ -184,7 +183,7 @@ func runHugePagesTests(f *framework.Framework) {
 			})
 			podUID := string(pod.UID)
 			By("checking if the expected hugetlb settings were applied")
-			verifyPod := makePodToVerifyHugePages(cm.CgroupName("pod"+podUID), resource.MustParse("50Mi"))
+			verifyPod := makePodToVerifyHugePages("pod"+podUID, resource.MustParse("50Mi"))
 			f.PodClient().Create(verifyPod)
 			err := framework.WaitForPodSuccessInNamespace(f.ClientSet, verifyPod.Name, f.Namespace.Name)
 			Expect(err).NotTo(HaveOccurred())

View File

@@ -21,7 +21,6 @@ package e2e_node
 import (
 	"fmt"
 	"io/ioutil"
-	"path"
 	"path/filepath"
 	"strconv"
 	"strings"
@@ -99,8 +98,8 @@ func getAllocatableLimits(cpu, memory string, capacity v1.ResourceList) (*resour
 }
 
 const (
-	kubeReservedCgroup   = "/kube_reserved"
-	systemReservedCgroup = "/system_reserved"
+	kubeReservedCgroup   = "kube_reserved"
+	systemReservedCgroup = "system_reserved"
 )
 
 func createIfNotExists(cm cm.CgroupManager, cgroupConfig *cm.CgroupConfig) error {
@@ -115,13 +114,13 @@ func createTemporaryCgroupsForReservation(cgroupManager cm.CgroupManager) error
 	// Create kube reserved cgroup
 	cgroupConfig := &cm.CgroupConfig{
-		Name: cm.CgroupName(kubeReservedCgroup),
+		Name: cm.NewCgroupName(cm.RootCgroupName, kubeReservedCgroup),
 	}
 	if err := createIfNotExists(cgroupManager, cgroupConfig); err != nil {
 		return err
 	}
 	// Create system reserved cgroup
-	cgroupConfig.Name = cm.CgroupName(systemReservedCgroup)
+	cgroupConfig.Name = cm.NewCgroupName(cm.RootCgroupName, systemReservedCgroup)
 
 	return createIfNotExists(cgroupManager, cgroupConfig)
 }
@@ -129,12 +128,12 @@ func destroyTemporaryCgroupsForReservation(cgroupManager cm.CgroupManager) error
 	// Create kube reserved cgroup
 	cgroupConfig := &cm.CgroupConfig{
-		Name: cm.CgroupName(kubeReservedCgroup),
+		Name: cm.NewCgroupName(cm.RootCgroupName, kubeReservedCgroup),
 	}
 	if err := cgroupManager.Destroy(cgroupConfig); err != nil {
 		return err
 	}
-	cgroupConfig.Name = cm.CgroupName(systemReservedCgroup)
+	cgroupConfig.Name = cm.NewCgroupName(cm.RootCgroupName, systemReservedCgroup)
 	return cgroupManager.Destroy(cgroupConfig)
 }
@@ -173,8 +172,9 @@ func runTest(f *framework.Framework) error {
 	// Set new config and current config.
 	currentConfig := newCfg
 
-	expectedNAPodCgroup := path.Join(currentConfig.CgroupRoot, "kubepods")
-	if !cgroupManager.Exists(cm.CgroupName(expectedNAPodCgroup)) {
+	expectedNAPodCgroup := cm.ParseCgroupfsToCgroupName(currentConfig.CgroupRoot)
+	expectedNAPodCgroup = cm.NewCgroupName(expectedNAPodCgroup, "kubepods")
+	if !cgroupManager.Exists(expectedNAPodCgroup) {
 		return fmt.Errorf("Expected Node Allocatable Cgroup Does not exist")
 	}
 	// TODO: Update cgroupManager to expose a Status interface to get current Cgroup Settings.
@@ -218,30 +218,32 @@ func runTest(f *framework.Framework) error {
 		return nil
 	}, time.Minute, 5*time.Second).Should(BeNil())
 
-	if !cgroupManager.Exists(cm.CgroupName(kubeReservedCgroup)) {
+	kubeReservedCgroupName := cm.NewCgroupName(cm.RootCgroupName, kubeReservedCgroup)
+	if !cgroupManager.Exists(kubeReservedCgroupName) {
 		return fmt.Errorf("Expected kube reserved cgroup Does not exist")
 	}
 	// Expect CPU shares on kube reserved cgroup to equal it's reservation which is `100m`.
 	kubeReservedCPU := resource.MustParse(currentConfig.KubeReserved[string(v1.ResourceCPU)])
-	if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["cpu"], kubeReservedCgroup, "cpu.shares"), int64(cm.MilliCPUToShares(kubeReservedCPU.MilliValue())), 10); err != nil {
+	if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["cpu"], cgroupManager.Name(kubeReservedCgroupName), "cpu.shares"), int64(cm.MilliCPUToShares(kubeReservedCPU.MilliValue())), 10); err != nil {
 		return err
 	}
 	// Expect Memory limit kube reserved cgroup to equal configured value `100Mi`.
 	kubeReservedMemory := resource.MustParse(currentConfig.KubeReserved[string(v1.ResourceMemory)])
-	if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["memory"], kubeReservedCgroup, "memory.limit_in_bytes"), kubeReservedMemory.Value(), 0); err != nil {
+	if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["memory"], cgroupManager.Name(kubeReservedCgroupName), "memory.limit_in_bytes"), kubeReservedMemory.Value(), 0); err != nil {
 		return err
 	}
-	if !cgroupManager.Exists(cm.CgroupName(systemReservedCgroup)) {
+
+	systemReservedCgroupName := cm.NewCgroupName(cm.RootCgroupName, systemReservedCgroup)
+	if !cgroupManager.Exists(systemReservedCgroupName) {
 		return fmt.Errorf("Expected system reserved cgroup Does not exist")
 	}
 	// Expect CPU shares on system reserved cgroup to equal it's reservation which is `100m`.
 	systemReservedCPU := resource.MustParse(currentConfig.SystemReserved[string(v1.ResourceCPU)])
-	if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["cpu"], systemReservedCgroup, "cpu.shares"), int64(cm.MilliCPUToShares(systemReservedCPU.MilliValue())), 10); err != nil {
+	if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["cpu"], cgroupManager.Name(systemReservedCgroupName), "cpu.shares"), int64(cm.MilliCPUToShares(systemReservedCPU.MilliValue())), 10); err != nil {
 		return err
 	}
 	// Expect Memory limit on node allocatable cgroup to equal allocatable.
 	systemReservedMemory := resource.MustParse(currentConfig.SystemReserved[string(v1.ResourceMemory)])
-	if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["memory"], systemReservedCgroup, "memory.limit_in_bytes"), systemReservedMemory.Value(), 0); err != nil {
+	if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["memory"], cgroupManager.Name(systemReservedCgroupName), "memory.limit_in_bytes"), systemReservedMemory.Value(), 0); err != nil {
 		return err
 	}
 	return nil

View File

@@ -17,7 +17,7 @@ limitations under the License.
 package e2e_node
 
 import (
-	"path"
+	"strings"
 
 	"k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
@@ -61,12 +61,15 @@ const (
 )
 
 // makePodToVerifyCgroups returns a pod that verifies the existence of the specified cgroups.
-func makePodToVerifyCgroups(cgroupNames []cm.CgroupName) *v1.Pod {
+func makePodToVerifyCgroups(cgroupNames []string) *v1.Pod {
 	// convert the names to their literal cgroupfs forms...
 	cgroupFsNames := []string{}
-	for _, cgroupName := range cgroupNames {
+	rootCgroupName := cm.NewCgroupName(cm.RootCgroupName, defaultNodeAllocatableCgroup)
+	for _, baseName := range cgroupNames {
 		// Add top level cgroup used to enforce node allocatable.
-		cgroupFsNames = append(cgroupFsNames, toCgroupFsName(path.Join(defaultNodeAllocatableCgroup, string(cgroupName))))
+		cgroupComponents := strings.Split(baseName, "/")
+		cgroupName := cm.NewCgroupName(rootCgroupName, cgroupComponents...)
+		cgroupFsNames = append(cgroupFsNames, toCgroupFsName(cgroupName))
 	}
 	glog.Infof("expecting %v cgroups to be found", cgroupFsNames)
 	// build the pod command to either verify cgroups exist
@@ -109,8 +112,10 @@ func makePodToVerifyCgroups(cgroupNames []cm.CgroupName) *v1.Pod {
 }
 
 // makePodToVerifyCgroupRemoved verifies the specified cgroup does not exist.
-func makePodToVerifyCgroupRemoved(cgroupName cm.CgroupName) *v1.Pod {
-	cgroupFsName := toCgroupFsName(string(cgroupName))
+func makePodToVerifyCgroupRemoved(baseName string) *v1.Pod {
+	components := strings.Split(baseName, "/")
+	cgroupName := cm.NewCgroupName(cm.RootCgroupName, components...)
+	cgroupFsName := toCgroupFsName(cgroupName)
 	pod := &v1.Pod{
 		ObjectMeta: metav1.ObjectMeta{
 			Name: "pod" + string(uuid.NewUUID()),
@@ -151,7 +156,7 @@ var _ = framework.KubeDescribe("Kubelet Cgroup Manager", func() {
 			if !framework.TestContext.KubeletConfig.CgroupsPerQOS {
 				return
 			}
-			cgroupsToVerify := []cm.CgroupName{cm.CgroupName(burstableCgroup), cm.CgroupName(bestEffortCgroup)}
+			cgroupsToVerify := []string{burstableCgroup, bestEffortCgroup}
 			pod := makePodToVerifyCgroups(cgroupsToVerify)
 			f.PodClient().Create(pod)
 			err := framework.WaitForPodSuccessInNamespace(f.ClientSet, pod.Name, f.Namespace.Name)
@@ -189,7 +194,7 @@ var _ = framework.KubeDescribe("Kubelet Cgroup Manager", func() {
 				podUID = string(guaranteedPod.UID)
 			})
 			By("Checking if the pod cgroup was created", func() {
-				cgroupsToVerify := []cm.CgroupName{cm.CgroupName("pod" + podUID)}
+				cgroupsToVerify := []string{"pod" + podUID}
 				pod := makePodToVerifyCgroups(cgroupsToVerify)
 				f.PodClient().Create(pod)
 				err := framework.WaitForPodSuccessInNamespace(f.ClientSet, pod.Name, f.Namespace.Name)
@@ -198,7 +203,7 @@ var _ = framework.KubeDescribe("Kubelet Cgroup Manager", func() {
 			By("Checking if the pod cgroup was deleted", func() {
 				gp := int64(1)
 				Expect(f.PodClient().Delete(guaranteedPod.Name, &metav1.DeleteOptions{GracePeriodSeconds: &gp})).NotTo(HaveOccurred())
-				pod := makePodToVerifyCgroupRemoved(cm.CgroupName("pod" + podUID))
+				pod := makePodToVerifyCgroupRemoved("pod" + podUID)
 				f.PodClient().Create(pod)
 				err := framework.WaitForPodSuccessInNamespace(f.ClientSet, pod.Name, f.Namespace.Name)
 				Expect(err).NotTo(HaveOccurred())
@@ -233,7 +238,7 @@ var _ = framework.KubeDescribe("Kubelet Cgroup Manager", func() {
 				podUID = string(bestEffortPod.UID)
 			})
 			By("Checking if the pod cgroup was created", func() {
-				cgroupsToVerify := []cm.CgroupName{cm.CgroupName("besteffort/pod" + podUID)}
+				cgroupsToVerify := []string{"besteffort/pod" + podUID}
 				pod := makePodToVerifyCgroups(cgroupsToVerify)
 				f.PodClient().Create(pod)
 				err := framework.WaitForPodSuccessInNamespace(f.ClientSet, pod.Name, f.Namespace.Name)
@@ -242,7 +247,7 @@ var _ = framework.KubeDescribe("Kubelet Cgroup Manager", func() {
 			By("Checking if the pod cgroup was deleted", func() {
 				gp := int64(1)
 				Expect(f.PodClient().Delete(bestEffortPod.Name, &metav1.DeleteOptions{GracePeriodSeconds: &gp})).NotTo(HaveOccurred())
-				pod := makePodToVerifyCgroupRemoved(cm.CgroupName("besteffort/pod" + podUID))
+				pod := makePodToVerifyCgroupRemoved("besteffort/pod" + podUID)
 				f.PodClient().Create(pod)
 				err := framework.WaitForPodSuccessInNamespace(f.ClientSet, pod.Name, f.Namespace.Name)
 				Expect(err).NotTo(HaveOccurred())
@@ -277,7 +282,7 @@ var _ = framework.KubeDescribe("Kubelet Cgroup Manager", func() {
 				podUID = string(burstablePod.UID)
 			})
 			By("Checking if the pod cgroup was created", func() {
-				cgroupsToVerify := []cm.CgroupName{cm.CgroupName("burstable/pod" + podUID)}
+				cgroupsToVerify := []string{"burstable/pod" + podUID}
 				pod := makePodToVerifyCgroups(cgroupsToVerify)
 				f.PodClient().Create(pod)
 				err := framework.WaitForPodSuccessInNamespace(f.ClientSet, pod.Name, f.Namespace.Name)
@@ -286,7 +291,7 @@ var _ = framework.KubeDescribe("Kubelet Cgroup Manager", func() {
 			By("Checking if the pod cgroup was deleted", func() {
 				gp := int64(1)
 				Expect(f.PodClient().Delete(burstablePod.Name, &metav1.DeleteOptions{GracePeriodSeconds: &gp})).NotTo(HaveOccurred())
-				pod := makePodToVerifyCgroupRemoved(cm.CgroupName("burstable/pod" + podUID))
+				pod := makePodToVerifyCgroupRemoved("burstable/pod" + podUID)
 				f.PodClient().Create(pod)
 				err := framework.WaitForPodSuccessInNamespace(f.ClientSet, pod.Name, f.Namespace.Name)
 				Expect(err).NotTo(HaveOccurred())

View File

@@ -387,18 +387,19 @@ func restartKubelet() {
 	framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %v", err, stdout)
 }
 
-func toCgroupFsName(cgroup string) string {
+func toCgroupFsName(cgroupName cm.CgroupName) string {
 	if framework.TestContext.KubeletConfig.CgroupDriver == "systemd" {
-		return cm.ConvertCgroupNameToSystemd(cm.CgroupName(cgroup), true)
+		return cgroupName.ToSystemd()
+	} else {
+		return cgroupName.ToCgroupfs()
 	}
-	return cgroup
 }
 
 // reduceAllocatableMemoryUsage uses memory.force_empty (https://lwn.net/Articles/432224/)
 // to make the kernel reclaim memory in the allocatable cgroup
 // the time to reduce pressure may be unbounded, but usually finishes within a second
 func reduceAllocatableMemoryUsage() {
-	cmd := fmt.Sprintf("echo 0 > /sys/fs/cgroup/memory/%s/memory.force_empty", toCgroupFsName(defaultNodeAllocatableCgroup))
+	cmd := fmt.Sprintf("echo 0 > /sys/fs/cgroup/memory/%s/memory.force_empty", toCgroupFsName(cm.NewCgroupName(cm.RootCgroupName, defaultNodeAllocatableCgroup)))
 	_, err := exec.Command("sudo", "sh", "-c", cmd).CombinedOutput()
 	framework.ExpectNoError(err)
 }