From b230fb8ac4070c3c8b5f0388599e7b9c240b3191 Mon Sep 17 00:00:00 2001
From: Filipe Brandenburger
Date: Tue, 27 Mar 2018 21:11:00 -0700
Subject: [PATCH] Use a []string for CgroupName, which is a more accurate
 internal representation

The slice of strings more precisely captures the hierarchical nature of the
cgroup paths we use to represent pods and their groupings. It also reduces
the chances of passing an incorrectly formatted path to a cgroup driver that
requires a different naming scheme, since explicit conversions are now
always needed.

The new constructor NewCgroupName starts from an existing CgroupName, which
enforces a hierarchy where a root is always needed. It also checks the
component names to ensure the invalid characters "/" and "_" are not in use.

A RootCgroupName for the top of the cgroup hierarchy tree is introduced.

This refactor results in a net reduction of around 30 lines of code, mainly
through the demise of ConvertCgroupNameToSystemd, which had fairly
complicated logic and was doing too many things.

There's a small TODO in a helper, updateSystemdCgroupInfo, that was
introduced to make this commit possible. That logic really belongs in
libcontainer; I'm planning to send a PR to include it there. (The API
already takes a field with that information, but the field is only processed
by the cgroupfs driver and not the systemd one; we should fix that.)

Tested by running the e2e-node tests on both Ubuntu 16.04 (with the cgroupfs
driver) and CentOS 7 (with the systemd driver).
---
 pkg/kubelet/cm/cgroup_manager_linux.go        | 244 ++++++++----------
 pkg/kubelet/cm/cgroup_manager_linux_test.go   |  72 +++---
 pkg/kubelet/cm/cgroup_manager_unsupported.go  |  20 +-
 pkg/kubelet/cm/container_manager_linux.go     |  13 +-
 pkg/kubelet/cm/node_container_manager.go      |  15 +-
 pkg/kubelet/cm/pod_container_manager_linux.go |  12 +-
 pkg/kubelet/cm/pod_container_manager_stub.go  |   2 +-
 pkg/kubelet/cm/qos_container_manager_linux.go |  30 +--
 pkg/kubelet/cm/types.go                       |  12 +-
 pkg/kubelet/dockershim/BUILD                  |   1 -
 pkg/kubelet/dockershim/docker_service.go      |  11 +-
 pkg/kubelet/stats/cadvisor_stats_provider.go  |   9 +-
 test/e2e_node/hugepages_test.go               |  11 +-
 test/e2e_node/node_container_manager_test.go  |  32 +--
 test/e2e_node/pods_container_manager_test.go  |  31 ++-
 test/e2e_node/util.go                         |   9 +-
 16 files changed, 252 insertions(+), 272 deletions(-)

diff --git a/pkg/kubelet/cm/cgroup_manager_linux.go b/pkg/kubelet/cm/cgroup_manager_linux.go
index 9f23817f16..6a4d65cd85 100644
--- a/pkg/kubelet/cm/cgroup_manager_linux.go
+++ b/pkg/kubelet/cm/cgroup_manager_linux.go
@@ -53,74 +53,79 @@ const (
 // which is what is expected when interacting with libcontainer
 var hugePageSizeList = []string{"B", "kB", "MB", "GB", "TB", "PB"}
 
-// ConvertCgroupNameToSystemd converts the internal cgroup name to a systemd name.
-// For example, the name /Burstable/pod_123-456 becomes Burstable-pod_123_456.slice
-// If outputToCgroupFs is true, it expands the systemd name into the cgroupfs form.
-// For example, it will return /Burstable.slice/Burstable-pod_123_456.slice in above scenario.
-func ConvertCgroupNameToSystemd(cgroupName CgroupName, outputToCgroupFs bool) string {
-	name := string(cgroupName)
-	result := ""
-	if name != "" && name != "/" {
-		parts := strings.Split(name, "/")
-		results := []string{}
-		for _, part := range parts {
-			// ignore leading stuff
-			if part == "" {
-				continue
-			}
-			// detect if we are given a systemd style name.
-			// if so, we do not want to do double encoding.
-			if IsSystemdStyleName(part) {
-				part = strings.TrimSuffix(part, systemdSuffix)
-				separatorIndex := strings.LastIndex(part, "-")
-				if separatorIndex >= 0 && separatorIndex < len(part) {
-					part = part[separatorIndex+1:]
-				}
-			} else {
-				// systemd treats - as a step in the hierarchy, we convert all - to _
-				part = strings.Replace(part, "-", "_", -1)
-			}
-			results = append(results, part)
+var RootCgroupName = CgroupName([]string{})
+
+// NewCgroupName composes a new cgroup name.
+// Use RootCgroupName as base to start at the root.
+// This function performs a basic check for invalid characters in the name.
+func NewCgroupName(base CgroupName, components ...string) CgroupName {
+	for _, component := range components {
+		// Forbid using "_" in internal names. When remapping internal
+		// names to the systemd cgroup driver, we want to remap "-" => "_",
+		// so we forbid "_" to keep the mapping reversible.
+		if strings.Contains(component, "/") || strings.Contains(component, "_") {
+			panic(fmt.Errorf("invalid character in component [%q] of CgroupName", component))
 		}
-		// each part is appended with systemd style -
-		result = strings.Join(results, "-")
-	} else {
-		// root converts to -
-		result = "-"
 	}
-	// always have a .slice suffix
-	if !IsSystemdStyleName(result) {
-		result = result + systemdSuffix
+	return CgroupName(append(base, components...))
+}
+
+func escapeSystemdCgroupName(part string) string {
+	return strings.Replace(part, "-", "_", -1)
+}
+
+func unescapeSystemdCgroupName(part string) string {
+	return strings.Replace(part, "_", "-", -1)
+}
+
+// cgroupName.ToSystemd converts the internal cgroup name to a systemd name.
+// For example, the name {"kubepods", "burstable", "pod1234-abcd-5678-efgh"} becomes
+// "/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod1234_abcd_5678_efgh.slice"
+// This function always expands the systemd name into the cgroupfs form. If only
+// the last part is needed, use path.Base(...) on it to discard the rest.
+func (cgroupName CgroupName) ToSystemd() string {
+	if len(cgroupName) == 0 || (len(cgroupName) == 1 && cgroupName[0] == "") {
+		return "/"
+	}
+	newparts := []string{}
+	for _, part := range cgroupName {
+		part = escapeSystemdCgroupName(part)
+		newparts = append(newparts, part)
 	}
-	// if the caller desired the result in cgroupfs format...
-	if outputToCgroupFs {
-		var err error
-		result, err = cgroupsystemd.ExpandSlice(result)
-		if err != nil {
-			panic(fmt.Errorf("error adapting cgroup name, input: %v, err: %v", name, err))
-		}
+	result, err := cgroupsystemd.ExpandSlice(strings.Join(newparts, "-") + systemdSuffix)
+	if err != nil {
+		// Should never happen...
+		panic(fmt.Errorf("error converting cgroup name [%v] to systemd format: %v", cgroupName, err))
 	}
 	return result
 }
 
-// ConvertCgroupFsNameToSystemd converts an expanded cgroupfs name to its systemd name.
-// For example, it will convert test.slice/test-a.slice/test-a-b.slice to become test-a-b.slice
-// NOTE: this is public right now to allow its usage in dockermanager and dockershim, ideally both those
-// code areas could use something from libcontainer if we get this style function upstream.
-func ConvertCgroupFsNameToSystemd(cgroupfsName string) (string, error) {
-	// TODO: see if libcontainer systemd implementation could use something similar, and if so, move
-	// this function up to that library.
At that time, it would most likely do validation specific to systemd - // above and beyond the simple assumption here that the base of the path encodes the hierarchy - // per systemd convention. - return path.Base(cgroupfsName), nil +func ParseSystemdToCgroupName(name string) CgroupName { + driverName := path.Base(name) + driverName = strings.TrimSuffix(driverName, systemdSuffix) + parts := strings.Split(driverName, "-") + result := []string{} + for _, part := range parts { + result = append(result, unescapeSystemdCgroupName(part)) + } + return CgroupName(result) +} + +func (cgroupName CgroupName) ToCgroupfs() string { + return "/" + path.Join(cgroupName...) +} + +func ParseCgroupfsToCgroupName(name string) CgroupName { + components := strings.Split(strings.TrimPrefix(name, "/"), "/") + if len(components) == 1 && components[0] == "" { + components = []string{} + } + return CgroupName(components) } func IsSystemdStyleName(name string) bool { - if strings.HasSuffix(name, systemdSuffix) { - return true - } - return false + return strings.HasSuffix(name, systemdSuffix) } // libcontainerAdapter provides a simplified interface to libcontainer based on libcontainer type. @@ -156,34 +161,6 @@ func (l *libcontainerAdapter) newManager(cgroups *libcontainerconfigs.Cgroup, pa return nil, fmt.Errorf("invalid cgroup manager configuration") } -func (l *libcontainerAdapter) revertName(name string) CgroupName { - if l.cgroupManagerType != libcontainerSystemd { - return CgroupName(name) - } - return CgroupName(RevertFromSystemdToCgroupStyleName(name)) -} - -func RevertFromSystemdToCgroupStyleName(name string) string { - driverName, err := ConvertCgroupFsNameToSystemd(name) - if err != nil { - panic(err) - } - driverName = strings.TrimSuffix(driverName, systemdSuffix) - driverName = strings.Replace(driverName, "-", "/", -1) - driverName = strings.Replace(driverName, "_", "-", -1) - return driverName -} - -// adaptName converts a CgroupName identifier to a driver specific conversion value. -// if outputToCgroupFs is true, the result is returned in the cgroupfs format rather than the driver specific form. -func (l *libcontainerAdapter) adaptName(cgroupName CgroupName, outputToCgroupFs bool) string { - if l.cgroupManagerType != libcontainerSystemd { - name := string(cgroupName) - return name - } - return ConvertCgroupNameToSystemd(cgroupName, outputToCgroupFs) -} - // CgroupSubsystems holds information about the mounted cgroup subsystems type CgroupSubsystems struct { // Cgroup subsystem mounts. @@ -223,13 +200,22 @@ func NewCgroupManager(cs *CgroupSubsystems, cgroupDriver string) CgroupManager { } // Name converts the cgroup to the driver specific value in cgroupfs form. +// This always returns a valid cgroupfs path even when systemd driver is in use! func (m *cgroupManagerImpl) Name(name CgroupName) string { - return m.adapter.adaptName(name, true) + if m.adapter.cgroupManagerType == libcontainerSystemd { + return name.ToSystemd() + } else { + return name.ToCgroupfs() + } } // CgroupName converts the literal cgroupfs name on the host to an internal identifier. func (m *cgroupManagerImpl) CgroupName(name string) CgroupName { - return m.adapter.revertName(name) + if m.adapter.cgroupManagerType == libcontainerSystemd { + return ParseSystemdToCgroupName(name) + } else { + return ParseCgroupfsToCgroupName(name) + } } // buildCgroupPaths builds a path to each cgroup subsystem for the specified name. 
@@ -242,6 +228,22 @@ func (m *cgroupManagerImpl) buildCgroupPaths(name CgroupName) map[string]string return cgroupPaths } +// TODO(filbranden): This logic belongs in libcontainer/cgroup/systemd instead. +// It should take a libcontainerconfigs.Cgroup.Path field (rather than Name and Parent) +// and split it appropriately, using essentially the logic below. +// This was done for cgroupfs in opencontainers/runc#497 but a counterpart +// for systemd was never introduced. +func updateSystemdCgroupInfo(cgroupConfig *libcontainerconfigs.Cgroup, cgroupName CgroupName) { + dir, base := path.Split(cgroupName.ToSystemd()) + if dir == "/" { + dir = "-.slice" + } else { + dir = path.Base(dir) + } + cgroupConfig.Parent = dir + cgroupConfig.Name = base +} + // Exists checks if all subsystem cgroups already exist func (m *cgroupManagerImpl) Exists(name CgroupName) bool { // Get map of all cgroup paths on the system for the particular cgroup @@ -278,23 +280,13 @@ func (m *cgroupManagerImpl) Destroy(cgroupConfig *CgroupConfig) error { cgroupPaths := m.buildCgroupPaths(cgroupConfig.Name) - // we take the location in traditional cgroupfs format. - abstractCgroupFsName := string(cgroupConfig.Name) - abstractParent := CgroupName(path.Dir(abstractCgroupFsName)) - abstractName := CgroupName(path.Base(abstractCgroupFsName)) - - driverParent := m.adapter.adaptName(abstractParent, false) - driverName := m.adapter.adaptName(abstractName, false) - - // this is an ugly abstraction bleed, but systemd cgroup driver requires full paths... + libcontainerCgroupConfig := &libcontainerconfigs.Cgroup{} + // libcontainer consumes a different field and expects a different syntax + // depending on the cgroup driver in use, so we need this conditional here. if m.adapter.cgroupManagerType == libcontainerSystemd { - driverName = m.adapter.adaptName(cgroupConfig.Name, false) - } - - // Initialize libcontainer's cgroup config with driver specific naming. - libcontainerCgroupConfig := &libcontainerconfigs.Cgroup{ - Name: driverName, - Parent: driverParent, + updateSystemdCgroupInfo(libcontainerCgroupConfig, cgroupConfig.Name) + } else { + libcontainerCgroupConfig.Path = cgroupConfig.Name.ToCgroupfs() } manager, err := m.adapter.newManager(libcontainerCgroupConfig, cgroupPaths) @@ -418,26 +410,17 @@ func (m *cgroupManagerImpl) Update(cgroupConfig *CgroupConfig) error { cgroupPaths := m.buildCgroupPaths(cgroupConfig.Name) - // we take the location in traditional cgroupfs format. - abstractCgroupFsName := string(cgroupConfig.Name) - abstractParent := CgroupName(path.Dir(abstractCgroupFsName)) - abstractName := CgroupName(path.Base(abstractCgroupFsName)) - - driverParent := m.adapter.adaptName(abstractParent, false) - driverName := m.adapter.adaptName(abstractName, false) - - // this is an ugly abstraction bleed, but systemd cgroup driver requires full paths... - if m.adapter.cgroupManagerType == libcontainerSystemd { - driverName = m.adapter.adaptName(cgroupConfig.Name, false) - } - - // Initialize libcontainer's cgroup config libcontainerCgroupConfig := &libcontainerconfigs.Cgroup{ - Name: driverName, - Parent: driverParent, Resources: resources, Paths: cgroupPaths, } + // libcontainer consumes a different field and expects a different syntax + // depending on the cgroup driver in use, so we need this conditional here. 
+ if m.adapter.cgroupManagerType == libcontainerSystemd { + updateSystemdCgroupInfo(libcontainerCgroupConfig, cgroupConfig.Name) + } else { + libcontainerCgroupConfig.Path = cgroupConfig.Name.ToCgroupfs() + } if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) && cgroupConfig.ResourceParameters.PodPidsLimit != nil { libcontainerCgroupConfig.PidsLimit = *cgroupConfig.ResourceParameters.PodPidsLimit @@ -456,25 +439,18 @@ func (m *cgroupManagerImpl) Create(cgroupConfig *CgroupConfig) error { metrics.CgroupManagerLatency.WithLabelValues("create").Observe(metrics.SinceInMicroseconds(start)) }() - // we take the location in traditional cgroupfs format. - abstractCgroupFsName := string(cgroupConfig.Name) - abstractParent := CgroupName(path.Dir(abstractCgroupFsName)) - abstractName := CgroupName(path.Base(abstractCgroupFsName)) - - driverParent := m.adapter.adaptName(abstractParent, false) - driverName := m.adapter.adaptName(abstractName, false) - // this is an ugly abstraction bleed, but systemd cgroup driver requires full paths... - if m.adapter.cgroupManagerType == libcontainerSystemd { - driverName = m.adapter.adaptName(cgroupConfig.Name, false) - } - resources := m.toResources(cgroupConfig.ResourceParameters) - // Initialize libcontainer's cgroup config with driver specific naming. + libcontainerCgroupConfig := &libcontainerconfigs.Cgroup{ - Name: driverName, - Parent: driverParent, Resources: resources, } + // libcontainer consumes a different field and expects a different syntax + // depending on the cgroup driver in use, so we need this conditional here. + if m.adapter.cgroupManagerType == libcontainerSystemd { + updateSystemdCgroupInfo(libcontainerCgroupConfig, cgroupConfig.Name) + } else { + libcontainerCgroupConfig.Path = cgroupConfig.Name.ToCgroupfs() + } if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) && cgroupConfig.ResourceParameters.PodPidsLimit != nil { libcontainerCgroupConfig.PidsLimit = *cgroupConfig.ResourceParameters.PodPidsLimit diff --git a/pkg/kubelet/cm/cgroup_manager_linux_test.go b/pkg/kubelet/cm/cgroup_manager_linux_test.go index 9947c15dac..eb71014c5a 100644 --- a/pkg/kubelet/cm/cgroup_manager_linux_test.go +++ b/pkg/kubelet/cm/cgroup_manager_linux_test.go @@ -18,119 +18,105 @@ limitations under the License. 
package cm -import "testing" +import ( + "path" + "testing" +) -func TestLibcontainerAdapterAdaptToSystemd(t *testing.T) { +func TestCgroupNameToSystemdBasename(t *testing.T) { testCases := []struct { - input string + input CgroupName expected string }{ { - input: "/", - expected: "-.slice", + input: RootCgroupName, + expected: "/", }, { - input: "/system.slice", + input: NewCgroupName(RootCgroupName, "system"), expected: "system.slice", }, { - input: "/system.slice/Burstable", + input: NewCgroupName(RootCgroupName, "system", "Burstable"), expected: "system-Burstable.slice", }, { - input: "/Burstable.slice/Burstable-pod_123.slice", + input: NewCgroupName(RootCgroupName, "Burstable", "pod-123"), expected: "Burstable-pod_123.slice", }, { - input: "/test.slice/test-a.slice/test-a-b.slice", + input: NewCgroupName(RootCgroupName, "test", "a", "b"), expected: "test-a-b.slice", }, { - input: "/test.slice/test-a.slice/test-a-b.slice/Burstable", + input: NewCgroupName(RootCgroupName, "test", "a", "b", "Burstable"), expected: "test-a-b-Burstable.slice", }, { - input: "/Burstable", + input: NewCgroupName(RootCgroupName, "Burstable"), expected: "Burstable.slice", }, { - input: "/Burstable/pod_123", - expected: "Burstable-pod_123.slice", - }, - { - input: "/BestEffort/pod_6c1a4e95-6bb6-11e6-bc26-28d2444e470d", + input: NewCgroupName(RootCgroupName, "BestEffort", "pod-6c1a4e95-6bb6-11e6-bc26-28d2444e470d"), expected: "BestEffort-pod_6c1a4e95_6bb6_11e6_bc26_28d2444e470d.slice", }, } for _, testCase := range testCases { - f := newLibcontainerAdapter(libcontainerSystemd) - if actual := f.adaptName(CgroupName(testCase.input), false); actual != testCase.expected { + if actual := path.Base(testCase.input.ToSystemd()); actual != testCase.expected { t.Errorf("Unexpected result, input: %v, expected: %v, actual: %v", testCase.input, testCase.expected, actual) } } } -func TestLibcontainerAdapterAdaptToSystemdAsCgroupFs(t *testing.T) { +func TestCgroupNameToSystemd(t *testing.T) { testCases := []struct { - input string + input CgroupName expected string }{ { - input: "/", + input: RootCgroupName, expected: "/", }, { - input: "/Burstable", + input: NewCgroupName(RootCgroupName, "Burstable"), expected: "/Burstable.slice", }, { - input: "/Burstable/pod_123", + input: NewCgroupName(RootCgroupName, "Burstable", "pod-123"), expected: "/Burstable.slice/Burstable-pod_123.slice", }, { - input: "/BestEffort/pod_6c1a4e95-6bb6-11e6-bc26-28d2444e470d", + input: NewCgroupName(RootCgroupName, "BestEffort", "pod-6c1a4e95-6bb6-11e6-bc26-28d2444e470d"), expected: "/BestEffort.slice/BestEffort-pod_6c1a4e95_6bb6_11e6_bc26_28d2444e470d.slice", }, { - input: "/kubepods", + input: NewCgroupName(RootCgroupName, "kubepods"), expected: "/kubepods.slice", }, } for _, testCase := range testCases { - f := newLibcontainerAdapter(libcontainerSystemd) - if actual := f.adaptName(CgroupName(testCase.input), true); actual != testCase.expected { + if actual := testCase.input.ToSystemd(); actual != testCase.expected { t.Errorf("Unexpected result, input: %v, expected: %v, actual: %v", testCase.input, testCase.expected, actual) } } } -func TestLibcontainerAdapterNotAdaptToSystemd(t *testing.T) { - cgroupfs := newLibcontainerAdapter(libcontainerCgroupfs) - otherAdatper := newLibcontainerAdapter(libcontainerCgroupManagerType("test")) - +func TestCgroupNameToCgroupfs(t *testing.T) { testCases := []struct { - input string + input CgroupName expected string }{ { - input: "/", + input: RootCgroupName, expected: "/", }, { - input: "/Burstable", + input: 
NewCgroupName(RootCgroupName, "Burstable"), expected: "/Burstable", }, - { - input: "", - expected: "", - }, } for _, testCase := range testCases { - if actual := cgroupfs.adaptName(CgroupName(testCase.input), true); actual != testCase.expected { - t.Errorf("Unexpected result, input: %v, expected: %v, actual: %v", testCase.input, testCase.expected, actual) - } - - if actual := otherAdatper.adaptName(CgroupName(testCase.input), true); actual != testCase.expected { + if actual := testCase.input.ToCgroupfs(); actual != testCase.expected { t.Errorf("Unexpected result, input: %v, expected: %v, actual: %v", testCase.input, testCase.expected, actual) } } diff --git a/pkg/kubelet/cm/cgroup_manager_unsupported.go b/pkg/kubelet/cm/cgroup_manager_unsupported.go index 34345985cf..5d77ed7a45 100644 --- a/pkg/kubelet/cm/cgroup_manager_unsupported.go +++ b/pkg/kubelet/cm/cgroup_manager_unsupported.go @@ -63,25 +63,35 @@ func (m *unsupportedCgroupManager) Pids(_ CgroupName) []int { } func (m *unsupportedCgroupManager) CgroupName(name string) CgroupName { - return "" + return CgroupName([]string{}) } func (m *unsupportedCgroupManager) ReduceCPULimits(cgroupName CgroupName) error { return nil } -func ConvertCgroupFsNameToSystemd(cgroupfsName string) (string, error) { - return "", nil +var RootCgroupName = CgroupName([]string{}) + +func NewCgroupName(base CgroupName, components ...string) CgroupName { + return CgroupName(append(base, components...)) } -func ConvertCgroupNameToSystemd(cgroupName CgroupName, outputToCgroupFs bool) string { +func (cgroupName CgroupName) ToSystemd() string { return "" } -func RevertFromSystemdToCgroupStyleName(name string) string { +func ParseSystemdToCgroupName(name string) CgroupName { + return nil +} + +func (cgroupName CgroupName) ToCgroupfs() string { return "" } +func ParseCgroupfsToCgroupName(name string) CgroupName { + return nil +} + func IsSystemdStyleName(name string) bool { return false } diff --git a/pkg/kubelet/cm/container_manager_linux.go b/pkg/kubelet/cm/container_manager_linux.go index 236ac71dc1..b9979f254a 100644 --- a/pkg/kubelet/cm/container_manager_linux.go +++ b/pkg/kubelet/cm/container_manager_linux.go @@ -123,7 +123,7 @@ type containerManagerImpl struct { capacity v1.ResourceList // Absolute cgroupfs path to a cgroup that Kubelet needs to place all pods under. // This path include a top level container for enforcing Node Allocatable. - cgroupRoot string + cgroupRoot CgroupName // Event recorder interface. recorder record.EventRecorder // Interface for QoS cgroup management @@ -223,7 +223,8 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I } capacity = cadvisor.CapacityFromMachineInfo(machineInfo) - cgroupRoot := nodeConfig.CgroupRoot + // Turn CgroupRoot from a string (in cgroupfs path format) to internal CgroupName + cgroupRoot := ParseCgroupfsToCgroupName(nodeConfig.CgroupRoot) cgroupManager := NewCgroupManager(subsystems, nodeConfig.CgroupDriver) // Check if Cgroup-root actually exists on the node if nodeConfig.CgroupsPerQOS { @@ -236,13 +237,13 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I // of note, we always use the cgroupfs driver when performing this check since // the input is provided in that format. // this is important because we do not want any name conversion to occur. 
- if !cgroupManager.Exists(CgroupName(cgroupRoot)) { + if !cgroupManager.Exists(cgroupRoot) { return nil, fmt.Errorf("invalid configuration: cgroup-root %q doesn't exist: %v", cgroupRoot, err) } glog.Infof("container manager verified user specified cgroup-root exists: %v", cgroupRoot) // Include the top level cgroup for enforcing node allocatable into cgroup-root. // This way, all sub modules can avoid having to understand the concept of node allocatable. - cgroupRoot = path.Join(cgroupRoot, defaultNodeAllocatableCgroupName) + cgroupRoot = NewCgroupName(cgroupRoot, defaultNodeAllocatableCgroupName) } glog.Infof("Creating Container Manager object based on Node Config: %+v", nodeConfig) @@ -305,7 +306,7 @@ func (cm *containerManagerImpl) NewPodContainerManager() PodContainerManager { } } return &podContainerManagerNoop{ - cgroupRoot: CgroupName(cm.cgroupRoot), + cgroupRoot: cm.cgroupRoot, } } @@ -503,7 +504,7 @@ func (cm *containerManagerImpl) GetNodeConfig() NodeConfig { // GetPodCgroupRoot returns the literal cgroupfs value for the cgroup containing all pods. func (cm *containerManagerImpl) GetPodCgroupRoot() string { - return cm.cgroupManager.Name(CgroupName(cm.cgroupRoot)) + return cm.cgroupManager.Name(cm.cgroupRoot) } func (cm *containerManagerImpl) GetMountedSubsystems() *CgroupSubsystems { diff --git a/pkg/kubelet/cm/node_container_manager.go b/pkg/kubelet/cm/node_container_manager.go index b43176b1a3..6c1e022c2f 100644 --- a/pkg/kubelet/cm/node_container_manager.go +++ b/pkg/kubelet/cm/node_container_manager.go @@ -42,7 +42,7 @@ const ( //createNodeAllocatableCgroups creates Node Allocatable Cgroup when CgroupsPerQOS flag is specified as true func (cm *containerManagerImpl) createNodeAllocatableCgroups() error { cgroupConfig := &CgroupConfig{ - Name: CgroupName(cm.cgroupRoot), + Name: cm.cgroupRoot, // The default limits for cpu shares can be very low which can lead to CPU starvation for pods. ResourceParameters: getCgroupConfig(cm.capacity), } @@ -71,7 +71,7 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error { glog.V(4).Infof("Attempting to enforce Node Allocatable with config: %+v", nc) cgroupConfig := &CgroupConfig{ - Name: CgroupName(cm.cgroupRoot), + Name: cm.cgroupRoot, ResourceParameters: getCgroupConfig(nodeAllocatable), } @@ -88,7 +88,8 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error { // Pod Evictions are expected to bring down memory usage to below Node Allocatable limits. // Until evictions happen retry cgroup updates. // Update limits on non root cgroup-root to be safe since the default limits for CPU can be too low. - if cm.cgroupRoot != "/" { + // Check if cgroupRoot is set to a non-empty value (empty would be the root container) + if len(cm.cgroupRoot) > 0 { go func() { for { err := cm.cgroupManager.Update(cgroupConfig) @@ -105,7 +106,7 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error { // Now apply kube reserved and system reserved limits if required. 
if nc.EnforceNodeAllocatable.Has(kubetypes.SystemReservedEnforcementKey) { glog.V(2).Infof("Enforcing System reserved on cgroup %q with limits: %+v", nc.SystemReservedCgroupName, nc.SystemReserved) - if err := enforceExistingCgroup(cm.cgroupManager, nc.SystemReservedCgroupName, nc.SystemReserved); err != nil { + if err := enforceExistingCgroup(cm.cgroupManager, ParseCgroupfsToCgroupName(nc.SystemReservedCgroupName), nc.SystemReserved); err != nil { message := fmt.Sprintf("Failed to enforce System Reserved Cgroup Limits on %q: %v", nc.SystemReservedCgroupName, err) cm.recorder.Event(nodeRef, v1.EventTypeWarning, events.FailedNodeAllocatableEnforcement, message) return fmt.Errorf(message) @@ -114,7 +115,7 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error { } if nc.EnforceNodeAllocatable.Has(kubetypes.KubeReservedEnforcementKey) { glog.V(2).Infof("Enforcing kube reserved on cgroup %q with limits: %+v", nc.KubeReservedCgroupName, nc.KubeReserved) - if err := enforceExistingCgroup(cm.cgroupManager, nc.KubeReservedCgroupName, nc.KubeReserved); err != nil { + if err := enforceExistingCgroup(cm.cgroupManager, ParseCgroupfsToCgroupName(nc.KubeReservedCgroupName), nc.KubeReserved); err != nil { message := fmt.Sprintf("Failed to enforce Kube Reserved Cgroup Limits on %q: %v", nc.KubeReservedCgroupName, err) cm.recorder.Event(nodeRef, v1.EventTypeWarning, events.FailedNodeAllocatableEnforcement, message) return fmt.Errorf(message) @@ -125,9 +126,9 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error { } // enforceExistingCgroup updates the limits `rl` on existing cgroup `cName` using `cgroupManager` interface. -func enforceExistingCgroup(cgroupManager CgroupManager, cName string, rl v1.ResourceList) error { +func enforceExistingCgroup(cgroupManager CgroupManager, cName CgroupName, rl v1.ResourceList) error { cgroupConfig := &CgroupConfig{ - Name: CgroupName(cName), + Name: cName, ResourceParameters: getCgroupConfig(rl), } glog.V(4).Infof("Enforcing limits on cgroup %q with %d cpu shares and %d bytes of memory", cName, cgroupConfig.ResourceParameters.CpuShares, cgroupConfig.ResourceParameters.Memory) diff --git a/pkg/kubelet/cm/pod_container_manager_linux.go b/pkg/kubelet/cm/pod_container_manager_linux.go index a9a873691c..a749c9087e 100644 --- a/pkg/kubelet/cm/pod_container_manager_linux.go +++ b/pkg/kubelet/cm/pod_container_manager_linux.go @@ -104,7 +104,7 @@ func (m *podContainerManagerImpl) EnsureExists(pod *v1.Pod) error { func (m *podContainerManagerImpl) GetPodContainerName(pod *v1.Pod) (CgroupName, string) { podQOS := v1qos.GetPodQOS(pod) // Get the parent QOS container name - var parentContainer string + var parentContainer CgroupName switch podQOS { case v1.PodQOSGuaranteed: parentContainer = m.qosContainersInfo.Guaranteed @@ -116,7 +116,7 @@ func (m *podContainerManagerImpl) GetPodContainerName(pod *v1.Pod) (CgroupName, podContainer := GetPodCgroupNameSuffix(pod.UID) // Get the absolute path of the cgroup - cgroupName := (CgroupName)(path.Join(parentContainer, podContainer)) + cgroupName := NewCgroupName(parentContainer, podContainer) // Get the literal cgroupfs name cgroupfsName := m.cgroupManager.Name(cgroupName) @@ -189,7 +189,7 @@ func (m *podContainerManagerImpl) ReduceCPULimits(podCgroup CgroupName) error { func (m *podContainerManagerImpl) GetAllPodsFromCgroups() (map[types.UID]CgroupName, error) { // Map for storing all the found pods on the disk foundPods := make(map[types.UID]CgroupName) - qosContainersList := 
[3]string{m.qosContainersInfo.BestEffort, m.qosContainersInfo.Burstable, m.qosContainersInfo.Guaranteed} + qosContainersList := [3]CgroupName{m.qosContainersInfo.BestEffort, m.qosContainersInfo.Burstable, m.qosContainersInfo.Guaranteed} // Scan through all the subsystem mounts // and through each QoS cgroup directory for each subsystem mount // If a pod cgroup exists in even a single subsystem mount @@ -197,7 +197,7 @@ func (m *podContainerManagerImpl) GetAllPodsFromCgroups() (map[types.UID]CgroupN for _, val := range m.subsystems.MountPoints { for _, qosContainerName := range qosContainersList { // get the subsystems QoS cgroup absolute name - qcConversion := m.cgroupManager.Name(CgroupName(qosContainerName)) + qcConversion := m.cgroupManager.Name(qosContainerName) qc := path.Join(val, qcConversion) dirInfo, err := ioutil.ReadDir(qc) if err != nil { @@ -219,7 +219,7 @@ func (m *podContainerManagerImpl) GetAllPodsFromCgroups() (map[types.UID]CgroupN internalPath := m.cgroupManager.CgroupName(cgroupfsPath) // we only care about base segment of the converted path since that // is what we are reading currently to know if it is a pod or not. - basePath := path.Base(string(internalPath)) + basePath := internalPath[len(internalPath)-1] if !strings.Contains(basePath, podCgroupNamePrefix) { continue } @@ -259,7 +259,7 @@ func (m *podContainerManagerNoop) EnsureExists(_ *v1.Pod) error { } func (m *podContainerManagerNoop) GetPodContainerName(_ *v1.Pod) (CgroupName, string) { - return m.cgroupRoot, string(m.cgroupRoot) + return m.cgroupRoot, m.cgroupRoot.ToCgroupfs() } func (m *podContainerManagerNoop) GetPodContainerNameForDriver(_ *v1.Pod) string { diff --git a/pkg/kubelet/cm/pod_container_manager_stub.go b/pkg/kubelet/cm/pod_container_manager_stub.go index 61923f172e..c4cb71156b 100644 --- a/pkg/kubelet/cm/pod_container_manager_stub.go +++ b/pkg/kubelet/cm/pod_container_manager_stub.go @@ -35,7 +35,7 @@ func (m *podContainerManagerStub) EnsureExists(_ *v1.Pod) error { } func (m *podContainerManagerStub) GetPodContainerName(_ *v1.Pod) (CgroupName, string) { - return "", "" + return nil, "" } func (m *podContainerManagerStub) Destroy(_ CgroupName) error { diff --git a/pkg/kubelet/cm/qos_container_manager_linux.go b/pkg/kubelet/cm/qos_container_manager_linux.go index 62380a2b2f..2cfc198c3a 100644 --- a/pkg/kubelet/cm/qos_container_manager_linux.go +++ b/pkg/kubelet/cm/qos_container_manager_linux.go @@ -18,7 +18,6 @@ package cm import ( "fmt" - "path" "strings" "sync" "time" @@ -60,17 +59,17 @@ type qosContainerManagerImpl struct { qosReserved map[v1.ResourceName]int64 } -func NewQOSContainerManager(subsystems *CgroupSubsystems, cgroupRoot string, nodeConfig NodeConfig, cgroupManager CgroupManager) (QOSContainerManager, error) { +func NewQOSContainerManager(subsystems *CgroupSubsystems, cgroupRoot CgroupName, nodeConfig NodeConfig, cgroupManager CgroupManager) (QOSContainerManager, error) { if !nodeConfig.CgroupsPerQOS { return &qosContainerManagerNoop{ - cgroupRoot: CgroupName(cgroupRoot), + cgroupRoot: cgroupRoot, }, nil } return &qosContainerManagerImpl{ subsystems: subsystems, cgroupManager: cgroupManager, - cgroupRoot: CgroupName(cgroupRoot), + cgroupRoot: cgroupRoot, qosReserved: nodeConfig.QOSReserved, }, nil } @@ -81,23 +80,20 @@ func (m *qosContainerManagerImpl) GetQOSContainersInfo() QOSContainersInfo { func (m *qosContainerManagerImpl) Start(getNodeAllocatable func() v1.ResourceList, activePods ActivePodsFunc) error { cm := m.cgroupManager - rootContainer := string(m.cgroupRoot) - if 
!cm.Exists(CgroupName(rootContainer)) { - return fmt.Errorf("root container %s doesn't exist", rootContainer) + rootContainer := m.cgroupRoot + if !cm.Exists(rootContainer) { + return fmt.Errorf("root container %v doesn't exist", rootContainer) } // Top level for Qos containers are created only for Burstable // and Best Effort classes - qosClasses := map[v1.PodQOSClass]string{ - v1.PodQOSBurstable: path.Join(rootContainer, strings.ToLower(string(v1.PodQOSBurstable))), - v1.PodQOSBestEffort: path.Join(rootContainer, strings.ToLower(string(v1.PodQOSBestEffort))), + qosClasses := map[v1.PodQOSClass]CgroupName{ + v1.PodQOSBurstable: NewCgroupName(rootContainer, strings.ToLower(string(v1.PodQOSBurstable))), + v1.PodQOSBestEffort: NewCgroupName(rootContainer, strings.ToLower(string(v1.PodQOSBestEffort))), } // Create containers for both qos classes for qosClass, containerName := range qosClasses { - // get the container's absolute name - absoluteContainerName := CgroupName(containerName) - resourceParameters := &ResourceConfig{} // the BestEffort QoS class has a statically configured minShares value if qosClass == v1.PodQOSBestEffort { @@ -107,7 +103,7 @@ func (m *qosContainerManagerImpl) Start(getNodeAllocatable func() v1.ResourceLis // containerConfig object stores the cgroup specifications containerConfig := &CgroupConfig{ - Name: absoluteContainerName, + Name: containerName, ResourceParameters: resourceParameters, } @@ -117,7 +113,7 @@ func (m *qosContainerManagerImpl) Start(getNodeAllocatable func() v1.ResourceLis } // check if it exists - if !cm.Exists(absoluteContainerName) { + if !cm.Exists(containerName) { if err := cm.Create(containerConfig); err != nil { return fmt.Errorf("failed to create top level %v QOS cgroup : %v", qosClass, err) } @@ -279,11 +275,11 @@ func (m *qosContainerManagerImpl) UpdateCgroups() error { qosConfigs := map[v1.PodQOSClass]*CgroupConfig{ v1.PodQOSBurstable: { - Name: CgroupName(m.qosContainersInfo.Burstable), + Name: m.qosContainersInfo.Burstable, ResourceParameters: &ResourceConfig{}, }, v1.PodQOSBestEffort: { - Name: CgroupName(m.qosContainersInfo.BestEffort), + Name: m.qosContainersInfo.BestEffort, ResourceParameters: &ResourceConfig{}, }, } diff --git a/pkg/kubelet/cm/types.go b/pkg/kubelet/cm/types.go index ce2cc2c826..e95f44340b 100644 --- a/pkg/kubelet/cm/types.go +++ b/pkg/kubelet/cm/types.go @@ -38,7 +38,9 @@ type ResourceConfig struct { } // CgroupName is the abstract name of a cgroup prior to any driver specific conversion. -type CgroupName string +// It is specified as a list of strings from its individual components, such as: +// {"kubepods", "burstable", "pod1234-abcd-5678-efgh"} +type CgroupName []string // CgroupConfig holds the cgroup configuration information. // This is common object which is used to specify @@ -78,7 +80,7 @@ type CgroupManager interface { Exists(name CgroupName) bool // Name returns the literal cgroupfs name on the host after any driver specific conversions. // We would expect systemd implementation to make appropriate name conversion. 
- // For example, if we pass /foo/bar + // For example, if we pass {"foo", "bar"} // then systemd should convert the name to something like // foo.slice/foo-bar.slice Name(name CgroupName) string @@ -94,9 +96,9 @@ type CgroupManager interface { // QOSContainersInfo stores the names of containers per qos type QOSContainersInfo struct { - Guaranteed string - BestEffort string - Burstable string + Guaranteed CgroupName + BestEffort CgroupName + Burstable CgroupName } // PodContainerManager stores and manages pod level containers diff --git a/pkg/kubelet/dockershim/BUILD b/pkg/kubelet/dockershim/BUILD index d452a553b0..6f90f2d060 100644 --- a/pkg/kubelet/dockershim/BUILD +++ b/pkg/kubelet/dockershim/BUILD @@ -85,7 +85,6 @@ go_library( "//pkg/kubelet/checkpointmanager:go_default_library", "//pkg/kubelet/checkpointmanager/checksum:go_default_library", "//pkg/kubelet/checkpointmanager/errors:go_default_library", - "//pkg/kubelet/cm:go_default_library", "//pkg/kubelet/container:go_default_library", "//pkg/kubelet/dockershim/cm:go_default_library", "//pkg/kubelet/dockershim/libdocker:go_default_library", diff --git a/pkg/kubelet/dockershim/docker_service.go b/pkg/kubelet/dockershim/docker_service.go index 1b6f886d94..4cd973af84 100644 --- a/pkg/kubelet/dockershim/docker_service.go +++ b/pkg/kubelet/dockershim/docker_service.go @@ -20,6 +20,7 @@ import ( "context" "fmt" "net/http" + "path" "path/filepath" "sync" "time" @@ -32,7 +33,6 @@ import ( runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2" "k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig" "k8s.io/kubernetes/pkg/kubelet/checkpointmanager" - kubecm "k8s.io/kubernetes/pkg/kubelet/cm" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" "k8s.io/kubernetes/pkg/kubelet/dockershim/cm" "k8s.io/kubernetes/pkg/kubelet/dockershim/network" @@ -432,17 +432,14 @@ func (ds *dockerService) ServeHTTP(w http.ResponseWriter, r *http.Request) { // GenerateExpectedCgroupParent returns cgroup parent in syntax expected by cgroup driver func (ds *dockerService) GenerateExpectedCgroupParent(cgroupParent string) (string, error) { - if len(cgroupParent) > 0 { + if cgroupParent != "" { // if docker uses the systemd cgroup driver, it expects *.slice style names for cgroup parent. // if we configured kubelet to use --cgroup-driver=cgroupfs, and docker is configured to use systemd driver // docker will fail to launch the container because the name we provide will not be a valid slice. // this is a very good thing. if ds.cgroupDriver == "systemd" { - systemdCgroupParent, err := kubecm.ConvertCgroupFsNameToSystemd(cgroupParent) - if err != nil { - return "", err - } - cgroupParent = systemdCgroupParent + // Pass only the last component of the cgroup path to systemd. + cgroupParent = path.Base(cgroupParent) } } glog.V(3).Infof("Setting cgroup parent to: %q", cgroupParent) diff --git a/pkg/kubelet/stats/cadvisor_stats_provider.go b/pkg/kubelet/stats/cadvisor_stats_provider.go index 67da08d408..8529872cd5 100644 --- a/pkg/kubelet/stats/cadvisor_stats_provider.go +++ b/pkg/kubelet/stats/cadvisor_stats_provider.go @@ -255,9 +255,14 @@ func isPodManagedContainer(cinfo *cadvisorapiv2.ContainerInfo) bool { func getCadvisorPodInfoFromPodUID(podUID types.UID, infos map[string]cadvisorapiv2.ContainerInfo) *cadvisorapiv2.ContainerInfo { for key, info := range infos { if cm.IsSystemdStyleName(key) { - key = cm.RevertFromSystemdToCgroupStyleName(key) + // Convert to internal cgroup name and take the last component only. 
+ internalCgroupName := cm.ParseSystemdToCgroupName(key) + key = internalCgroupName[len(internalCgroupName)-1] + } else { + // Take last component only. + key = path.Base(key) } - if cm.GetPodCgroupNameSuffix(podUID) == path.Base(key) { + if cm.GetPodCgroupNameSuffix(podUID) == key { return &info } } diff --git a/test/e2e_node/hugepages_test.go b/test/e2e_node/hugepages_test.go index a8683dc3cd..cd2ab1e171 100644 --- a/test/e2e_node/hugepages_test.go +++ b/test/e2e_node/hugepages_test.go @@ -19,7 +19,6 @@ package e2e_node import ( "fmt" "os/exec" - "path" "strconv" "strings" "time" @@ -38,14 +37,14 @@ import ( ) // makePodToVerifyHugePages returns a pod that verifies specified cgroup with hugetlb -func makePodToVerifyHugePages(cgroupName cm.CgroupName, hugePagesLimit resource.Quantity) *apiv1.Pod { +func makePodToVerifyHugePages(baseName string, hugePagesLimit resource.Quantity) *apiv1.Pod { // convert the cgroup name to its literal form cgroupFsName := "" - cgroupName = cm.CgroupName(path.Join(defaultNodeAllocatableCgroup, string(cgroupName))) + cgroupName := cm.NewCgroupName(cm.RootCgroupName, defaultNodeAllocatableCgroup, baseName) if framework.TestContext.KubeletConfig.CgroupDriver == "systemd" { - cgroupFsName = cm.ConvertCgroupNameToSystemd(cgroupName, true) + cgroupFsName = cgroupName.ToSystemd() } else { - cgroupFsName = string(cgroupName) + cgroupFsName = cgroupName.ToCgroupfs() } // this command takes the expected value and compares it against the actual value for the pod cgroup hugetlb.2MB.limit_in_bytes @@ -184,7 +183,7 @@ func runHugePagesTests(f *framework.Framework) { }) podUID := string(pod.UID) By("checking if the expected hugetlb settings were applied") - verifyPod := makePodToVerifyHugePages(cm.CgroupName("pod"+podUID), resource.MustParse("50Mi")) + verifyPod := makePodToVerifyHugePages("pod"+podUID, resource.MustParse("50Mi")) f.PodClient().Create(verifyPod) err := framework.WaitForPodSuccessInNamespace(f.ClientSet, verifyPod.Name, f.Namespace.Name) Expect(err).NotTo(HaveOccurred()) diff --git a/test/e2e_node/node_container_manager_test.go b/test/e2e_node/node_container_manager_test.go index c31bbc96d0..c1bf7e2dce 100644 --- a/test/e2e_node/node_container_manager_test.go +++ b/test/e2e_node/node_container_manager_test.go @@ -21,7 +21,6 @@ package e2e_node import ( "fmt" "io/ioutil" - "path" "path/filepath" "strconv" "strings" @@ -99,8 +98,8 @@ func getAllocatableLimits(cpu, memory string, capacity v1.ResourceList) (*resour } const ( - kubeReservedCgroup = "/kube_reserved" - systemReservedCgroup = "/system_reserved" + kubeReservedCgroup = "kube_reserved" + systemReservedCgroup = "system_reserved" ) func createIfNotExists(cm cm.CgroupManager, cgroupConfig *cm.CgroupConfig) error { @@ -115,13 +114,13 @@ func createIfNotExists(cm cm.CgroupManager, cgroupConfig *cm.CgroupConfig) error func createTemporaryCgroupsForReservation(cgroupManager cm.CgroupManager) error { // Create kube reserved cgroup cgroupConfig := &cm.CgroupConfig{ - Name: cm.CgroupName(kubeReservedCgroup), + Name: cm.NewCgroupName(cm.RootCgroupName, kubeReservedCgroup), } if err := createIfNotExists(cgroupManager, cgroupConfig); err != nil { return err } // Create system reserved cgroup - cgroupConfig.Name = cm.CgroupName(systemReservedCgroup) + cgroupConfig.Name = cm.NewCgroupName(cm.RootCgroupName, systemReservedCgroup) return createIfNotExists(cgroupManager, cgroupConfig) } @@ -129,12 +128,12 @@ func createTemporaryCgroupsForReservation(cgroupManager cm.CgroupManager) error func 
destroyTemporaryCgroupsForReservation(cgroupManager cm.CgroupManager) error { // Create kube reserved cgroup cgroupConfig := &cm.CgroupConfig{ - Name: cm.CgroupName(kubeReservedCgroup), + Name: cm.NewCgroupName(cm.RootCgroupName, kubeReservedCgroup), } if err := cgroupManager.Destroy(cgroupConfig); err != nil { return err } - cgroupConfig.Name = cm.CgroupName(systemReservedCgroup) + cgroupConfig.Name = cm.NewCgroupName(cm.RootCgroupName, systemReservedCgroup) return cgroupManager.Destroy(cgroupConfig) } @@ -173,8 +172,9 @@ func runTest(f *framework.Framework) error { // Set new config and current config. currentConfig := newCfg - expectedNAPodCgroup := path.Join(currentConfig.CgroupRoot, "kubepods") - if !cgroupManager.Exists(cm.CgroupName(expectedNAPodCgroup)) { + expectedNAPodCgroup := cm.ParseCgroupfsToCgroupName(currentConfig.CgroupRoot) + expectedNAPodCgroup = cm.NewCgroupName(expectedNAPodCgroup, "kubepods") + if !cgroupManager.Exists(expectedNAPodCgroup) { return fmt.Errorf("Expected Node Allocatable Cgroup Does not exist") } // TODO: Update cgroupManager to expose a Status interface to get current Cgroup Settings. @@ -218,30 +218,32 @@ func runTest(f *framework.Framework) error { return nil }, time.Minute, 5*time.Second).Should(BeNil()) - if !cgroupManager.Exists(cm.CgroupName(kubeReservedCgroup)) { + kubeReservedCgroupName := cm.NewCgroupName(cm.RootCgroupName, kubeReservedCgroup) + if !cgroupManager.Exists(kubeReservedCgroupName) { return fmt.Errorf("Expected kube reserved cgroup Does not exist") } // Expect CPU shares on kube reserved cgroup to equal it's reservation which is `100m`. kubeReservedCPU := resource.MustParse(currentConfig.KubeReserved[string(v1.ResourceCPU)]) - if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["cpu"], kubeReservedCgroup, "cpu.shares"), int64(cm.MilliCPUToShares(kubeReservedCPU.MilliValue())), 10); err != nil { + if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["cpu"], cgroupManager.Name(kubeReservedCgroupName), "cpu.shares"), int64(cm.MilliCPUToShares(kubeReservedCPU.MilliValue())), 10); err != nil { return err } // Expect Memory limit kube reserved cgroup to equal configured value `100Mi`. kubeReservedMemory := resource.MustParse(currentConfig.KubeReserved[string(v1.ResourceMemory)]) - if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["memory"], kubeReservedCgroup, "memory.limit_in_bytes"), kubeReservedMemory.Value(), 0); err != nil { + if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["memory"], cgroupManager.Name(kubeReservedCgroupName), "memory.limit_in_bytes"), kubeReservedMemory.Value(), 0); err != nil { return err } - if !cgroupManager.Exists(cm.CgroupName(systemReservedCgroup)) { + systemReservedCgroupName := cm.NewCgroupName(cm.RootCgroupName, systemReservedCgroup) + if !cgroupManager.Exists(systemReservedCgroupName) { return fmt.Errorf("Expected system reserved cgroup Does not exist") } // Expect CPU shares on system reserved cgroup to equal it's reservation which is `100m`. 
systemReservedCPU := resource.MustParse(currentConfig.SystemReserved[string(v1.ResourceCPU)]) - if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["cpu"], systemReservedCgroup, "cpu.shares"), int64(cm.MilliCPUToShares(systemReservedCPU.MilliValue())), 10); err != nil { + if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["cpu"], cgroupManager.Name(systemReservedCgroupName), "cpu.shares"), int64(cm.MilliCPUToShares(systemReservedCPU.MilliValue())), 10); err != nil { return err } // Expect Memory limit on node allocatable cgroup to equal allocatable. systemReservedMemory := resource.MustParse(currentConfig.SystemReserved[string(v1.ResourceMemory)]) - if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["memory"], systemReservedCgroup, "memory.limit_in_bytes"), systemReservedMemory.Value(), 0); err != nil { + if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["memory"], cgroupManager.Name(systemReservedCgroupName), "memory.limit_in_bytes"), systemReservedMemory.Value(), 0); err != nil { return err } return nil diff --git a/test/e2e_node/pods_container_manager_test.go b/test/e2e_node/pods_container_manager_test.go index b54cee5478..c835548beb 100644 --- a/test/e2e_node/pods_container_manager_test.go +++ b/test/e2e_node/pods_container_manager_test.go @@ -17,7 +17,7 @@ limitations under the License. package e2e_node import ( - "path" + "strings" "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" @@ -61,12 +61,15 @@ const ( ) // makePodToVerifyCgroups returns a pod that verifies the existence of the specified cgroups. -func makePodToVerifyCgroups(cgroupNames []cm.CgroupName) *v1.Pod { +func makePodToVerifyCgroups(cgroupNames []string) *v1.Pod { // convert the names to their literal cgroupfs forms... cgroupFsNames := []string{} - for _, cgroupName := range cgroupNames { + rootCgroupName := cm.NewCgroupName(cm.RootCgroupName, defaultNodeAllocatableCgroup) + for _, baseName := range cgroupNames { // Add top level cgroup used to enforce node allocatable. - cgroupFsNames = append(cgroupFsNames, toCgroupFsName(path.Join(defaultNodeAllocatableCgroup, string(cgroupName)))) + cgroupComponents := strings.Split(baseName, "/") + cgroupName := cm.NewCgroupName(rootCgroupName, cgroupComponents...) + cgroupFsNames = append(cgroupFsNames, toCgroupFsName(cgroupName)) } glog.Infof("expecting %v cgroups to be found", cgroupFsNames) // build the pod command to either verify cgroups exist @@ -109,8 +112,10 @@ func makePodToVerifyCgroups(cgroupNames []cm.CgroupName) *v1.Pod { } // makePodToVerifyCgroupRemoved verfies the specified cgroup does not exist. -func makePodToVerifyCgroupRemoved(cgroupName cm.CgroupName) *v1.Pod { - cgroupFsName := toCgroupFsName(string(cgroupName)) +func makePodToVerifyCgroupRemoved(baseName string) *v1.Pod { + components := strings.Split(baseName, "/") + cgroupName := cm.NewCgroupName(cm.RootCgroupName, components...) 
+ cgroupFsName := toCgroupFsName(cgroupName) pod := &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "pod" + string(uuid.NewUUID()), @@ -151,7 +156,7 @@ var _ = framework.KubeDescribe("Kubelet Cgroup Manager", func() { if !framework.TestContext.KubeletConfig.CgroupsPerQOS { return } - cgroupsToVerify := []cm.CgroupName{cm.CgroupName(burstableCgroup), cm.CgroupName(bestEffortCgroup)} + cgroupsToVerify := []string{burstableCgroup, bestEffortCgroup} pod := makePodToVerifyCgroups(cgroupsToVerify) f.PodClient().Create(pod) err := framework.WaitForPodSuccessInNamespace(f.ClientSet, pod.Name, f.Namespace.Name) @@ -189,7 +194,7 @@ var _ = framework.KubeDescribe("Kubelet Cgroup Manager", func() { podUID = string(guaranteedPod.UID) }) By("Checking if the pod cgroup was created", func() { - cgroupsToVerify := []cm.CgroupName{cm.CgroupName("pod" + podUID)} + cgroupsToVerify := []string{"pod" + podUID} pod := makePodToVerifyCgroups(cgroupsToVerify) f.PodClient().Create(pod) err := framework.WaitForPodSuccessInNamespace(f.ClientSet, pod.Name, f.Namespace.Name) @@ -198,7 +203,7 @@ var _ = framework.KubeDescribe("Kubelet Cgroup Manager", func() { By("Checking if the pod cgroup was deleted", func() { gp := int64(1) Expect(f.PodClient().Delete(guaranteedPod.Name, &metav1.DeleteOptions{GracePeriodSeconds: &gp})).NotTo(HaveOccurred()) - pod := makePodToVerifyCgroupRemoved(cm.CgroupName("pod" + podUID)) + pod := makePodToVerifyCgroupRemoved("pod" + podUID) f.PodClient().Create(pod) err := framework.WaitForPodSuccessInNamespace(f.ClientSet, pod.Name, f.Namespace.Name) Expect(err).NotTo(HaveOccurred()) @@ -233,7 +238,7 @@ var _ = framework.KubeDescribe("Kubelet Cgroup Manager", func() { podUID = string(bestEffortPod.UID) }) By("Checking if the pod cgroup was created", func() { - cgroupsToVerify := []cm.CgroupName{cm.CgroupName("besteffort/pod" + podUID)} + cgroupsToVerify := []string{"besteffort/pod" + podUID} pod := makePodToVerifyCgroups(cgroupsToVerify) f.PodClient().Create(pod) err := framework.WaitForPodSuccessInNamespace(f.ClientSet, pod.Name, f.Namespace.Name) @@ -242,7 +247,7 @@ var _ = framework.KubeDescribe("Kubelet Cgroup Manager", func() { By("Checking if the pod cgroup was deleted", func() { gp := int64(1) Expect(f.PodClient().Delete(bestEffortPod.Name, &metav1.DeleteOptions{GracePeriodSeconds: &gp})).NotTo(HaveOccurred()) - pod := makePodToVerifyCgroupRemoved(cm.CgroupName("besteffort/pod" + podUID)) + pod := makePodToVerifyCgroupRemoved("besteffort/pod" + podUID) f.PodClient().Create(pod) err := framework.WaitForPodSuccessInNamespace(f.ClientSet, pod.Name, f.Namespace.Name) Expect(err).NotTo(HaveOccurred()) @@ -277,7 +282,7 @@ var _ = framework.KubeDescribe("Kubelet Cgroup Manager", func() { podUID = string(burstablePod.UID) }) By("Checking if the pod cgroup was created", func() { - cgroupsToVerify := []cm.CgroupName{cm.CgroupName("burstable/pod" + podUID)} + cgroupsToVerify := []string{"burstable/pod" + podUID} pod := makePodToVerifyCgroups(cgroupsToVerify) f.PodClient().Create(pod) err := framework.WaitForPodSuccessInNamespace(f.ClientSet, pod.Name, f.Namespace.Name) @@ -286,7 +291,7 @@ var _ = framework.KubeDescribe("Kubelet Cgroup Manager", func() { By("Checking if the pod cgroup was deleted", func() { gp := int64(1) Expect(f.PodClient().Delete(burstablePod.Name, &metav1.DeleteOptions{GracePeriodSeconds: &gp})).NotTo(HaveOccurred()) - pod := makePodToVerifyCgroupRemoved(cm.CgroupName("burstable/pod" + podUID)) + pod := makePodToVerifyCgroupRemoved("burstable/pod" + podUID) 
f.PodClient().Create(pod) err := framework.WaitForPodSuccessInNamespace(f.ClientSet, pod.Name, f.Namespace.Name) Expect(err).NotTo(HaveOccurred()) diff --git a/test/e2e_node/util.go b/test/e2e_node/util.go index 1faf723379..d86d11d3a4 100644 --- a/test/e2e_node/util.go +++ b/test/e2e_node/util.go @@ -387,18 +387,19 @@ func restartKubelet() { framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %v", err, stdout) } -func toCgroupFsName(cgroup string) string { +func toCgroupFsName(cgroupName cm.CgroupName) string { if framework.TestContext.KubeletConfig.CgroupDriver == "systemd" { - return cm.ConvertCgroupNameToSystemd(cm.CgroupName(cgroup), true) + return cgroupName.ToSystemd() + } else { + return cgroupName.ToCgroupfs() } - return cgroup } // reduceAllocatableMemoryUsage uses memory.force_empty (https://lwn.net/Articles/432224/) // to make the kernel reclaim memory in the allocatable cgroup // the time to reduce pressure may be unbounded, but usually finishes within a second func reduceAllocatableMemoryUsage() { - cmd := fmt.Sprintf("echo 0 > /sys/fs/cgroup/memory/%s/memory.force_empty", toCgroupFsName(defaultNodeAllocatableCgroup)) + cmd := fmt.Sprintf("echo 0 > /sys/fs/cgroup/memory/%s/memory.force_empty", toCgroupFsName(cm.NewCgroupName(cm.RootCgroupName, defaultNodeAllocatableCgroup))) _, err := exec.Command("sudo", "sh", "-c", cmd).CombinedOutput() framework.ExpectNoError(err) }
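
To see the new naming scheme end to end, here is a minimal, self-contained
sketch of the conversion logic. It mirrors the patch's helpers under
hypothetical lowercase names (escape, unescape, toSystemd, toCgroupfs,
parseSystemd), and it inlines a simplified version of libcontainer's
cgroupsystemd.ExpandSlice; the real code keeps calling ExpandSlice, which
also validates the ".slice" suffix and handles the root slice.

package main

import (
	"fmt"
	"path"
	"strings"
)

// CgroupName mirrors the patch's new internal representation:
// one string per level of the cgroup hierarchy.
type CgroupName []string

// escape and unescape mirror escapeSystemdCgroupName and
// unescapeSystemdCgroupName from the patch.
func escape(part string) string   { return strings.Replace(part, "-", "_", -1) }
func unescape(part string) string { return strings.Replace(part, "_", "-", -1) }

// toSystemd follows CgroupName.ToSystemd, but inlines a simplified version
// of libcontainer's ExpandSlice: "a-b.slice" expands to "/a.slice/a-b.slice".
func (c CgroupName) toSystemd() string {
	if len(c) == 0 {
		return "/"
	}
	escaped := make([]string, 0, len(c))
	for _, part := range c {
		escaped = append(escaped, escape(part))
	}
	name := strings.Join(escaped, "-")
	// Each "-" in the joined name is one step down the slice hierarchy.
	result, prefix := "", ""
	for _, part := range strings.Split(name, "-") {
		if prefix != "" {
			prefix += "-"
		}
		prefix += part
		result += "/" + prefix + ".slice"
	}
	return result
}

// toCgroupfs follows CgroupName.ToCgroupfs.
func (c CgroupName) toCgroupfs() string {
	return "/" + path.Join(c...)
}

// parseSystemd follows ParseSystemdToCgroupName: keep only the last path
// component, strip the ".slice" suffix, split on "-", and unescape.
func parseSystemd(name string) CgroupName {
	base := strings.TrimSuffix(path.Base(name), ".slice")
	parts := strings.Split(base, "-")
	result := make(CgroupName, 0, len(parts))
	for _, part := range parts {
		result = append(result, unescape(part))
	}
	return result
}

func main() {
	pod := CgroupName{"kubepods", "burstable", "pod1234-abcd"}
	fmt.Println(pod.toCgroupfs())
	// /kubepods/burstable/pod1234-abcd
	fmt.Println(pod.toSystemd())
	// /kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod1234_abcd.slice
	fmt.Println(parseSystemd(pod.toSystemd()))
	// [kubepods burstable pod1234-abcd]
}

Because NewCgroupName rejects "_" in components, the "-" => "_" remapping
stays reversible, which is what makes parseSystemd a true inverse of
toSystemd.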
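
The TODO helper updateSystemdCgroupInfo does one small thing: libcontainer's
systemd driver consumes a Parent slice plus a leaf Name rather than a single
full Path, so the expanded path has to be split at its last component. A
sketch of that split follows, with the helper renamed splitSystemdPath for
standalone use; "-.slice" is systemd's name for the root slice.

package main

import (
	"fmt"
	"path"
)

// splitSystemdPath mirrors updateSystemdCgroupInfo from the patch: it splits
// an expanded systemd cgroup path into the Parent and Name values that
// libcontainer's systemd driver consumes.
func splitSystemdPath(expanded string) (parent, name string) {
	dir, base := path.Split(expanded)
	if dir == "/" {
		// The leaf sits directly under the root slice.
		dir = "-.slice"
	} else {
		dir = path.Base(dir)
	}
	return dir, base
}

func main() {
	parent, name := splitSystemdPath("/kubepods.slice/kubepods-burstable.slice")
	fmt.Println(parent, name) // kubepods.slice kubepods-burstable.slice

	parent, name = splitSystemdPath("/kubepods.slice")
	fmt.Println(parent, name) // -.slice kubepods.slice
}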