diff --git a/pkg/kubelet/cm/cgroup_manager_linux.go b/pkg/kubelet/cm/cgroup_manager_linux.go
index 9f23817f16..6a4d65cd85 100644
--- a/pkg/kubelet/cm/cgroup_manager_linux.go
+++ b/pkg/kubelet/cm/cgroup_manager_linux.go
@@ -53,74 +53,79 @@ const (
 // which is what is expected when interacting with libcontainer
 var hugePageSizeList = []string{"B", "kB", "MB", "GB", "TB", "PB"}
 
-// ConvertCgroupNameToSystemd converts the internal cgroup name to a systemd name.
-// For example, the name /Burstable/pod_123-456 becomes Burstable-pod_123_456.slice
-// If outputToCgroupFs is true, it expands the systemd name into the cgroupfs form.
-// For example, it will return /Burstable.slice/Burstable-pod_123_456.slice in above scenario.
-func ConvertCgroupNameToSystemd(cgroupName CgroupName, outputToCgroupFs bool) string {
-	name := string(cgroupName)
-	result := ""
-	if name != "" && name != "/" {
-		parts := strings.Split(name, "/")
-		results := []string{}
-		for _, part := range parts {
-			// ignore leading stuff
-			if part == "" {
-				continue
-			}
-			// detect if we are given a systemd style name.
-			// if so, we do not want to do double encoding.
-			if IsSystemdStyleName(part) {
-				part = strings.TrimSuffix(part, systemdSuffix)
-				separatorIndex := strings.LastIndex(part, "-")
-				if separatorIndex >= 0 && separatorIndex < len(part) {
-					part = part[separatorIndex+1:]
-				}
-			} else {
-				// systemd treats - as a step in the hierarchy, we convert all - to _
-				part = strings.Replace(part, "-", "_", -1)
-			}
-			results = append(results, part)
+var RootCgroupName = CgroupName([]string{})
+
+// NewCgroupName composes a new cgroup name.
+// Use RootCgroupName as base to start at the root.
+// This function does some basic checks for invalid characters in the name.
+func NewCgroupName(base CgroupName, components ...string) CgroupName {
+	for _, component := range components {
+		// Forbid using "_" in internal names. When remapping internal
+		// names to the systemd cgroup driver, we want to remap "-" => "_",
+		// so we forbid "_" so that we can always reverse the mapping.
+		if strings.Contains(component, "/") || strings.Contains(component, "_") {
+			panic(fmt.Errorf("invalid character in component [%q] of CgroupName", component))
 		}
-		// each part is appended with systemd style -
-		result = strings.Join(results, "-")
-	} else {
-		// root converts to -
-		result = "-"
 	}
-	// always have a .slice suffix
-	if !IsSystemdStyleName(result) {
-		result = result + systemdSuffix
+	return CgroupName(append(base, components...))
+}
+
+func escapeSystemdCgroupName(part string) string {
+	return strings.Replace(part, "-", "_", -1)
+}
+
+func unescapeSystemdCgroupName(part string) string {
+	return strings.Replace(part, "_", "-", -1)
+}
+
+// cgroupName.ToSystemd converts the internal cgroup name to a systemd name.
+// For example, the name {"kubepods", "burstable", "pod1234-abcd-5678-efgh"} becomes
+// "/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod1234_abcd_5678_efgh.slice"
+// This function always expands the systemd name into the cgroupfs form. If only
+// the last part is needed, use path.Base(...) on it to discard the rest.
+func (cgroupName CgroupName) ToSystemd() string {
+	if len(cgroupName) == 0 || (len(cgroupName) == 1 && cgroupName[0] == "") {
+		return "/"
+	}
+	newparts := []string{}
+	for _, part := range cgroupName {
+		part = escapeSystemdCgroupName(part)
+		newparts = append(newparts, part)
 	}
-	// if the caller desired the result in cgroupfs format... 
- if outputToCgroupFs { - var err error - result, err = cgroupsystemd.ExpandSlice(result) - if err != nil { - panic(fmt.Errorf("error adapting cgroup name, input: %v, err: %v", name, err)) - } + result, err := cgroupsystemd.ExpandSlice(strings.Join(newparts, "-") + systemdSuffix) + if err != nil { + // Should never happen... + panic(fmt.Errorf("error converting cgroup name [%v] to systemd format: %v", cgroupName, err)) } return result } -// ConvertCgroupFsNameToSystemd converts an expanded cgroupfs name to its systemd name. -// For example, it will convert test.slice/test-a.slice/test-a-b.slice to become test-a-b.slice -// NOTE: this is public right now to allow its usage in dockermanager and dockershim, ideally both those -// code areas could use something from libcontainer if we get this style function upstream. -func ConvertCgroupFsNameToSystemd(cgroupfsName string) (string, error) { - // TODO: see if libcontainer systemd implementation could use something similar, and if so, move - // this function up to that library. At that time, it would most likely do validation specific to systemd - // above and beyond the simple assumption here that the base of the path encodes the hierarchy - // per systemd convention. - return path.Base(cgroupfsName), nil +func ParseSystemdToCgroupName(name string) CgroupName { + driverName := path.Base(name) + driverName = strings.TrimSuffix(driverName, systemdSuffix) + parts := strings.Split(driverName, "-") + result := []string{} + for _, part := range parts { + result = append(result, unescapeSystemdCgroupName(part)) + } + return CgroupName(result) +} + +func (cgroupName CgroupName) ToCgroupfs() string { + return "/" + path.Join(cgroupName...) +} + +func ParseCgroupfsToCgroupName(name string) CgroupName { + components := strings.Split(strings.TrimPrefix(name, "/"), "/") + if len(components) == 1 && components[0] == "" { + components = []string{} + } + return CgroupName(components) } func IsSystemdStyleName(name string) bool { - if strings.HasSuffix(name, systemdSuffix) { - return true - } - return false + return strings.HasSuffix(name, systemdSuffix) } // libcontainerAdapter provides a simplified interface to libcontainer based on libcontainer type. @@ -156,34 +161,6 @@ func (l *libcontainerAdapter) newManager(cgroups *libcontainerconfigs.Cgroup, pa return nil, fmt.Errorf("invalid cgroup manager configuration") } -func (l *libcontainerAdapter) revertName(name string) CgroupName { - if l.cgroupManagerType != libcontainerSystemd { - return CgroupName(name) - } - return CgroupName(RevertFromSystemdToCgroupStyleName(name)) -} - -func RevertFromSystemdToCgroupStyleName(name string) string { - driverName, err := ConvertCgroupFsNameToSystemd(name) - if err != nil { - panic(err) - } - driverName = strings.TrimSuffix(driverName, systemdSuffix) - driverName = strings.Replace(driverName, "-", "/", -1) - driverName = strings.Replace(driverName, "_", "-", -1) - return driverName -} - -// adaptName converts a CgroupName identifier to a driver specific conversion value. -// if outputToCgroupFs is true, the result is returned in the cgroupfs format rather than the driver specific form. 
-func (l *libcontainerAdapter) adaptName(cgroupName CgroupName, outputToCgroupFs bool) string { - if l.cgroupManagerType != libcontainerSystemd { - name := string(cgroupName) - return name - } - return ConvertCgroupNameToSystemd(cgroupName, outputToCgroupFs) -} - // CgroupSubsystems holds information about the mounted cgroup subsystems type CgroupSubsystems struct { // Cgroup subsystem mounts. @@ -223,13 +200,22 @@ func NewCgroupManager(cs *CgroupSubsystems, cgroupDriver string) CgroupManager { } // Name converts the cgroup to the driver specific value in cgroupfs form. +// This always returns a valid cgroupfs path even when systemd driver is in use! func (m *cgroupManagerImpl) Name(name CgroupName) string { - return m.adapter.adaptName(name, true) + if m.adapter.cgroupManagerType == libcontainerSystemd { + return name.ToSystemd() + } else { + return name.ToCgroupfs() + } } // CgroupName converts the literal cgroupfs name on the host to an internal identifier. func (m *cgroupManagerImpl) CgroupName(name string) CgroupName { - return m.adapter.revertName(name) + if m.adapter.cgroupManagerType == libcontainerSystemd { + return ParseSystemdToCgroupName(name) + } else { + return ParseCgroupfsToCgroupName(name) + } } // buildCgroupPaths builds a path to each cgroup subsystem for the specified name. @@ -242,6 +228,22 @@ func (m *cgroupManagerImpl) buildCgroupPaths(name CgroupName) map[string]string return cgroupPaths } +// TODO(filbranden): This logic belongs in libcontainer/cgroup/systemd instead. +// It should take a libcontainerconfigs.Cgroup.Path field (rather than Name and Parent) +// and split it appropriately, using essentially the logic below. +// This was done for cgroupfs in opencontainers/runc#497 but a counterpart +// for systemd was never introduced. +func updateSystemdCgroupInfo(cgroupConfig *libcontainerconfigs.Cgroup, cgroupName CgroupName) { + dir, base := path.Split(cgroupName.ToSystemd()) + if dir == "/" { + dir = "-.slice" + } else { + dir = path.Base(dir) + } + cgroupConfig.Parent = dir + cgroupConfig.Name = base +} + // Exists checks if all subsystem cgroups already exist func (m *cgroupManagerImpl) Exists(name CgroupName) bool { // Get map of all cgroup paths on the system for the particular cgroup @@ -278,23 +280,13 @@ func (m *cgroupManagerImpl) Destroy(cgroupConfig *CgroupConfig) error { cgroupPaths := m.buildCgroupPaths(cgroupConfig.Name) - // we take the location in traditional cgroupfs format. - abstractCgroupFsName := string(cgroupConfig.Name) - abstractParent := CgroupName(path.Dir(abstractCgroupFsName)) - abstractName := CgroupName(path.Base(abstractCgroupFsName)) - - driverParent := m.adapter.adaptName(abstractParent, false) - driverName := m.adapter.adaptName(abstractName, false) - - // this is an ugly abstraction bleed, but systemd cgroup driver requires full paths... + libcontainerCgroupConfig := &libcontainerconfigs.Cgroup{} + // libcontainer consumes a different field and expects a different syntax + // depending on the cgroup driver in use, so we need this conditional here. if m.adapter.cgroupManagerType == libcontainerSystemd { - driverName = m.adapter.adaptName(cgroupConfig.Name, false) - } - - // Initialize libcontainer's cgroup config with driver specific naming. 
- libcontainerCgroupConfig := &libcontainerconfigs.Cgroup{ - Name: driverName, - Parent: driverParent, + updateSystemdCgroupInfo(libcontainerCgroupConfig, cgroupConfig.Name) + } else { + libcontainerCgroupConfig.Path = cgroupConfig.Name.ToCgroupfs() } manager, err := m.adapter.newManager(libcontainerCgroupConfig, cgroupPaths) @@ -418,26 +410,17 @@ func (m *cgroupManagerImpl) Update(cgroupConfig *CgroupConfig) error { cgroupPaths := m.buildCgroupPaths(cgroupConfig.Name) - // we take the location in traditional cgroupfs format. - abstractCgroupFsName := string(cgroupConfig.Name) - abstractParent := CgroupName(path.Dir(abstractCgroupFsName)) - abstractName := CgroupName(path.Base(abstractCgroupFsName)) - - driverParent := m.adapter.adaptName(abstractParent, false) - driverName := m.adapter.adaptName(abstractName, false) - - // this is an ugly abstraction bleed, but systemd cgroup driver requires full paths... - if m.adapter.cgroupManagerType == libcontainerSystemd { - driverName = m.adapter.adaptName(cgroupConfig.Name, false) - } - - // Initialize libcontainer's cgroup config libcontainerCgroupConfig := &libcontainerconfigs.Cgroup{ - Name: driverName, - Parent: driverParent, Resources: resources, Paths: cgroupPaths, } + // libcontainer consumes a different field and expects a different syntax + // depending on the cgroup driver in use, so we need this conditional here. + if m.adapter.cgroupManagerType == libcontainerSystemd { + updateSystemdCgroupInfo(libcontainerCgroupConfig, cgroupConfig.Name) + } else { + libcontainerCgroupConfig.Path = cgroupConfig.Name.ToCgroupfs() + } if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) && cgroupConfig.ResourceParameters.PodPidsLimit != nil { libcontainerCgroupConfig.PidsLimit = *cgroupConfig.ResourceParameters.PodPidsLimit @@ -456,25 +439,18 @@ func (m *cgroupManagerImpl) Create(cgroupConfig *CgroupConfig) error { metrics.CgroupManagerLatency.WithLabelValues("create").Observe(metrics.SinceInMicroseconds(start)) }() - // we take the location in traditional cgroupfs format. - abstractCgroupFsName := string(cgroupConfig.Name) - abstractParent := CgroupName(path.Dir(abstractCgroupFsName)) - abstractName := CgroupName(path.Base(abstractCgroupFsName)) - - driverParent := m.adapter.adaptName(abstractParent, false) - driverName := m.adapter.adaptName(abstractName, false) - // this is an ugly abstraction bleed, but systemd cgroup driver requires full paths... - if m.adapter.cgroupManagerType == libcontainerSystemd { - driverName = m.adapter.adaptName(cgroupConfig.Name, false) - } - resources := m.toResources(cgroupConfig.ResourceParameters) - // Initialize libcontainer's cgroup config with driver specific naming. + libcontainerCgroupConfig := &libcontainerconfigs.Cgroup{ - Name: driverName, - Parent: driverParent, Resources: resources, } + // libcontainer consumes a different field and expects a different syntax + // depending on the cgroup driver in use, so we need this conditional here. 
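+	// For illustration (hypothetical name, not from this patch): given the
+	// internal name {"kubepods", "burstable"}, the systemd branch below sets
+	// Parent="kubepods.slice" and Name="kubepods-burstable.slice", while the
+	// cgroupfs branch sets Path="/kubepods/burstable".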
+ if m.adapter.cgroupManagerType == libcontainerSystemd { + updateSystemdCgroupInfo(libcontainerCgroupConfig, cgroupConfig.Name) + } else { + libcontainerCgroupConfig.Path = cgroupConfig.Name.ToCgroupfs() + } if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) && cgroupConfig.ResourceParameters.PodPidsLimit != nil { libcontainerCgroupConfig.PidsLimit = *cgroupConfig.ResourceParameters.PodPidsLimit diff --git a/pkg/kubelet/cm/cgroup_manager_linux_test.go b/pkg/kubelet/cm/cgroup_manager_linux_test.go index 9947c15dac..eb71014c5a 100644 --- a/pkg/kubelet/cm/cgroup_manager_linux_test.go +++ b/pkg/kubelet/cm/cgroup_manager_linux_test.go @@ -18,119 +18,105 @@ limitations under the License. package cm -import "testing" +import ( + "path" + "testing" +) -func TestLibcontainerAdapterAdaptToSystemd(t *testing.T) { +func TestCgroupNameToSystemdBasename(t *testing.T) { testCases := []struct { - input string + input CgroupName expected string }{ { - input: "/", - expected: "-.slice", + input: RootCgroupName, + expected: "/", }, { - input: "/system.slice", + input: NewCgroupName(RootCgroupName, "system"), expected: "system.slice", }, { - input: "/system.slice/Burstable", + input: NewCgroupName(RootCgroupName, "system", "Burstable"), expected: "system-Burstable.slice", }, { - input: "/Burstable.slice/Burstable-pod_123.slice", + input: NewCgroupName(RootCgroupName, "Burstable", "pod-123"), expected: "Burstable-pod_123.slice", }, { - input: "/test.slice/test-a.slice/test-a-b.slice", + input: NewCgroupName(RootCgroupName, "test", "a", "b"), expected: "test-a-b.slice", }, { - input: "/test.slice/test-a.slice/test-a-b.slice/Burstable", + input: NewCgroupName(RootCgroupName, "test", "a", "b", "Burstable"), expected: "test-a-b-Burstable.slice", }, { - input: "/Burstable", + input: NewCgroupName(RootCgroupName, "Burstable"), expected: "Burstable.slice", }, { - input: "/Burstable/pod_123", - expected: "Burstable-pod_123.slice", - }, - { - input: "/BestEffort/pod_6c1a4e95-6bb6-11e6-bc26-28d2444e470d", + input: NewCgroupName(RootCgroupName, "BestEffort", "pod-6c1a4e95-6bb6-11e6-bc26-28d2444e470d"), expected: "BestEffort-pod_6c1a4e95_6bb6_11e6_bc26_28d2444e470d.slice", }, } for _, testCase := range testCases { - f := newLibcontainerAdapter(libcontainerSystemd) - if actual := f.adaptName(CgroupName(testCase.input), false); actual != testCase.expected { + if actual := path.Base(testCase.input.ToSystemd()); actual != testCase.expected { t.Errorf("Unexpected result, input: %v, expected: %v, actual: %v", testCase.input, testCase.expected, actual) } } } -func TestLibcontainerAdapterAdaptToSystemdAsCgroupFs(t *testing.T) { +func TestCgroupNameToSystemd(t *testing.T) { testCases := []struct { - input string + input CgroupName expected string }{ { - input: "/", + input: RootCgroupName, expected: "/", }, { - input: "/Burstable", + input: NewCgroupName(RootCgroupName, "Burstable"), expected: "/Burstable.slice", }, { - input: "/Burstable/pod_123", + input: NewCgroupName(RootCgroupName, "Burstable", "pod-123"), expected: "/Burstable.slice/Burstable-pod_123.slice", }, { - input: "/BestEffort/pod_6c1a4e95-6bb6-11e6-bc26-28d2444e470d", + input: NewCgroupName(RootCgroupName, "BestEffort", "pod-6c1a4e95-6bb6-11e6-bc26-28d2444e470d"), expected: "/BestEffort.slice/BestEffort-pod_6c1a4e95_6bb6_11e6_bc26_28d2444e470d.slice", }, { - input: "/kubepods", + input: NewCgroupName(RootCgroupName, "kubepods"), expected: "/kubepods.slice", }, } for _, testCase := range testCases { - f := 
newLibcontainerAdapter(libcontainerSystemd) - if actual := f.adaptName(CgroupName(testCase.input), true); actual != testCase.expected { + if actual := testCase.input.ToSystemd(); actual != testCase.expected { t.Errorf("Unexpected result, input: %v, expected: %v, actual: %v", testCase.input, testCase.expected, actual) } } } -func TestLibcontainerAdapterNotAdaptToSystemd(t *testing.T) { - cgroupfs := newLibcontainerAdapter(libcontainerCgroupfs) - otherAdatper := newLibcontainerAdapter(libcontainerCgroupManagerType("test")) - +func TestCgroupNameToCgroupfs(t *testing.T) { testCases := []struct { - input string + input CgroupName expected string }{ { - input: "/", + input: RootCgroupName, expected: "/", }, { - input: "/Burstable", + input: NewCgroupName(RootCgroupName, "Burstable"), expected: "/Burstable", }, - { - input: "", - expected: "", - }, } for _, testCase := range testCases { - if actual := cgroupfs.adaptName(CgroupName(testCase.input), true); actual != testCase.expected { - t.Errorf("Unexpected result, input: %v, expected: %v, actual: %v", testCase.input, testCase.expected, actual) - } - - if actual := otherAdatper.adaptName(CgroupName(testCase.input), true); actual != testCase.expected { + if actual := testCase.input.ToCgroupfs(); actual != testCase.expected { t.Errorf("Unexpected result, input: %v, expected: %v, actual: %v", testCase.input, testCase.expected, actual) } } diff --git a/pkg/kubelet/cm/cgroup_manager_unsupported.go b/pkg/kubelet/cm/cgroup_manager_unsupported.go index 34345985cf..5d77ed7a45 100644 --- a/pkg/kubelet/cm/cgroup_manager_unsupported.go +++ b/pkg/kubelet/cm/cgroup_manager_unsupported.go @@ -63,25 +63,35 @@ func (m *unsupportedCgroupManager) Pids(_ CgroupName) []int { } func (m *unsupportedCgroupManager) CgroupName(name string) CgroupName { - return "" + return CgroupName([]string{}) } func (m *unsupportedCgroupManager) ReduceCPULimits(cgroupName CgroupName) error { return nil } -func ConvertCgroupFsNameToSystemd(cgroupfsName string) (string, error) { - return "", nil +var RootCgroupName = CgroupName([]string{}) + +func NewCgroupName(base CgroupName, components ...string) CgroupName { + return CgroupName(append(base, components...)) } -func ConvertCgroupNameToSystemd(cgroupName CgroupName, outputToCgroupFs bool) string { +func (cgroupName CgroupName) ToSystemd() string { return "" } -func RevertFromSystemdToCgroupStyleName(name string) string { +func ParseSystemdToCgroupName(name string) CgroupName { + return nil +} + +func (cgroupName CgroupName) ToCgroupfs() string { return "" } +func ParseCgroupfsToCgroupName(name string) CgroupName { + return nil +} + func IsSystemdStyleName(name string) bool { return false } diff --git a/pkg/kubelet/cm/container_manager_linux.go b/pkg/kubelet/cm/container_manager_linux.go index 236ac71dc1..b9979f254a 100644 --- a/pkg/kubelet/cm/container_manager_linux.go +++ b/pkg/kubelet/cm/container_manager_linux.go @@ -123,7 +123,7 @@ type containerManagerImpl struct { capacity v1.ResourceList // Absolute cgroupfs path to a cgroup that Kubelet needs to place all pods under. // This path include a top level container for enforcing Node Allocatable. - cgroupRoot string + cgroupRoot CgroupName // Event recorder interface. 
recorder record.EventRecorder // Interface for QoS cgroup management @@ -223,7 +223,8 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I } capacity = cadvisor.CapacityFromMachineInfo(machineInfo) - cgroupRoot := nodeConfig.CgroupRoot + // Turn CgroupRoot from a string (in cgroupfs path format) to internal CgroupName + cgroupRoot := ParseCgroupfsToCgroupName(nodeConfig.CgroupRoot) cgroupManager := NewCgroupManager(subsystems, nodeConfig.CgroupDriver) // Check if Cgroup-root actually exists on the node if nodeConfig.CgroupsPerQOS { @@ -236,13 +237,13 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I // of note, we always use the cgroupfs driver when performing this check since // the input is provided in that format. // this is important because we do not want any name conversion to occur. - if !cgroupManager.Exists(CgroupName(cgroupRoot)) { + if !cgroupManager.Exists(cgroupRoot) { return nil, fmt.Errorf("invalid configuration: cgroup-root %q doesn't exist: %v", cgroupRoot, err) } glog.Infof("container manager verified user specified cgroup-root exists: %v", cgroupRoot) // Include the top level cgroup for enforcing node allocatable into cgroup-root. // This way, all sub modules can avoid having to understand the concept of node allocatable. - cgroupRoot = path.Join(cgroupRoot, defaultNodeAllocatableCgroupName) + cgroupRoot = NewCgroupName(cgroupRoot, defaultNodeAllocatableCgroupName) } glog.Infof("Creating Container Manager object based on Node Config: %+v", nodeConfig) @@ -305,7 +306,7 @@ func (cm *containerManagerImpl) NewPodContainerManager() PodContainerManager { } } return &podContainerManagerNoop{ - cgroupRoot: CgroupName(cm.cgroupRoot), + cgroupRoot: cm.cgroupRoot, } } @@ -503,7 +504,7 @@ func (cm *containerManagerImpl) GetNodeConfig() NodeConfig { // GetPodCgroupRoot returns the literal cgroupfs value for the cgroup containing all pods. func (cm *containerManagerImpl) GetPodCgroupRoot() string { - return cm.cgroupManager.Name(CgroupName(cm.cgroupRoot)) + return cm.cgroupManager.Name(cm.cgroupRoot) } func (cm *containerManagerImpl) GetMountedSubsystems() *CgroupSubsystems { diff --git a/pkg/kubelet/cm/node_container_manager.go b/pkg/kubelet/cm/node_container_manager.go index b43176b1a3..6c1e022c2f 100644 --- a/pkg/kubelet/cm/node_container_manager.go +++ b/pkg/kubelet/cm/node_container_manager.go @@ -42,7 +42,7 @@ const ( //createNodeAllocatableCgroups creates Node Allocatable Cgroup when CgroupsPerQOS flag is specified as true func (cm *containerManagerImpl) createNodeAllocatableCgroups() error { cgroupConfig := &CgroupConfig{ - Name: CgroupName(cm.cgroupRoot), + Name: cm.cgroupRoot, // The default limits for cpu shares can be very low which can lead to CPU starvation for pods. ResourceParameters: getCgroupConfig(cm.capacity), } @@ -71,7 +71,7 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error { glog.V(4).Infof("Attempting to enforce Node Allocatable with config: %+v", nc) cgroupConfig := &CgroupConfig{ - Name: CgroupName(cm.cgroupRoot), + Name: cm.cgroupRoot, ResourceParameters: getCgroupConfig(nodeAllocatable), } @@ -88,7 +88,8 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error { // Pod Evictions are expected to bring down memory usage to below Node Allocatable limits. // Until evictions happen retry cgroup updates. // Update limits on non root cgroup-root to be safe since the default limits for CPU can be too low. 
- if cm.cgroupRoot != "/" { + // Check if cgroupRoot is set to a non-empty value (empty would be the root container) + if len(cm.cgroupRoot) > 0 { go func() { for { err := cm.cgroupManager.Update(cgroupConfig) @@ -105,7 +106,7 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error { // Now apply kube reserved and system reserved limits if required. if nc.EnforceNodeAllocatable.Has(kubetypes.SystemReservedEnforcementKey) { glog.V(2).Infof("Enforcing System reserved on cgroup %q with limits: %+v", nc.SystemReservedCgroupName, nc.SystemReserved) - if err := enforceExistingCgroup(cm.cgroupManager, nc.SystemReservedCgroupName, nc.SystemReserved); err != nil { + if err := enforceExistingCgroup(cm.cgroupManager, ParseCgroupfsToCgroupName(nc.SystemReservedCgroupName), nc.SystemReserved); err != nil { message := fmt.Sprintf("Failed to enforce System Reserved Cgroup Limits on %q: %v", nc.SystemReservedCgroupName, err) cm.recorder.Event(nodeRef, v1.EventTypeWarning, events.FailedNodeAllocatableEnforcement, message) return fmt.Errorf(message) @@ -114,7 +115,7 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error { } if nc.EnforceNodeAllocatable.Has(kubetypes.KubeReservedEnforcementKey) { glog.V(2).Infof("Enforcing kube reserved on cgroup %q with limits: %+v", nc.KubeReservedCgroupName, nc.KubeReserved) - if err := enforceExistingCgroup(cm.cgroupManager, nc.KubeReservedCgroupName, nc.KubeReserved); err != nil { + if err := enforceExistingCgroup(cm.cgroupManager, ParseCgroupfsToCgroupName(nc.KubeReservedCgroupName), nc.KubeReserved); err != nil { message := fmt.Sprintf("Failed to enforce Kube Reserved Cgroup Limits on %q: %v", nc.KubeReservedCgroupName, err) cm.recorder.Event(nodeRef, v1.EventTypeWarning, events.FailedNodeAllocatableEnforcement, message) return fmt.Errorf(message) @@ -125,9 +126,9 @@ func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error { } // enforceExistingCgroup updates the limits `rl` on existing cgroup `cName` using `cgroupManager` interface. 
-func enforceExistingCgroup(cgroupManager CgroupManager, cName string, rl v1.ResourceList) error { +func enforceExistingCgroup(cgroupManager CgroupManager, cName CgroupName, rl v1.ResourceList) error { cgroupConfig := &CgroupConfig{ - Name: CgroupName(cName), + Name: cName, ResourceParameters: getCgroupConfig(rl), } glog.V(4).Infof("Enforcing limits on cgroup %q with %d cpu shares and %d bytes of memory", cName, cgroupConfig.ResourceParameters.CpuShares, cgroupConfig.ResourceParameters.Memory) diff --git a/pkg/kubelet/cm/pod_container_manager_linux.go b/pkg/kubelet/cm/pod_container_manager_linux.go index a9a873691c..a749c9087e 100644 --- a/pkg/kubelet/cm/pod_container_manager_linux.go +++ b/pkg/kubelet/cm/pod_container_manager_linux.go @@ -104,7 +104,7 @@ func (m *podContainerManagerImpl) EnsureExists(pod *v1.Pod) error { func (m *podContainerManagerImpl) GetPodContainerName(pod *v1.Pod) (CgroupName, string) { podQOS := v1qos.GetPodQOS(pod) // Get the parent QOS container name - var parentContainer string + var parentContainer CgroupName switch podQOS { case v1.PodQOSGuaranteed: parentContainer = m.qosContainersInfo.Guaranteed @@ -116,7 +116,7 @@ func (m *podContainerManagerImpl) GetPodContainerName(pod *v1.Pod) (CgroupName, podContainer := GetPodCgroupNameSuffix(pod.UID) // Get the absolute path of the cgroup - cgroupName := (CgroupName)(path.Join(parentContainer, podContainer)) + cgroupName := NewCgroupName(parentContainer, podContainer) // Get the literal cgroupfs name cgroupfsName := m.cgroupManager.Name(cgroupName) @@ -189,7 +189,7 @@ func (m *podContainerManagerImpl) ReduceCPULimits(podCgroup CgroupName) error { func (m *podContainerManagerImpl) GetAllPodsFromCgroups() (map[types.UID]CgroupName, error) { // Map for storing all the found pods on the disk foundPods := make(map[types.UID]CgroupName) - qosContainersList := [3]string{m.qosContainersInfo.BestEffort, m.qosContainersInfo.Burstable, m.qosContainersInfo.Guaranteed} + qosContainersList := [3]CgroupName{m.qosContainersInfo.BestEffort, m.qosContainersInfo.Burstable, m.qosContainersInfo.Guaranteed} // Scan through all the subsystem mounts // and through each QoS cgroup directory for each subsystem mount // If a pod cgroup exists in even a single subsystem mount @@ -197,7 +197,7 @@ func (m *podContainerManagerImpl) GetAllPodsFromCgroups() (map[types.UID]CgroupN for _, val := range m.subsystems.MountPoints { for _, qosContainerName := range qosContainersList { // get the subsystems QoS cgroup absolute name - qcConversion := m.cgroupManager.Name(CgroupName(qosContainerName)) + qcConversion := m.cgroupManager.Name(qosContainerName) qc := path.Join(val, qcConversion) dirInfo, err := ioutil.ReadDir(qc) if err != nil { @@ -219,7 +219,7 @@ func (m *podContainerManagerImpl) GetAllPodsFromCgroups() (map[types.UID]CgroupN internalPath := m.cgroupManager.CgroupName(cgroupfsPath) // we only care about base segment of the converted path since that // is what we are reading currently to know if it is a pod or not. 
- basePath := path.Base(string(internalPath)) + basePath := internalPath[len(internalPath)-1] if !strings.Contains(basePath, podCgroupNamePrefix) { continue } @@ -259,7 +259,7 @@ func (m *podContainerManagerNoop) EnsureExists(_ *v1.Pod) error { } func (m *podContainerManagerNoop) GetPodContainerName(_ *v1.Pod) (CgroupName, string) { - return m.cgroupRoot, string(m.cgroupRoot) + return m.cgroupRoot, m.cgroupRoot.ToCgroupfs() } func (m *podContainerManagerNoop) GetPodContainerNameForDriver(_ *v1.Pod) string { diff --git a/pkg/kubelet/cm/pod_container_manager_stub.go b/pkg/kubelet/cm/pod_container_manager_stub.go index 61923f172e..c4cb71156b 100644 --- a/pkg/kubelet/cm/pod_container_manager_stub.go +++ b/pkg/kubelet/cm/pod_container_manager_stub.go @@ -35,7 +35,7 @@ func (m *podContainerManagerStub) EnsureExists(_ *v1.Pod) error { } func (m *podContainerManagerStub) GetPodContainerName(_ *v1.Pod) (CgroupName, string) { - return "", "" + return nil, "" } func (m *podContainerManagerStub) Destroy(_ CgroupName) error { diff --git a/pkg/kubelet/cm/qos_container_manager_linux.go b/pkg/kubelet/cm/qos_container_manager_linux.go index 62380a2b2f..2cfc198c3a 100644 --- a/pkg/kubelet/cm/qos_container_manager_linux.go +++ b/pkg/kubelet/cm/qos_container_manager_linux.go @@ -18,7 +18,6 @@ package cm import ( "fmt" - "path" "strings" "sync" "time" @@ -60,17 +59,17 @@ type qosContainerManagerImpl struct { qosReserved map[v1.ResourceName]int64 } -func NewQOSContainerManager(subsystems *CgroupSubsystems, cgroupRoot string, nodeConfig NodeConfig, cgroupManager CgroupManager) (QOSContainerManager, error) { +func NewQOSContainerManager(subsystems *CgroupSubsystems, cgroupRoot CgroupName, nodeConfig NodeConfig, cgroupManager CgroupManager) (QOSContainerManager, error) { if !nodeConfig.CgroupsPerQOS { return &qosContainerManagerNoop{ - cgroupRoot: CgroupName(cgroupRoot), + cgroupRoot: cgroupRoot, }, nil } return &qosContainerManagerImpl{ subsystems: subsystems, cgroupManager: cgroupManager, - cgroupRoot: CgroupName(cgroupRoot), + cgroupRoot: cgroupRoot, qosReserved: nodeConfig.QOSReserved, }, nil } @@ -81,23 +80,20 @@ func (m *qosContainerManagerImpl) GetQOSContainersInfo() QOSContainersInfo { func (m *qosContainerManagerImpl) Start(getNodeAllocatable func() v1.ResourceList, activePods ActivePodsFunc) error { cm := m.cgroupManager - rootContainer := string(m.cgroupRoot) - if !cm.Exists(CgroupName(rootContainer)) { - return fmt.Errorf("root container %s doesn't exist", rootContainer) + rootContainer := m.cgroupRoot + if !cm.Exists(rootContainer) { + return fmt.Errorf("root container %v doesn't exist", rootContainer) } // Top level for Qos containers are created only for Burstable // and Best Effort classes - qosClasses := map[v1.PodQOSClass]string{ - v1.PodQOSBurstable: path.Join(rootContainer, strings.ToLower(string(v1.PodQOSBurstable))), - v1.PodQOSBestEffort: path.Join(rootContainer, strings.ToLower(string(v1.PodQOSBestEffort))), + qosClasses := map[v1.PodQOSClass]CgroupName{ + v1.PodQOSBurstable: NewCgroupName(rootContainer, strings.ToLower(string(v1.PodQOSBurstable))), + v1.PodQOSBestEffort: NewCgroupName(rootContainer, strings.ToLower(string(v1.PodQOSBestEffort))), } // Create containers for both qos classes for qosClass, containerName := range qosClasses { - // get the container's absolute name - absoluteContainerName := CgroupName(containerName) - resourceParameters := &ResourceConfig{} // the BestEffort QoS class has a statically configured minShares value if qosClass == v1.PodQOSBestEffort { @@ -107,7 
+103,7 @@ func (m *qosContainerManagerImpl) Start(getNodeAllocatable func() v1.ResourceLis // containerConfig object stores the cgroup specifications containerConfig := &CgroupConfig{ - Name: absoluteContainerName, + Name: containerName, ResourceParameters: resourceParameters, } @@ -117,7 +113,7 @@ func (m *qosContainerManagerImpl) Start(getNodeAllocatable func() v1.ResourceLis } // check if it exists - if !cm.Exists(absoluteContainerName) { + if !cm.Exists(containerName) { if err := cm.Create(containerConfig); err != nil { return fmt.Errorf("failed to create top level %v QOS cgroup : %v", qosClass, err) } @@ -279,11 +275,11 @@ func (m *qosContainerManagerImpl) UpdateCgroups() error { qosConfigs := map[v1.PodQOSClass]*CgroupConfig{ v1.PodQOSBurstable: { - Name: CgroupName(m.qosContainersInfo.Burstable), + Name: m.qosContainersInfo.Burstable, ResourceParameters: &ResourceConfig{}, }, v1.PodQOSBestEffort: { - Name: CgroupName(m.qosContainersInfo.BestEffort), + Name: m.qosContainersInfo.BestEffort, ResourceParameters: &ResourceConfig{}, }, } diff --git a/pkg/kubelet/cm/types.go b/pkg/kubelet/cm/types.go index ce2cc2c826..e95f44340b 100644 --- a/pkg/kubelet/cm/types.go +++ b/pkg/kubelet/cm/types.go @@ -38,7 +38,9 @@ type ResourceConfig struct { } // CgroupName is the abstract name of a cgroup prior to any driver specific conversion. -type CgroupName string +// It is specified as a list of strings from its individual components, such as: +// {"kubepods", "burstable", "pod1234-abcd-5678-efgh"} +type CgroupName []string // CgroupConfig holds the cgroup configuration information. // This is common object which is used to specify @@ -78,7 +80,7 @@ type CgroupManager interface { Exists(name CgroupName) bool // Name returns the literal cgroupfs name on the host after any driver specific conversions. // We would expect systemd implementation to make appropriate name conversion. 
- // For example, if we pass /foo/bar + // For example, if we pass {"foo", "bar"} // then systemd should convert the name to something like // foo.slice/foo-bar.slice Name(name CgroupName) string @@ -94,9 +96,9 @@ type CgroupManager interface { // QOSContainersInfo stores the names of containers per qos type QOSContainersInfo struct { - Guaranteed string - BestEffort string - Burstable string + Guaranteed CgroupName + BestEffort CgroupName + Burstable CgroupName } // PodContainerManager stores and manages pod level containers diff --git a/pkg/kubelet/dockershim/BUILD b/pkg/kubelet/dockershim/BUILD index d452a553b0..6f90f2d060 100644 --- a/pkg/kubelet/dockershim/BUILD +++ b/pkg/kubelet/dockershim/BUILD @@ -85,7 +85,6 @@ go_library( "//pkg/kubelet/checkpointmanager:go_default_library", "//pkg/kubelet/checkpointmanager/checksum:go_default_library", "//pkg/kubelet/checkpointmanager/errors:go_default_library", - "//pkg/kubelet/cm:go_default_library", "//pkg/kubelet/container:go_default_library", "//pkg/kubelet/dockershim/cm:go_default_library", "//pkg/kubelet/dockershim/libdocker:go_default_library", diff --git a/pkg/kubelet/dockershim/docker_service.go b/pkg/kubelet/dockershim/docker_service.go index 1b6f886d94..4cd973af84 100644 --- a/pkg/kubelet/dockershim/docker_service.go +++ b/pkg/kubelet/dockershim/docker_service.go @@ -20,6 +20,7 @@ import ( "context" "fmt" "net/http" + "path" "path/filepath" "sync" "time" @@ -32,7 +33,6 @@ import ( runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2" "k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig" "k8s.io/kubernetes/pkg/kubelet/checkpointmanager" - kubecm "k8s.io/kubernetes/pkg/kubelet/cm" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" "k8s.io/kubernetes/pkg/kubelet/dockershim/cm" "k8s.io/kubernetes/pkg/kubelet/dockershim/network" @@ -432,17 +432,14 @@ func (ds *dockerService) ServeHTTP(w http.ResponseWriter, r *http.Request) { // GenerateExpectedCgroupParent returns cgroup parent in syntax expected by cgroup driver func (ds *dockerService) GenerateExpectedCgroupParent(cgroupParent string) (string, error) { - if len(cgroupParent) > 0 { + if cgroupParent != "" { // if docker uses the systemd cgroup driver, it expects *.slice style names for cgroup parent. // if we configured kubelet to use --cgroup-driver=cgroupfs, and docker is configured to use systemd driver // docker will fail to launch the container because the name we provide will not be a valid slice. // this is a very good thing. if ds.cgroupDriver == "systemd" { - systemdCgroupParent, err := kubecm.ConvertCgroupFsNameToSystemd(cgroupParent) - if err != nil { - return "", err - } - cgroupParent = systemdCgroupParent + // Pass only the last component of the cgroup path to systemd. + cgroupParent = path.Base(cgroupParent) } } glog.V(3).Infof("Setting cgroup parent to: %q", cgroupParent) diff --git a/pkg/kubelet/stats/cadvisor_stats_provider.go b/pkg/kubelet/stats/cadvisor_stats_provider.go index 67da08d408..8529872cd5 100644 --- a/pkg/kubelet/stats/cadvisor_stats_provider.go +++ b/pkg/kubelet/stats/cadvisor_stats_provider.go @@ -255,9 +255,14 @@ func isPodManagedContainer(cinfo *cadvisorapiv2.ContainerInfo) bool { func getCadvisorPodInfoFromPodUID(podUID types.UID, infos map[string]cadvisorapiv2.ContainerInfo) *cadvisorapiv2.ContainerInfo { for key, info := range infos { if cm.IsSystemdStyleName(key) { - key = cm.RevertFromSystemdToCgroupStyleName(key) + // Convert to internal cgroup name and take the last component only. 
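+			// e.g. a (hypothetical) key "kubepods-burstable-pod1234_abcd.slice" parses
+			// to {"kubepods", "burstable", "pod1234-abcd"}, and the last component
+			// recovers the original "pod1234-abcd" suffix for the comparison below.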
+ internalCgroupName := cm.ParseSystemdToCgroupName(key) + key = internalCgroupName[len(internalCgroupName)-1] + } else { + // Take last component only. + key = path.Base(key) } - if cm.GetPodCgroupNameSuffix(podUID) == path.Base(key) { + if cm.GetPodCgroupNameSuffix(podUID) == key { return &info } } diff --git a/test/e2e_node/hugepages_test.go b/test/e2e_node/hugepages_test.go index a8683dc3cd..cd2ab1e171 100644 --- a/test/e2e_node/hugepages_test.go +++ b/test/e2e_node/hugepages_test.go @@ -19,7 +19,6 @@ package e2e_node import ( "fmt" "os/exec" - "path" "strconv" "strings" "time" @@ -38,14 +37,14 @@ import ( ) // makePodToVerifyHugePages returns a pod that verifies specified cgroup with hugetlb -func makePodToVerifyHugePages(cgroupName cm.CgroupName, hugePagesLimit resource.Quantity) *apiv1.Pod { +func makePodToVerifyHugePages(baseName string, hugePagesLimit resource.Quantity) *apiv1.Pod { // convert the cgroup name to its literal form cgroupFsName := "" - cgroupName = cm.CgroupName(path.Join(defaultNodeAllocatableCgroup, string(cgroupName))) + cgroupName := cm.NewCgroupName(cm.RootCgroupName, defaultNodeAllocatableCgroup, baseName) if framework.TestContext.KubeletConfig.CgroupDriver == "systemd" { - cgroupFsName = cm.ConvertCgroupNameToSystemd(cgroupName, true) + cgroupFsName = cgroupName.ToSystemd() } else { - cgroupFsName = string(cgroupName) + cgroupFsName = cgroupName.ToCgroupfs() } // this command takes the expected value and compares it against the actual value for the pod cgroup hugetlb.2MB.limit_in_bytes @@ -184,7 +183,7 @@ func runHugePagesTests(f *framework.Framework) { }) podUID := string(pod.UID) By("checking if the expected hugetlb settings were applied") - verifyPod := makePodToVerifyHugePages(cm.CgroupName("pod"+podUID), resource.MustParse("50Mi")) + verifyPod := makePodToVerifyHugePages("pod"+podUID, resource.MustParse("50Mi")) f.PodClient().Create(verifyPod) err := framework.WaitForPodSuccessInNamespace(f.ClientSet, verifyPod.Name, f.Namespace.Name) Expect(err).NotTo(HaveOccurred()) diff --git a/test/e2e_node/node_container_manager_test.go b/test/e2e_node/node_container_manager_test.go index c31bbc96d0..c1bf7e2dce 100644 --- a/test/e2e_node/node_container_manager_test.go +++ b/test/e2e_node/node_container_manager_test.go @@ -21,7 +21,6 @@ package e2e_node import ( "fmt" "io/ioutil" - "path" "path/filepath" "strconv" "strings" @@ -99,8 +98,8 @@ func getAllocatableLimits(cpu, memory string, capacity v1.ResourceList) (*resour } const ( - kubeReservedCgroup = "/kube_reserved" - systemReservedCgroup = "/system_reserved" + kubeReservedCgroup = "kube_reserved" + systemReservedCgroup = "system_reserved" ) func createIfNotExists(cm cm.CgroupManager, cgroupConfig *cm.CgroupConfig) error { @@ -115,13 +114,13 @@ func createIfNotExists(cm cm.CgroupManager, cgroupConfig *cm.CgroupConfig) error func createTemporaryCgroupsForReservation(cgroupManager cm.CgroupManager) error { // Create kube reserved cgroup cgroupConfig := &cm.CgroupConfig{ - Name: cm.CgroupName(kubeReservedCgroup), + Name: cm.NewCgroupName(cm.RootCgroupName, kubeReservedCgroup), } if err := createIfNotExists(cgroupManager, cgroupConfig); err != nil { return err } // Create system reserved cgroup - cgroupConfig.Name = cm.CgroupName(systemReservedCgroup) + cgroupConfig.Name = cm.NewCgroupName(cm.RootCgroupName, systemReservedCgroup) return createIfNotExists(cgroupManager, cgroupConfig) } @@ -129,12 +128,12 @@ func createTemporaryCgroupsForReservation(cgroupManager cm.CgroupManager) error func 
destroyTemporaryCgroupsForReservation(cgroupManager cm.CgroupManager) error { // Create kube reserved cgroup cgroupConfig := &cm.CgroupConfig{ - Name: cm.CgroupName(kubeReservedCgroup), + Name: cm.NewCgroupName(cm.RootCgroupName, kubeReservedCgroup), } if err := cgroupManager.Destroy(cgroupConfig); err != nil { return err } - cgroupConfig.Name = cm.CgroupName(systemReservedCgroup) + cgroupConfig.Name = cm.NewCgroupName(cm.RootCgroupName, systemReservedCgroup) return cgroupManager.Destroy(cgroupConfig) } @@ -173,8 +172,9 @@ func runTest(f *framework.Framework) error { // Set new config and current config. currentConfig := newCfg - expectedNAPodCgroup := path.Join(currentConfig.CgroupRoot, "kubepods") - if !cgroupManager.Exists(cm.CgroupName(expectedNAPodCgroup)) { + expectedNAPodCgroup := cm.ParseCgroupfsToCgroupName(currentConfig.CgroupRoot) + expectedNAPodCgroup = cm.NewCgroupName(expectedNAPodCgroup, "kubepods") + if !cgroupManager.Exists(expectedNAPodCgroup) { return fmt.Errorf("Expected Node Allocatable Cgroup Does not exist") } // TODO: Update cgroupManager to expose a Status interface to get current Cgroup Settings. @@ -218,30 +218,32 @@ func runTest(f *framework.Framework) error { return nil }, time.Minute, 5*time.Second).Should(BeNil()) - if !cgroupManager.Exists(cm.CgroupName(kubeReservedCgroup)) { + kubeReservedCgroupName := cm.NewCgroupName(cm.RootCgroupName, kubeReservedCgroup) + if !cgroupManager.Exists(kubeReservedCgroupName) { return fmt.Errorf("Expected kube reserved cgroup Does not exist") } // Expect CPU shares on kube reserved cgroup to equal it's reservation which is `100m`. kubeReservedCPU := resource.MustParse(currentConfig.KubeReserved[string(v1.ResourceCPU)]) - if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["cpu"], kubeReservedCgroup, "cpu.shares"), int64(cm.MilliCPUToShares(kubeReservedCPU.MilliValue())), 10); err != nil { + if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["cpu"], cgroupManager.Name(kubeReservedCgroupName), "cpu.shares"), int64(cm.MilliCPUToShares(kubeReservedCPU.MilliValue())), 10); err != nil { return err } // Expect Memory limit kube reserved cgroup to equal configured value `100Mi`. kubeReservedMemory := resource.MustParse(currentConfig.KubeReserved[string(v1.ResourceMemory)]) - if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["memory"], kubeReservedCgroup, "memory.limit_in_bytes"), kubeReservedMemory.Value(), 0); err != nil { + if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["memory"], cgroupManager.Name(kubeReservedCgroupName), "memory.limit_in_bytes"), kubeReservedMemory.Value(), 0); err != nil { return err } - if !cgroupManager.Exists(cm.CgroupName(systemReservedCgroup)) { + systemReservedCgroupName := cm.NewCgroupName(cm.RootCgroupName, systemReservedCgroup) + if !cgroupManager.Exists(systemReservedCgroupName) { return fmt.Errorf("Expected system reserved cgroup Does not exist") } // Expect CPU shares on system reserved cgroup to equal it's reservation which is `100m`. 
systemReservedCPU := resource.MustParse(currentConfig.SystemReserved[string(v1.ResourceCPU)]) - if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["cpu"], systemReservedCgroup, "cpu.shares"), int64(cm.MilliCPUToShares(systemReservedCPU.MilliValue())), 10); err != nil { + if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["cpu"], cgroupManager.Name(systemReservedCgroupName), "cpu.shares"), int64(cm.MilliCPUToShares(systemReservedCPU.MilliValue())), 10); err != nil { return err } // Expect Memory limit on node allocatable cgroup to equal allocatable. systemReservedMemory := resource.MustParse(currentConfig.SystemReserved[string(v1.ResourceMemory)]) - if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["memory"], systemReservedCgroup, "memory.limit_in_bytes"), systemReservedMemory.Value(), 0); err != nil { + if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["memory"], cgroupManager.Name(systemReservedCgroupName), "memory.limit_in_bytes"), systemReservedMemory.Value(), 0); err != nil { return err } return nil diff --git a/test/e2e_node/pods_container_manager_test.go b/test/e2e_node/pods_container_manager_test.go index b54cee5478..c835548beb 100644 --- a/test/e2e_node/pods_container_manager_test.go +++ b/test/e2e_node/pods_container_manager_test.go @@ -17,7 +17,7 @@ limitations under the License. package e2e_node import ( - "path" + "strings" "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" @@ -61,12 +61,15 @@ const ( ) // makePodToVerifyCgroups returns a pod that verifies the existence of the specified cgroups. -func makePodToVerifyCgroups(cgroupNames []cm.CgroupName) *v1.Pod { +func makePodToVerifyCgroups(cgroupNames []string) *v1.Pod { // convert the names to their literal cgroupfs forms... cgroupFsNames := []string{} - for _, cgroupName := range cgroupNames { + rootCgroupName := cm.NewCgroupName(cm.RootCgroupName, defaultNodeAllocatableCgroup) + for _, baseName := range cgroupNames { // Add top level cgroup used to enforce node allocatable. - cgroupFsNames = append(cgroupFsNames, toCgroupFsName(path.Join(defaultNodeAllocatableCgroup, string(cgroupName)))) + cgroupComponents := strings.Split(baseName, "/") + cgroupName := cm.NewCgroupName(rootCgroupName, cgroupComponents...) + cgroupFsNames = append(cgroupFsNames, toCgroupFsName(cgroupName)) } glog.Infof("expecting %v cgroups to be found", cgroupFsNames) // build the pod command to either verify cgroups exist @@ -109,8 +112,10 @@ func makePodToVerifyCgroups(cgroupNames []cm.CgroupName) *v1.Pod { } // makePodToVerifyCgroupRemoved verfies the specified cgroup does not exist. -func makePodToVerifyCgroupRemoved(cgroupName cm.CgroupName) *v1.Pod { - cgroupFsName := toCgroupFsName(string(cgroupName)) +func makePodToVerifyCgroupRemoved(baseName string) *v1.Pod { + components := strings.Split(baseName, "/") + cgroupName := cm.NewCgroupName(cm.RootCgroupName, components...) 
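+	// e.g. the "besteffort/pod<uid>" form used by callers below becomes the
+	// internal name {"besteffort", "pod<uid>"} before driver-specific conversion.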
+ cgroupFsName := toCgroupFsName(cgroupName) pod := &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "pod" + string(uuid.NewUUID()), @@ -151,7 +156,7 @@ var _ = framework.KubeDescribe("Kubelet Cgroup Manager", func() { if !framework.TestContext.KubeletConfig.CgroupsPerQOS { return } - cgroupsToVerify := []cm.CgroupName{cm.CgroupName(burstableCgroup), cm.CgroupName(bestEffortCgroup)} + cgroupsToVerify := []string{burstableCgroup, bestEffortCgroup} pod := makePodToVerifyCgroups(cgroupsToVerify) f.PodClient().Create(pod) err := framework.WaitForPodSuccessInNamespace(f.ClientSet, pod.Name, f.Namespace.Name) @@ -189,7 +194,7 @@ var _ = framework.KubeDescribe("Kubelet Cgroup Manager", func() { podUID = string(guaranteedPod.UID) }) By("Checking if the pod cgroup was created", func() { - cgroupsToVerify := []cm.CgroupName{cm.CgroupName("pod" + podUID)} + cgroupsToVerify := []string{"pod" + podUID} pod := makePodToVerifyCgroups(cgroupsToVerify) f.PodClient().Create(pod) err := framework.WaitForPodSuccessInNamespace(f.ClientSet, pod.Name, f.Namespace.Name) @@ -198,7 +203,7 @@ var _ = framework.KubeDescribe("Kubelet Cgroup Manager", func() { By("Checking if the pod cgroup was deleted", func() { gp := int64(1) Expect(f.PodClient().Delete(guaranteedPod.Name, &metav1.DeleteOptions{GracePeriodSeconds: &gp})).NotTo(HaveOccurred()) - pod := makePodToVerifyCgroupRemoved(cm.CgroupName("pod" + podUID)) + pod := makePodToVerifyCgroupRemoved("pod" + podUID) f.PodClient().Create(pod) err := framework.WaitForPodSuccessInNamespace(f.ClientSet, pod.Name, f.Namespace.Name) Expect(err).NotTo(HaveOccurred()) @@ -233,7 +238,7 @@ var _ = framework.KubeDescribe("Kubelet Cgroup Manager", func() { podUID = string(bestEffortPod.UID) }) By("Checking if the pod cgroup was created", func() { - cgroupsToVerify := []cm.CgroupName{cm.CgroupName("besteffort/pod" + podUID)} + cgroupsToVerify := []string{"besteffort/pod" + podUID} pod := makePodToVerifyCgroups(cgroupsToVerify) f.PodClient().Create(pod) err := framework.WaitForPodSuccessInNamespace(f.ClientSet, pod.Name, f.Namespace.Name) @@ -242,7 +247,7 @@ var _ = framework.KubeDescribe("Kubelet Cgroup Manager", func() { By("Checking if the pod cgroup was deleted", func() { gp := int64(1) Expect(f.PodClient().Delete(bestEffortPod.Name, &metav1.DeleteOptions{GracePeriodSeconds: &gp})).NotTo(HaveOccurred()) - pod := makePodToVerifyCgroupRemoved(cm.CgroupName("besteffort/pod" + podUID)) + pod := makePodToVerifyCgroupRemoved("besteffort/pod" + podUID) f.PodClient().Create(pod) err := framework.WaitForPodSuccessInNamespace(f.ClientSet, pod.Name, f.Namespace.Name) Expect(err).NotTo(HaveOccurred()) @@ -277,7 +282,7 @@ var _ = framework.KubeDescribe("Kubelet Cgroup Manager", func() { podUID = string(burstablePod.UID) }) By("Checking if the pod cgroup was created", func() { - cgroupsToVerify := []cm.CgroupName{cm.CgroupName("burstable/pod" + podUID)} + cgroupsToVerify := []string{"burstable/pod" + podUID} pod := makePodToVerifyCgroups(cgroupsToVerify) f.PodClient().Create(pod) err := framework.WaitForPodSuccessInNamespace(f.ClientSet, pod.Name, f.Namespace.Name) @@ -286,7 +291,7 @@ var _ = framework.KubeDescribe("Kubelet Cgroup Manager", func() { By("Checking if the pod cgroup was deleted", func() { gp := int64(1) Expect(f.PodClient().Delete(burstablePod.Name, &metav1.DeleteOptions{GracePeriodSeconds: &gp})).NotTo(HaveOccurred()) - pod := makePodToVerifyCgroupRemoved(cm.CgroupName("burstable/pod" + podUID)) + pod := makePodToVerifyCgroupRemoved("burstable/pod" + podUID) 
f.PodClient().Create(pod) err := framework.WaitForPodSuccessInNamespace(f.ClientSet, pod.Name, f.Namespace.Name) Expect(err).NotTo(HaveOccurred()) diff --git a/test/e2e_node/util.go b/test/e2e_node/util.go index 1faf723379..d86d11d3a4 100644 --- a/test/e2e_node/util.go +++ b/test/e2e_node/util.go @@ -387,18 +387,19 @@ func restartKubelet() { framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %v", err, stdout) } -func toCgroupFsName(cgroup string) string { +func toCgroupFsName(cgroupName cm.CgroupName) string { if framework.TestContext.KubeletConfig.CgroupDriver == "systemd" { - return cm.ConvertCgroupNameToSystemd(cm.CgroupName(cgroup), true) + return cgroupName.ToSystemd() + } else { + return cgroupName.ToCgroupfs() } - return cgroup } // reduceAllocatableMemoryUsage uses memory.force_empty (https://lwn.net/Articles/432224/) // to make the kernel reclaim memory in the allocatable cgroup // the time to reduce pressure may be unbounded, but usually finishes within a second func reduceAllocatableMemoryUsage() { - cmd := fmt.Sprintf("echo 0 > /sys/fs/cgroup/memory/%s/memory.force_empty", toCgroupFsName(defaultNodeAllocatableCgroup)) + cmd := fmt.Sprintf("echo 0 > /sys/fs/cgroup/memory/%s/memory.force_empty", toCgroupFsName(cm.NewCgroupName(cm.RootCgroupName, defaultNodeAllocatableCgroup))) _, err := exec.Command("sudo", "sh", "-c", cmd).CombinedOutput() framework.ExpectNoError(err) }
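
For reference, a minimal sketch of how the new slice-based CgroupName API composes end to end. This is illustrative only: the pod name is made up, the snippet is not part of the patch, and it assumes a linux build where the cm package's conversion helpers above are compiled in.

package example

import (
	"fmt"

	"k8s.io/kubernetes/pkg/kubelet/cm"
)

func demoCgroupNames() {
	// Compose an internal name from components; NewCgroupName panics on "_" or "/".
	name := cm.NewCgroupName(cm.RootCgroupName, "kubepods", "burstable", "pod1234-abcd")

	// Systemd driver form, expanded into the full slice hierarchy:
	// "/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod1234_abcd.slice"
	fmt.Println(name.ToSystemd())

	// Plain cgroupfs driver form: "/kubepods/burstable/pod1234-abcd"
	fmt.Println(name.ToCgroupfs())

	// Both literal forms parse back to the same internal name,
	// printed as [kubepods burstable pod1234-abcd].
	fmt.Println(cm.ParseSystemdToCgroupName(name.ToSystemd()))
	fmt.Println(cm.ParseCgroupfsToCgroupName(name.ToCgroupfs()))
}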