2015-05-12 00:04:36 +00:00
|
|
|
/*
|
2016-06-03 00:25:58 +00:00
|
|
|
Copyright 2015 The Kubernetes Authors.
|
2015-05-12 00:04:36 +00:00
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
*/
|
|
|
|
|
2015-10-10 00:09:53 +00:00
|
|
|
package cm
|
2015-05-12 00:04:36 +00:00
|
|
|
|
2017-02-09 20:13:28 +00:00
|
|
|
import (
|
2017-09-01 17:46:39 +00:00
|
|
|
"time"
|
|
|
|
|
2017-02-09 20:13:28 +00:00
|
|
|
"k8s.io/apimachinery/pkg/util/sets"
|
2017-02-10 05:14:10 +00:00
|
|
|
// TODO: Migrate kubelet to either use its own internal objects or client library.
|
2017-06-22 17:25:57 +00:00
|
|
|
"k8s.io/api/core/v1"
|
2017-09-01 17:46:39 +00:00
|
|
|
internalapi "k8s.io/kubernetes/pkg/kubelet/apis/cri"
|
2018-11-14 03:25:56 +00:00
|
|
|
podresourcesapi "k8s.io/kubernetes/pkg/kubelet/apis/podresources/v1alpha1"
|
2017-10-23 23:18:49 +00:00
|
|
|
"k8s.io/kubernetes/pkg/kubelet/config"
|
2017-08-22 22:03:47 +00:00
|
|
|
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
2017-02-10 05:14:10 +00:00
|
|
|
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
|
2017-11-02 01:17:48 +00:00
|
|
|
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
|
2017-09-01 17:46:39 +00:00
|
|
|
"k8s.io/kubernetes/pkg/kubelet/status"
|
2018-01-24 17:06:07 +00:00
|
|
|
"k8s.io/kubernetes/pkg/kubelet/util/pluginwatcher"
|
2018-12-08 02:36:11 +00:00
|
|
|
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
|
2017-02-28 21:03:06 +00:00
|
|
|
|
|
|
|
"fmt"
|
|
|
|
"strconv"
|
|
|
|
"strings"
|
2017-02-09 20:13:28 +00:00
|
|
|
)
|
2015-05-30 00:32:34 +00:00
|
|
|
|
2017-02-21 20:10:45 +00:00
|
|
|
// ActivePodsFunc is a function that takes no arguments and returns a slice of
// pod pointers. ContainerManager.Start accepts one as its second parameter.
// NOTE(review): judging by the name it presumably yields the currently active
// pods — confirm against the callers that supply it.
type ActivePodsFunc func() []*v1.Pod
|
|
|
|
|
2015-05-12 00:04:36 +00:00
|
|
|
// Manages the containers running on a machine.
|
2015-10-10 00:09:53 +00:00
|
|
|
type ContainerManager interface {
|
2015-05-12 00:04:36 +00:00
|
|
|
// Runs the container manager's housekeeping.
|
|
|
|
// - Ensures that the Docker daemon is in a container.
|
2015-05-19 22:52:12 +00:00
|
|
|
// - Creates the system container where all non-containerized processes run.
|
2017-10-23 23:18:49 +00:00
|
|
|
Start(*v1.Node, ActivePodsFunc, config.SourcesReady, status.PodStatusProvider, internalapi.RuntimeService) error
|
2015-05-30 00:32:34 +00:00
|
|
|
|
2017-09-19 03:15:10 +00:00
|
|
|
// SystemCgroupsLimit returns resources allocated to system cgroups in the machine.
|
2016-02-10 00:58:44 +00:00
|
|
|
// These cgroups include the system and Kubernetes services.
|
2016-11-18 20:50:58 +00:00
|
|
|
SystemCgroupsLimit() v1.ResourceList
|
2016-02-05 01:49:17 +00:00
|
|
|
|
2017-09-19 03:15:10 +00:00
|
|
|
// GetNodeConfig returns a NodeConfig that is being used by the container manager.
|
2016-02-05 01:49:17 +00:00
|
|
|
GetNodeConfig() NodeConfig
|
2016-03-04 00:37:09 +00:00
|
|
|
|
2017-09-19 03:15:10 +00:00
|
|
|
// Status returns internal Status.
|
2016-03-04 00:37:09 +00:00
|
|
|
Status() Status
|
2016-07-13 04:39:22 +00:00
|
|
|
|
|
|
|
// NewPodContainerManager is a factory method which returns a podContainerManager object
|
|
|
|
// Returns a noop implementation if qos cgroup hierarchy is not enabled
|
|
|
|
NewPodContainerManager() PodContainerManager
|
|
|
|
|
2017-05-26 02:53:09 +00:00
|
|
|
// GetMountedSubsystems returns the mounted cgroup subsystems on the node
|
2016-07-13 04:39:22 +00:00
|
|
|
GetMountedSubsystems() *CgroupSubsystems
|
|
|
|
|
|
|
|
// GetQOSContainersInfo returns the names of top level QoS containers
|
|
|
|
GetQOSContainersInfo() QOSContainersInfo
|
2017-02-10 05:14:10 +00:00
|
|
|
|
2017-09-19 03:15:10 +00:00
|
|
|
// GetNodeAllocatableReservation returns the amount of compute resources that have to be reserved from scheduling.
|
2017-02-10 05:14:10 +00:00
|
|
|
GetNodeAllocatableReservation() v1.ResourceList
|
2017-02-21 20:10:45 +00:00
|
|
|
|
2017-06-26 19:49:00 +00:00
|
|
|
// GetCapacity returns the amount of compute resources tracked by container manager available on the node.
|
|
|
|
GetCapacity() v1.ResourceList
|
|
|
|
|
2017-12-16 06:38:46 +00:00
|
|
|
// GetDevicePluginResourceCapacity returns the node capacity (amount of total device plugin resources),
|
|
|
|
// node allocatable (amount of total healthy resources reported by device plugin),
|
2017-10-23 23:18:49 +00:00
|
|
|
// and inactive device plugin resources previously registered on the node.
|
2017-12-16 06:38:46 +00:00
|
|
|
GetDevicePluginResourceCapacity() (v1.ResourceList, v1.ResourceList, []string)
|
2017-10-23 23:18:49 +00:00
|
|
|
|
2017-02-21 20:10:45 +00:00
|
|
|
// UpdateQOSCgroups performs housekeeping updates to ensure that the top
|
|
|
|
// level QoS containers have their desired state in a thread-safe way
|
|
|
|
UpdateQOSCgroups() error
|
2017-08-08 23:34:13 +00:00
|
|
|
|
2017-09-19 03:15:10 +00:00
|
|
|
// GetResources returns RunContainerOptions with devices, mounts, and env fields populated for
|
2017-08-22 22:03:47 +00:00
|
|
|
// extended resources required by container.
|
2017-11-02 01:17:48 +00:00
|
|
|
GetResources(pod *v1.Pod, container *v1.Container) (*kubecontainer.RunContainerOptions, error)
|
|
|
|
|
|
|
|
// UpdatePluginResources calls Allocate of device plugin handler for potential
|
|
|
|
// requests for device plugin resources, and returns an error if fails.
|
|
|
|
// Otherwise, it updates allocatableResource in nodeInfo if necessary,
|
|
|
|
// to make sure it is at least equal to the pod's requested capacity for
|
|
|
|
// any registered device plugin resource
|
2018-12-08 02:36:11 +00:00
|
|
|
UpdatePluginResources(*schedulernodeinfo.NodeInfo, *lifecycle.PodAdmitAttributes) error
|
2017-09-01 17:46:39 +00:00
|
|
|
|
|
|
|
InternalContainerLifecycle() InternalContainerLifecycle
|
2018-02-16 22:53:44 +00:00
|
|
|
|
|
|
|
// GetPodCgroupRoot returns the cgroup which contains all pods.
|
|
|
|
GetPodCgroupRoot() string
|
2018-08-11 23:59:39 +00:00
|
|
|
|
|
|
|
// GetPluginRegistrationHandler returns a plugin registration handler
|
|
|
|
// The pluginwatcher's Handlers allow to have a single module for handling
|
|
|
|
// registration.
|
|
|
|
GetPluginRegistrationHandler() pluginwatcher.PluginHandler
|
2018-11-14 03:25:56 +00:00
|
|
|
|
|
|
|
// GetDevices returns information about the devices assigned to pods and containers
|
|
|
|
GetDevices(podUID, containerName string) []*podresourcesapi.ContainerDevices
|
2015-05-12 00:04:36 +00:00
|
|
|
}
|
2015-10-10 00:09:53 +00:00
|
|
|
|
|
|
|
type NodeConfig struct {
|
2016-11-06 17:20:57 +00:00
|
|
|
RuntimeCgroupsName string
|
|
|
|
SystemCgroupsName string
|
|
|
|
KubeletCgroupsName string
|
|
|
|
ContainerRuntime string
|
|
|
|
CgroupsPerQOS bool
|
|
|
|
CgroupRoot string
|
|
|
|
CgroupDriver string
|
2017-10-17 12:23:39 +00:00
|
|
|
KubeletRootDir string
|
2016-11-06 17:20:57 +00:00
|
|
|
ProtectKernelDefaults bool
|
2017-02-09 20:13:28 +00:00
|
|
|
NodeAllocatableConfig
|
2018-04-13 01:51:08 +00:00
|
|
|
QOSReserved map[v1.ResourceName]int64
|
2017-09-01 17:46:39 +00:00
|
|
|
ExperimentalCPUManagerPolicy string
|
|
|
|
ExperimentalCPUManagerReconcilePeriod time.Duration
|
2018-01-08 17:32:34 +00:00
|
|
|
ExperimentalPodPidsLimit int64
|
2018-03-16 18:45:14 +00:00
|
|
|
EnforceCPULimits bool
|
2018-05-04 15:25:22 +00:00
|
|
|
CPUCFSQuotaPeriod time.Duration
|
2017-02-09 20:13:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
type NodeAllocatableConfig struct {
|
|
|
|
KubeReservedCgroupName string
|
|
|
|
SystemReservedCgroupName string
|
|
|
|
EnforceNodeAllocatable sets.String
|
|
|
|
KubeReserved v1.ResourceList
|
|
|
|
SystemReserved v1.ResourceList
|
|
|
|
HardEvictionThresholds []evictionapi.Threshold
|
2015-10-10 00:09:53 +00:00
|
|
|
}
|
2016-03-04 00:37:09 +00:00
|
|
|
|
|
|
|
// Status is the internal status value returned by ContainerManager.Status.
type Status struct {
	// SoftRequirements holds any soft requirements that were unsatisfied.
	SoftRequirements error
}
|
2017-02-10 05:14:10 +00:00
|
|
|
|
2017-12-09 07:14:18 +00:00
|
|
|
// parsePercentage parses a percentage string such as "50%" into its numeric
// value.
//
// The input must be a base-10 integer followed by exactly one '%' sign, and
// the integer must lie in the range [0, 100]. An error is returned when the
// '%' suffix is missing, when the text before the suffix is not a valid
// integer (this includes repeated signs such as "50%%"), or when the value is
// out of range.
func parsePercentage(v string) (int64, error) {
	if !strings.HasSuffix(v, "%") {
		return 0, fmt.Errorf("percentage expected, got '%s'", v)
	}
	// TrimSuffix removes exactly one '%'. TrimRight would strip every
	// trailing '%' and silently accept malformed input like "50%%".
	percentage, err := strconv.ParseInt(strings.TrimSuffix(v, "%"), 10, 0)
	if err != nil {
		return 0, fmt.Errorf("invalid number in percentage '%s'", v)
	}
	if percentage < 0 || percentage > 100 {
		return 0, fmt.Errorf("percentage must be between 0 and 100")
	}
	return percentage, nil
}
|
|
|
|
|
|
|
|
// ParseQOSReserved parses the --qos-reserve-requests option
|
Lift embedded structure out of eviction-related KubeletConfiguration fields
- Changes the following KubeletConfiguration fields from `string` to
`map[string]string`:
- `EvictionHard`
- `EvictionSoft`
- `EvictionSoftGracePeriod`
- `EvictionMinimumReclaim`
- Adds flag parsing shims to maintain Kubelet's public flags API, while
enabling structured input in the file API.
- Also removes `kubeletconfig.ConfigurationMap`, which was an ad-hoc flag
parsing shim living in the kubeletconfig API group, and replaces it
with the `MapStringString` shim introduced in this PR. Flag parsing
shims belong in a common place, not in the kubeletconfig API.
I manually audited these to ensure that this wouldn't cause errors
parsing the command line for syntax that would have previously been
error free (`kubeletconfig.ConfigurationMap` was unique in that it
allowed keys to be provided on the CLI without values. I believe this was
done in `flags.ConfigurationMap` to facilitate the `--node-labels` flag,
which rightfully accepts value-free keys, and that this shim was then
just copied to `kubeletconfig`). Fortunately, the affected fields
(`ExperimentalQOSReserved`, `SystemReserved`, and `KubeReserved`) expect
non-empty strings in the values of the map, and as a result passing the
empty string is already an error. Thus requiring keys shouldn't break
anyone's scripts.
- Updates code and tests accordingly.
Regarding eviction operators, directionality is already implicit in the
signal type (for a given signal, the decision to evict will be made when
crossing the threshold from either above or below, never both). There is
no need to expose an operator, such as `<`, in the API. By changing
`EvictionHard` and `EvictionSoft` to `map[string]string`, this PR
simplifies the experience of working with these fields via the
`KubeletConfiguration` type. Again, flags stay the same.
Other things:
- There is another flag parsing shim, `flags.ConfigurationMap`, from the
shared flag utility. The `NodeLabels` field still uses
`flags.ConfigurationMap`. This PR moves the allocation of the
`map[string]string` for the `NodeLabels` field from
`AddKubeletConfigFlags` to the defaulter for the external
`KubeletConfiguration` type. Flags are layered on top of an internal
object that has undergone conversion from a defaulted external object,
which means that previously the mere registration of flags would have
overwritten any previously-defined defaults for `NodeLabels` (fortunately
there were none).
2017-10-19 22:42:07 +00:00
|
|
|
func ParseQOSReserved(m map[string]string) (*map[v1.ResourceName]int64, error) {
|
2017-02-28 21:03:06 +00:00
|
|
|
reservations := make(map[v1.ResourceName]int64)
|
|
|
|
for k, v := range m {
|
|
|
|
switch v1.ResourceName(k) {
|
|
|
|
// Only memory resources are supported.
|
|
|
|
case v1.ResourceMemory:
|
|
|
|
q, err := parsePercentage(v)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
reservations[v1.ResourceName(k)] = q
|
|
|
|
default:
|
|
|
|
return nil, fmt.Errorf("cannot reserve %q resource", k)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return &reservations, nil
|
|
|
|
}
|