add support for node allocatable phase 2 to kubelet

Signed-off-by: Vishnu Kannan <vishnuk@google.com>
pull/6/head
Vishnu Kannan 2017-02-09 21:14:10 -08:00 committed by Vishnu kannan
parent 70e340b045
commit cc5f5474d5
37 changed files with 1577 additions and 730 deletions

View File

@ -18,7 +18,6 @@ go_test(
tags = ["automanaged"],
deps = [
"//pkg/apis/componentconfig:go_default_library",
"//pkg/kubelet:go_default_library",
"//vendor:k8s.io/apimachinery/pkg/util/diff",
"//vendor:k8s.io/client-go/rest",
],
@ -56,6 +55,8 @@ go_library(
"//pkg/kubelet/config:go_default_library",
"//pkg/kubelet/container:go_default_library",
"//pkg/kubelet/dockertools:go_default_library",
"//pkg/kubelet/eviction:go_default_library",
"//pkg/kubelet/eviction/api:go_default_library",
"//pkg/kubelet/network:go_default_library",
"//pkg/kubelet/network/cni:go_default_library",
"//pkg/kubelet/network/kubenet:go_default_library",
@ -98,10 +99,12 @@ go_library(
"//vendor:github.com/spf13/cobra",
"//vendor:github.com/spf13/pflag",
"//vendor:golang.org/x/exp/inotify",
"//vendor:k8s.io/apimachinery/pkg/api/resource",
"//vendor:k8s.io/apimachinery/pkg/apis/meta/v1",
"//vendor:k8s.io/apimachinery/pkg/runtime",
"//vendor:k8s.io/apimachinery/pkg/types",
"//vendor:k8s.io/apimachinery/pkg/util/runtime",
"//vendor:k8s.io/apimachinery/pkg/util/sets",
"//vendor:k8s.io/apimachinery/pkg/util/wait",
"//vendor:k8s.io/apiserver/pkg/authentication/authenticator",
"//vendor:k8s.io/apiserver/pkg/authentication/authenticatorfactory",

View File

@ -225,7 +225,6 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
fs.Float64Var(&s.ChaosChance, "chaos-chance", s.ChaosChance, "If > 0.0, introduce random client errors and latency. Intended for testing. [default=0.0]")
fs.BoolVar(&s.Containerized, "containerized", s.Containerized, "Experimental support for running kubelet in a container. Intended for testing. [default=false]")
fs.Int64Var(&s.MaxOpenFiles, "max-open-files", s.MaxOpenFiles, "Number of files that can be opened by Kubelet process. [default=1000000]")
fs.BoolVar(&s.RegisterSchedulable, "register-schedulable", s.RegisterSchedulable, "Register the node as schedulable. Won't have any effect if register-node is false. [default=true]")
fs.MarkDeprecated("register-schedulable", "will be removed in a future version")
fs.Var(utiltaints.NewTaintsVar(&s.RegisterWithTaints), "register-with-taints", "Register the node with the given list of taints (comma-separated \"<key>=<value>:<effect>\"). No-op if register-node is false.")
@ -267,9 +266,8 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
// Node Allocatable Flags
fs.Var(&s.SystemReserved, "system-reserved", "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for non-kubernetes components. Currently only cpu and memory are supported. See http://kubernetes.io/docs/user-guide/compute-resources for more detail. [default=none]")
fs.Var(&s.KubeReserved, "kube-reserved", "A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for kubernetes system components. Currently only cpu and memory are supported. See http://kubernetes.io/docs/user-guide/compute-resources for more detail. [default=none]")
fs.StringSliceVar(&s.EnforceNodeAllocatable, "enforce-node-allocatable", s.EnforceNodeAllocatable, "A comma-separated list of levels of node allocatable enforcement to be enforced by kubelet. Acceptable options are 'pods', 'system-reserved' & 'kube-reserved'. If the latter two options are specified, '--system-reserved-cgroup' & '--kube-reserved-cgroup' must also be set respectively. See https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md for more details. [default='']")
fs.StringVar(&s.SystemReservedCgroup, "system-reserved-cgroup", s.SystemReservedCgroup, "Absolute name of the top level cgroup that is used to manage non-kubernetes components for which compute resources were reserved via '--system-reserved' flag. Ex. '/system-reserved'. [default='']")
fs.StringVar(&s.KubeReservedCgroup, "kube-reserved-cgroup", s.KubeReservedCgroup, "Absolute name of the top level cgroup that is used to manage kubernetes components for which compute resources were reserved via '--kube-reserved' flag. Ex. '/kube-reserved'. [default='']")
fs.BoolVar(&s.ExperimentalNodeAllocatableIgnoreEvictionThreshold, "experimental-node-allocatable-ignore-eviction-threshold", s.ExperimentalNodeAllocatableIgnoreEvictionThreshold, "When set to 'true', Hard Eviction Thresholds will be ignored while calculating Node Allocatable. See https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md for more details. [default=false]")
fs.BoolVar(&s.ExperimentalNodeAllocatableIgnoreEvictionThreshold, "experimental-allocatable-ignore-eviction", s.ExperimentalNodeAllocatableIgnoreEvictionThreshold, "When set to 'true', Hard Eviction Thresholds will be ignored while calculating Node Allocatable. See https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md for more details. [default=false]")
}
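
For reference, the reserved-resource flags above take flat ResourceName=ResourceQuantity lists. The following is a minimal sketch of how such a flag value decomposes, using a plain map and a hypothetical splitReservedFlag helper rather than the ConfigurationMap flag type wired up here:

package main

import (
	"fmt"
	"strings"
)

// splitReservedFlag is an illustrative stand-in for the flag parsing that the
// ConfigurationMap flag type performs: "cpu=200m,memory=150G" becomes a map
// of resource name to quantity string.
func splitReservedFlag(value string) map[string]string {
	out := map[string]string{}
	for _, pair := range strings.Split(value, ",") {
		kv := strings.SplitN(pair, "=", 2)
		if len(kv) == 2 {
			out[strings.TrimSpace(kv[0])] = strings.TrimSpace(kv[1])
		}
	}
	return out
}

func main() {
	fmt.Println(splitReservedFlag("cpu=200m,memory=150G"))
	// map[cpu:200m memory:150G]
}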

View File

@ -36,6 +36,7 @@ import (
"github.com/spf13/cobra"
"github.com/spf13/pflag"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
@ -70,6 +71,8 @@ import (
"k8s.io/kubernetes/pkg/kubelet/config"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/dockertools"
"k8s.io/kubernetes/pkg/kubelet/eviction"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
"k8s.io/kubernetes/pkg/kubelet/server"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/util/configz"
@ -82,12 +85,17 @@ import (
"k8s.io/kubernetes/pkg/version"
)
const (
// Kubelet component name
componentKubelet = "kubelet"
)
// NewKubeletCommand creates a *cobra.Command object with default parameters
func NewKubeletCommand() *cobra.Command {
s := options.NewKubeletServer()
s.AddFlags(pflag.CommandLine)
cmd := &cobra.Command{
Use: "kubelet",
Use: componentKubelet,
Long: `The kubelet is the primary "node agent" that runs on each
node. The kubelet works in terms of a PodSpec. A PodSpec is a YAML or JSON object
that describes a pod. The kubelet takes a set of PodSpecs that are provided through
@ -306,6 +314,44 @@ func initConfigz(kc *componentconfig.KubeletConfiguration) (*configz.Config, err
return cz, err
}
// validateConfig validates the Kubelet configuration and returns an error if the input configuration is invalid.
func validateConfig(s *options.KubeletServer) error {
if !s.CgroupsPerQOS && len(s.EnforceNodeAllocatable) > 0 {
return fmt.Errorf("Node Allocatable enforcement is not supported unless Cgroups Per QOS feature is turned on")
}
if s.SystemCgroups != "" && s.CgroupRoot == "" {
return fmt.Errorf("invalid configuration: system container was specified and cgroup root was not specified")
}
for _, val := range s.EnforceNodeAllocatable {
switch val {
case cm.NodeAllocatableEnforcementKey:
case cm.SystemReservedEnforcementKey:
case cm.KubeReservedEnforcementKey:
continue
default:
return fmt.Errorf("invalid option %q specified for EnforceNodeAllocatable setting. Valid options are %q, %q or %q", val, cm.NodeAllocatableEnforcementKey, cm.SystemReservedEnforcementKey, cm.KubeReservedEnforcementKey)
}
}
return nil
}
// makeEventRecorder sets up kubeDeps.Recorder if it is nil. It is a no-op otherwise.
func makeEventRecorder(s *componentconfig.KubeletConfiguration, kubeDeps *kubelet.KubeletDeps, nodeName types.NodeName) {
if kubeDeps.Recorder != nil {
return
}
eventBroadcaster := record.NewBroadcaster()
kubeDeps.Recorder = eventBroadcaster.NewRecorder(api.Scheme, clientv1.EventSource{Component: componentKubelet, Host: string(nodeName)})
eventBroadcaster.StartLogging(glog.V(3).Infof)
if kubeDeps.EventClient != nil {
glog.V(4).Infof("Sending events to api server.")
eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: kubeDeps.EventClient.Events("")})
} else {
glog.Warning("No api server defined - no events will be sent to API server.")
}
}
func run(s *options.KubeletServer, kubeDeps *kubelet.KubeletDeps) (err error) {
// TODO: this should be replaced by a --standalone flag
standaloneMode := (len(s.APIServerList) == 0 && !s.RequireKubeConfig)
@ -363,6 +409,11 @@ func run(s *options.KubeletServer, kubeDeps *kubelet.KubeletDeps) (err error) {
}
}
// Validate configuration.
if err := validateConfig(s); err != nil {
return err
}
if kubeDeps == nil {
var kubeClient clientset.Interface
var eventClient v1core.EventsGetter
@ -381,11 +432,12 @@ func run(s *options.KubeletServer, kubeDeps *kubelet.KubeletDeps) (err error) {
}
}
nodeName, err := getNodeName(cloud, nodeutil.GetHostname(s.HostnameOverride))
if err != nil {
return err
}
if s.BootstrapKubeconfig != "" {
nodeName, err := getNodeName(cloud, nodeutil.GetHostname(s.HostnameOverride))
if err != nil {
return err
}
if err := bootstrapClientCert(s.KubeConfig.Value(), s.BootstrapKubeconfig, s.CertDirectory, nodeName); err != nil {
return err
}
@ -429,12 +481,12 @@ func run(s *options.KubeletServer, kubeDeps *kubelet.KubeletDeps) (err error) {
kubeDeps.EventClient = eventClient
}
if kubeDeps.Auth == nil {
nodeName, err := getNodeName(kubeDeps.Cloud, nodeutil.GetHostname(s.HostnameOverride))
if err != nil {
return err
}
nodeName, err := getNodeName(kubeDeps.Cloud, nodeutil.GetHostname(s.HostnameOverride))
if err != nil {
return err
}
if kubeDeps.Auth == nil {
auth, err := buildAuth(nodeName, kubeDeps.ExternalKubeClient, s.KubeletConfiguration)
if err != nil {
return err
@ -449,14 +501,30 @@ func run(s *options.KubeletServer, kubeDeps *kubelet.KubeletDeps) (err error) {
}
}
// Setup event recorder if required.
makeEventRecorder(&s.KubeletConfiguration, kubeDeps, nodeName)
if kubeDeps.ContainerManager == nil {
if s.SystemCgroups != "" && s.CgroupRoot == "" {
return fmt.Errorf("invalid configuration: system container was specified and cgroup root was not specified")
}
if s.CgroupsPerQOS && s.CgroupRoot == "" {
glog.Infof("--cgroups-per-qos enabled, but --cgroup-root was not specified. defaulting to /")
s.CgroupRoot = "/"
}
kubeReserved, err := parseResourceList(s.KubeReserved)
if err != nil {
return err
}
systemReserved, err := parseResourceList(s.SystemReserved)
if err != nil {
return err
}
var hardEvictionThresholds []evictionapi.Threshold
// If the user requested to ignore eviction thresholds, then do not set valid values for hardEvictionThresholds here.
if !s.ExperimentalNodeAllocatableIgnoreEvictionThreshold {
hardEvictionThresholds, err = eviction.ParseThresholdConfig(s.EvictionHard, "", "", "")
if err != nil {
return err
}
}
kubeDeps.ContainerManager, err = cm.NewContainerManager(
kubeDeps.Mounter,
kubeDeps.CAdvisorInterface,
@ -479,7 +547,8 @@ func run(s *options.KubeletServer, kubeDeps *kubelet.KubeletDeps) (err error) {
HardEvictionThresholds: hardEvictionThresholds,
},
},
s.ExperimentalFailSwapOn)
s.ExperimentalFailSwapOn,
kubeDeps.Recorder)
if err != nil {
return err
@ -694,16 +763,8 @@ func RunKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *kubelet
if err != nil {
return err
}
eventBroadcaster := record.NewBroadcaster()
kubeDeps.Recorder = eventBroadcaster.NewRecorder(api.Scheme, clientv1.EventSource{Component: "kubelet", Host: string(nodeName)})
eventBroadcaster.StartLogging(glog.V(3).Infof)
if kubeDeps.EventClient != nil {
glog.V(4).Infof("Sending events to api server.")
eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: kubeDeps.EventClient.Events("")})
} else {
glog.Warning("No api server defined - no events will be sent to API server.")
}
// Setup event recorder if required.
makeEventRecorder(kubeCfg, kubeDeps, nodeName)
// TODO(mtaufen): I moved the validation of these fields here, from UnsecuredKubeletConfig,
// so that I could remove the associated fields from KubeletConfig. I would
@ -837,3 +898,29 @@ func CreateAndInitKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDep
return k, nil
}
// parseResourceList parses the given configuration map into an API
// ResourceList or returns an error.
func parseResourceList(m componentconfig.ConfigurationMap) (v1.ResourceList, error) {
if len(m) == 0 {
return nil, nil
}
rl := make(v1.ResourceList)
for k, v := range m {
switch v1.ResourceName(k) {
// Only CPU and memory resources are supported.
case v1.ResourceCPU, v1.ResourceMemory:
q, err := resource.ParseQuantity(v)
if err != nil {
return nil, err
}
if q.Sign() == -1 {
return nil, fmt.Errorf("resource quantity for %q cannot be negative: %v", k, v)
}
rl[v1.ResourceName(k)] = q
default:
return nil, fmt.Errorf("cannot reserve %q resource", k)
}
}
return rl, nil
}
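
A short usage sketch of the parsing logic above, self-contained and using only the apimachinery resource package; the plain string map stands in for a ConfigurationMap built from a flag value such as "cpu=200m,memory=150Mi":

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	reserved := map[string]string{"cpu": "200m", "memory": "150Mi"}
	parsed := map[string]resource.Quantity{}
	for name, value := range reserved {
		q, err := resource.ParseQuantity(value)
		if err != nil {
			fmt.Printf("rejecting %s=%s: %v\n", name, value, err)
			continue
		}
		if q.Sign() == -1 {
			fmt.Printf("rejecting %s=%s: negative quantities are not allowed\n", name, value)
			continue
		}
		parsed[name] = q
	}
	for name, q := range parsed {
		fmt.Printf("%s reserved: %s\n", name, q.String())
	}
}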

View File

@ -20,7 +20,6 @@ import (
"testing"
"k8s.io/kubernetes/pkg/apis/componentconfig"
"k8s.io/kubernetes/pkg/kubelet"
)
func TestValueOfAllocatableResources(t *testing.T) {
@ -32,13 +31,13 @@ func TestValueOfAllocatableResources(t *testing.T) {
}{
{
kubeReserved: "cpu=200m,memory=-150G",
systemReserved: "cpu=200m,memory=150G",
systemReserved: "cpu=200m,memory=15Ki",
errorExpected: true,
name: "negative quantity value",
},
{
kubeReserved: "cpu=200m,memory=150GG",
systemReserved: "cpu=200m,memory=150G",
kubeReserved: "cpu=200m,memory=150Gi",
systemReserved: "cpu=200m,memory=15Ky",
errorExpected: true,
name: "invalid quantity unit",
},
@ -57,17 +56,15 @@ func TestValueOfAllocatableResources(t *testing.T) {
kubeReservedCM.Set(test.kubeReserved)
systemReservedCM.Set(test.systemReserved)
_, err := kubelet.ParseReservation(kubeReservedCM, systemReservedCM)
if err != nil {
t.Logf("%s: error returned: %v", test.name, err)
}
_, err1 := parseResourceList(kubeReservedCM)
_, err2 := parseResourceList(systemReservedCM)
if test.errorExpected {
if err == nil {
if err1 == nil && err2 == nil {
t.Errorf("%s: error expected", test.name)
}
} else {
if err != nil {
t.Errorf("%s: unexpected error: %v", test.name, err)
if err1 != nil || err2 != nil {
t.Errorf("%s: unexpected error: %v, %v", test.name, err1, err2)
}
}
}

View File

@ -173,6 +173,7 @@ pkg/kubelet/api
pkg/kubelet/container
pkg/kubelet/envvars
pkg/kubelet/eviction
pkg/kubelet/eviction/api
pkg/kubelet/util/csr
pkg/kubelet/util/format
pkg/kubelet/util/ioutils

View File

@ -29,6 +29,7 @@ RUNTIME_CONFIG=${RUNTIME_CONFIG:-""}
KUBELET_AUTHORIZATION_WEBHOOK=${KUBELET_AUTHORIZATION_WEBHOOK:-""}
KUBELET_AUTHENTICATION_WEBHOOK=${KUBELET_AUTHENTICATION_WEBHOOK:-""}
POD_MANIFEST_PATH=${POD_MANIFEST_PATH:-"/var/run/kubernetes/static-pods"}
KUBELET_FLAGS=${KUBELET_FLAGS:-""}
# Name of the network plugin, eg: "kubenet"
NET_PLUGIN=${NET_PLUGIN:-""}
# Place the binaries required by NET_PLUGIN in this directory, eg: "/home/kubernetes/bin".
@ -603,7 +604,8 @@ function start_kubelet {
${net_plugin_args} \
${container_runtime_endpoint_args} \
${image_service_endpoint_args} \
--port="$KUBELET_PORT" >"${KUBELET_LOG}" 2>&1 &
--port="$KUBELET_PORT" \
${KUBELET_FLAGS} >"${KUBELET_LOG}" 2>&1 &
KUBELET_PID=$!
# Quick check that kubelet is running.
if ps -p $KUBELET_PID > /dev/null ; then

View File

@ -14,7 +14,6 @@ cluster/gce/configure-vm.sh: cloud_config: ${CLOUD_CONFIG}
cluster/gce/configure-vm.sh: env-to-grains "feature_gates"
cluster/gce/configure-vm.sh: env-to-grains "runtime_config"
cluster/gce/configure-vm.sh: kubelet_api_servers: '${KUBELET_APISERVER}'
cluster/gce/configure-vm.sh: local -r client_ca_file="/srv/salt-overlay/salt/kubelet/ca.crt"
cluster/gce/container-linux/configure-helper.sh: authorization_mode+=",ABAC"
cluster/gce/container-linux/configure-helper.sh: authorization_mode+=",Webhook"
cluster/gce/container-linux/configure-helper.sh: grep -o "{{ *pillar\.get('storage_backend', '\(.*\)') *}}" | \
@ -40,7 +39,6 @@ cluster/gce/trusty/configure-helper.sh: sed -i -e "s@{{ *pillar\.get('storage
cluster/gce/trusty/configure-helper.sh: sed -i -e "s@{{pillar\['allow_privileged'\]}}@true@g" "${src_file}"
cluster/gce/util.sh: local node_ip=$(gcloud compute instances describe --project "${PROJECT}" --zone "${ZONE}" \
cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py: context['pillar'] = {'num_nodes': get_node_count()}
cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py: msg = "Cannot change {0} to {1}".format(service_cidr(),
cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py: ca_cert_path = layer_options.get('ca_certificate_path')
cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py: cluster_dns.set_dns_info(53, hookenv.config('dns_domain'), dns_ip)
cluster/juju/layers/kubernetes-master/reactive/kubernetes_master.py: ip = service_cidr().split('/')[0]
@ -171,6 +169,8 @@ test/e2e_node/container_manager_test.go: return fmt.Errorf("expected pid %d's o
test/e2e_node/container_manager_test.go: return fmt.Errorf("failed to get oom_score_adj for %d", pid)
test/e2e_node/container_manager_test.go: return fmt.Errorf("failed to get oom_score_adj for %d: %v", pid, err)
test/e2e_node/container_manager_test.go: procfsPath := path.Join("/proc", strconv.Itoa(pid), "oom_score_adj")
test/e2e_node/node_container_manager_test.go: kubeReservedCgroup = "/kube_reserved"
test/e2e_node/node_container_manager_test.go: systemReservedCgroup = "/system_reserved"
test/images/mount-tester/mt.go: flag.BoolVar(&breakOnExpectedContent, "break_on_expected_content", true, "Break out of loop on expected content, (use with --file_content_in_loop flag only)")
test/images/mount-tester/mt.go: flag.IntVar(&retryDuration, "retry_time", 180, "Retry time during the loop")
test/images/mount-tester/mt.go: flag.StringVar(&readFileContentInLoopPath, "file_content_in_loop", "", "Path to read the file content in loop from")

View File

@ -7,9 +7,9 @@ advertised-address
algorithm-provider
all-namespaces
allocate-node-cidrs
allowed-not-ready-nodes
allow-missing-template-keys
allow-privileged
allowed-not-ready-nodes
anonymous-auth
api-advertise-addresses
api-burst
@ -19,6 +19,11 @@ api-prefix
api-rate
api-server-advertise-address
api-server-port
apiserver-arg-overrides
apiserver-count
apiserver-count
api-server-port
api-servers
api-server-service-type
api-servers
api-token
@ -31,10 +36,6 @@ audit-log-maxage
audit-log-maxbackup
audit-log-maxsize
audit-log-path
auth-provider
auth-provider
auth-provider-arg
auth-provider-arg
authentication-kubeconfig
authentication-token-webhook
authentication-token-webhook-cache-ttl
@ -46,6 +47,10 @@ authorization-rbac-super-user
authorization-webhook-cache-authorized-ttl
authorization-webhook-cache-unauthorized-ttl
authorization-webhook-config-file
auth-provider
auth-provider
auth-provider-arg
auth-provider-arg
azure-container-registry-config
babysit-daemons
basic-auth-file
@ -155,13 +160,13 @@ dns-port
dns-provider
dns-provider-config
dns-zone-name
dockercfg-path
docker-email
docker-endpoint
docker-exec-handler
docker-password
docker-server
docker-username
dockercfg-path
driver-port
drop-embedded-fields
dry-run
@ -185,6 +190,7 @@ enable-hostpath-provisioner
enable-server
enable-swagger-ui
enable-taint-manager
enforce-node-allocatable
etcd-address
etcd-cafile
etcd-certfile
@ -245,8 +251,8 @@ federated-kube-context
federation-name
federation-system-namespace
file-check-frequency
file-suffix
file_content_in_loop
file-suffix
flex-volume-plugin-dir
forward-services
framework-name
@ -282,11 +288,11 @@ heapster-service
horizontal-pod-autoscaler-sync-period
host-cluster-context
host-ipc-sources
hostname-override
host-network-sources
host-pid-sources
host-port-endpoints
host-system-namespace
hostname-override
http-check-frequency
http-port
ignore-daemonsets
@ -298,9 +304,9 @@ image-project
image-pull-policy
image-pull-progress-deadline
image-service-endpoint
include-extended-apis
include-extended-apis
included-types-overrides
include-extended-apis
include-extended-apis
initial-sync-timeout
input-base
input-dirs
@ -339,6 +345,8 @@ kops-ssh-key
kops-state
kops-up-timeout
kops-zones
kubeadm-cmd-skip
kubeadm-path
kube-api-burst
kube-api-content-type
kube-api-qps
@ -371,6 +379,11 @@ kubelet-read-only-port
kubelet-root-dir
kubelet-sync-frequency
kubelet-timeout
kube-master
kube-master
kube-master-url
kube-reserved
kube-reserved-cgroup
kubernetes-anywhere-cluster
kubernetes-anywhere-path
kubernetes-anywhere-phase2-provider
@ -404,6 +417,8 @@ master-os-distro
master-service-namespace
max-concurrency
max-connection-bytes-per-sec
maximum-dead-containers
maximum-dead-containers-per-container
max-log-age
max-log-backups
max-log-size
@ -413,8 +428,6 @@ max-outgoing-burst
max-outgoing-qps
max-pods
max-requests-inflight
maximum-dead-containers
maximum-dead-containers-per-container
mesos-authentication-principal
mesos-authentication-provider
mesos-authentication-secret-file
@ -430,23 +443,19 @@ mesos-sandbox-overlay
mesos-user
metrics-path
min-available
min-pr-number
min-request-timeout
min-resync-period
minimum-container-ttl-duration
minimum-image-ttl-duration
minion-max-log-age
minion-max-log-backups
minion-max-log-size
minion-path-override
min-pr-number
min-request-timeout
min-resync-period
namespace-sync-period
network-plugin
network-plugin-dir
network-plugin-mtu
no-headers
no-headers
no-suggestions
no-suggestions
node-cidr-mask-size
node-eviction-rate
node-instance-group
@ -465,7 +474,11 @@ node-schedulable-timeout
node-startup-grace-period
node-status-update-frequency
node-sync-period
no-headers
no-headers
non-masquerade-cidr
no-suggestions
no-suggestions
num-nodes
oidc-ca-file
oidc-client-id
@ -474,7 +487,6 @@ oidc-issuer-url
oidc-username-claim
only-idl
oom-score-adj
out-version
outofdisk-transition-frequency
output-base
output-directory
@ -482,6 +494,7 @@ output-file-base
output-package
output-print-type
output-version
out-version
path-override
pod-cidr
pod-eviction-timeout
@ -506,6 +519,7 @@ proxy-logv
proxy-mode
proxy-port-range
public-address-override
pvclaimbinder-sync-period
pv-recycler-increment-timeout-nfs
pv-recycler-maximum-retry
pv-recycler-minimum-timeout-hostpath
@ -540,8 +554,8 @@ requestheader-client-ca-file
requestheader-extra-headers-prefix
requestheader-group-headers
requestheader-username-headers
require-kubeconfig
required-contexts
require-kubeconfig
resolv-conf
resource
resource-container
@ -624,6 +638,7 @@ sync-frequency
system-cgroups
system-pods-startup-timeout
system-reserved
system-reserved-cgroup
system-validate-mode
target-port
target-ram-mb
@ -637,6 +652,7 @@ tls-private-key-file
tls-sni-cert-key
to-version
token-auth-file
to-version
ttl-keys-prefix
ttl-secs
type-src
@ -652,6 +668,7 @@ use-service-account-credentials
use-kubernetes-version
use-taint-based-evictions
user-whitelist
use-service-account-credentials
verb
verify-only
versioned-clientset-package
@ -668,3 +685,4 @@ windows-line-endings
www-prefix
zone-id
zone-name
experimental-allocatable-ignore-eviction

View File

@ -16,6 +16,7 @@ go_library(
"container_manager_linux.go",
"container_manager_stub.go",
"helpers_linux.go",
"node_container_manager.go",
"pod_container_manager_linux.go",
"pod_container_manager_stub.go",
"types.go",
@ -25,6 +26,8 @@ go_library(
"//pkg/api/v1:go_default_library",
"//pkg/kubelet/cadvisor:go_default_library",
"//pkg/kubelet/cm/util:go_default_library",
"//pkg/kubelet/events:go_default_library",
"//pkg/kubelet/eviction/api:go_default_library",
"//pkg/kubelet/qos:go_default_library",
"//pkg/util:go_default_library",
"//pkg/util/mount:go_default_library",
@ -43,6 +46,7 @@ go_library(
"//vendor:k8s.io/apimachinery/pkg/util/runtime",
"//vendor:k8s.io/apimachinery/pkg/util/sets",
"//vendor:k8s.io/apimachinery/pkg/util/wait",
"//vendor:k8s.io/client-go/tools/record",
],
)
@ -52,11 +56,13 @@ go_test(
"cgroup_manager_linux_test.go",
"container_manager_linux_test.go",
"helpers_linux_test.go",
"node_container_manager_test.go",
],
library = ":go_default_library",
tags = ["automanaged"],
deps = [
"//pkg/api/v1:go_default_library",
"//pkg/kubelet/eviction/api:go_default_library",
"//pkg/util/mount:go_default_library",
"//vendor:github.com/stretchr/testify/assert",
"//vendor:github.com/stretchr/testify/require",

View File

@ -18,7 +18,9 @@ package cm
import (
"k8s.io/apimachinery/pkg/util/sets"
// TODO: Migrate kubelet to either use its own internal objects or client library.
"k8s.io/kubernetes/pkg/api/v1"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)
// Manages the containers running on a machine.
@ -47,6 +49,9 @@ type ContainerManager interface {
// GetQOSContainersInfo returns the names of top level QoS containers
GetQOSContainersInfo() QOSContainersInfo
// GetNodeAllocatableReservation returns the amount of compute resources that have to be reserved from scheduling.
GetNodeAllocatableReservation() v1.ResourceList
}
type NodeConfig struct {
@ -75,3 +80,10 @@ type Status struct {
// Any soft requirements that were unsatisfied.
SoftRequirements error
}
const (
// User visible keys for managing node allocatable enforcement on the node.
NodeAllocatableEnforcementKey = "pods"
SystemReservedEnforcementKey = "system-reserved"
KubeReservedEnforcementKey = "kube-reserved"
)
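
For context, a minimal sketch of how these keys are consumed elsewhere in this change: the values of --enforce-node-allocatable end up in a string set and each enforcement level is a membership check. sets is k8s.io/apimachinery/pkg/util/sets, and the literal values below are illustrative:

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/util/sets"
)

func main() {
	// Values taken from --enforce-node-allocatable, e.g. "pods,kube-reserved".
	enforce := sets.NewString("pods", "kube-reserved")

	fmt.Println("enforce pods cgroup limits:", enforce.Has("pods"))                // true
	fmt.Println("enforce system-reserved cgroup:", enforce.Has("system-reserved")) // false
	fmt.Println("enforce kube-reserved cgroup:", enforce.Has("kube-reserved"))     // true
}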

View File

@ -38,6 +38,7 @@ import (
"k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/tools/record"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/kubelet/cadvisor"
cmutil "k8s.io/kubernetes/pkg/kubelet/cm/util"
@ -101,10 +102,20 @@ type containerManagerImpl struct {
// External containers being managed.
systemContainers []*systemContainer
qosContainers QOSContainersInfo
periodicTasks []func()
// Tasks that are run periodically
periodicTasks []func()
// holds all the mounted cgroup subsystems
subsystems *CgroupSubsystems
nodeInfo *v1.Node
// Interface for cgroup management
cgroupManager CgroupManager
// Capacity of this node.
capacity v1.ResourceList
// Absolute cgroupfs path to a cgroup that Kubelet needs to place all pods under.
// This path includes a top level container for enforcing Node Allocatable.
cgroupRoot string
// Event recorder interface.
recorder record.EventRecorder
}
type features struct {
@ -167,7 +178,7 @@ func validateSystemRequirements(mountUtil mount.Interface) (features, error) {
// TODO(vmarmol): Add limits to the system containers.
// Takes the absolute name of the specified containers.
// Empty container name disables use of the specified container.
func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.Interface, nodeConfig NodeConfig, failSwapOn bool) (ContainerManager, error) {
func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.Interface, nodeConfig NodeConfig, failSwapOn bool, recorder record.EventRecorder) (ContainerManager, error) {
subsystems, err := GetCgroupSubsystems()
if err != nil {
return nil, fmt.Errorf("failed to get mounted cgroup subsystems: %v", err)
@ -204,7 +215,17 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
"This will be a fatal error by default starting in K8s v1.6! " +
"In the meantime, you can opt-in to making this a fatal error by enabling --experimental-fail-swap-on.")
}
var capacity = v1.ResourceList{}
// It is safe to invoke `MachineInfo` on cAdvisor before logically initializing cAdvisor here because
// machine info is computed and cached once as part of cAdvisor object creation.
if info, err := cadvisorInterface.MachineInfo(); err == nil {
capacity = cadvisor.CapacityFromMachineInfo(info)
} else {
return nil, err
}
cgroupRoot := nodeConfig.CgroupRoot
cgroupManager := NewCgroupManager(subsystems, nodeConfig.CgroupDriver)
// Check if Cgroup-root actually exists on the node
if nodeConfig.CgroupsPerQOS {
// this does default to / when enabled, but this tests against regressions.
@ -216,17 +237,24 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
// of note, we always use the cgroupfs driver when performing this check since
// the input is provided in that format.
// this is important because we do not want any name conversion to occur.
cgroupManager := NewCgroupManager(subsystems, "cgroupfs")
if !cgroupManager.Exists(CgroupName(nodeConfig.CgroupRoot)) {
return nil, fmt.Errorf("invalid configuration: cgroup-root doesn't exist: %v", err)
if !cgroupManager.Exists(CgroupName(cgroupRoot)) {
return nil, fmt.Errorf("invalid configuration: cgroup-root %q doesn't exist: %v", cgroupRoot, err)
}
glog.Infof("container manager verified cgroup-root exists: %v", nodeConfig.CgroupRoot)
glog.Infof("container manager verified user specified cgroup-root exists: %v", cgroupRoot)
// Include the top level cgroup for enforcing node allocatable into cgroup-root.
// This way, all sub modules can avoid having to understand the concept of node allocatable.
cgroupRoot = path.Join(cgroupRoot, defaultNodeAllocatableCgroupName)
}
glog.Infof("Creating Container Manager object based on Node Config: %+v", nodeConfig)
return &containerManagerImpl{
cadvisorInterface: cadvisorInterface,
mountUtil: mountUtil,
NodeConfig: nodeConfig,
subsystems: subsystems,
cgroupManager: cgroupManager,
capacity: capacity,
cgroupRoot: cgroupRoot,
recorder: recorder,
}, nil
}
@ -239,11 +267,11 @@ func (cm *containerManagerImpl) NewPodContainerManager() PodContainerManager {
qosContainersInfo: cm.qosContainers,
nodeInfo: cm.nodeInfo,
subsystems: cm.subsystems,
cgroupManager: NewCgroupManager(cm.subsystems, cm.NodeConfig.CgroupDriver),
cgroupManager: cm.cgroupManager,
}
}
return &podContainerManagerNoop{
cgroupRoot: CgroupName(cm.NodeConfig.CgroupRoot),
cgroupRoot: CgroupName(cm.cgroupRoot),
}
}
@ -373,13 +401,21 @@ func (cm *containerManagerImpl) setupNode() error {
// Setup top level qos containers only if CgroupsPerQOS flag is specified as true
if cm.NodeConfig.CgroupsPerQOS {
qosContainersInfo, err := InitQOS(cm.NodeConfig.CgroupDriver, cm.NodeConfig.CgroupRoot, cm.subsystems)
if err := cm.createNodeAllocatableCgroups(); err != nil {
return err
}
qosContainersInfo, err := InitQOS(cm.NodeConfig.CgroupDriver, cm.cgroupRoot, cm.subsystems)
if err != nil {
return fmt.Errorf("failed to initialise top level QOS containers: %v", err)
}
cm.qosContainers = qosContainersInfo
}
// Enforce Node Allocatable (if required)
if err := cm.enforceNodeAllocatableCgroups(); err != nil {
return err
}
systemContainers := []*systemContainer{}
if cm.ContainerRuntime == "docker" {
dockerVersion := getDockerVersion(cm.cadvisorInterface)
@ -405,11 +441,7 @@ func (cm *containerManagerImpl) setupNode() error {
})
} else if cm.RuntimeCgroupsName != "" {
cont := newSystemCgroups(cm.RuntimeCgroupsName)
var capacity = v1.ResourceList{}
if info, err := cm.cadvisorInterface.MachineInfo(); err == nil {
capacity = cadvisor.CapacityFromMachineInfo(info)
}
memoryLimit := (int64(capacity.Memory().Value() * DockerMemoryLimitThresholdPercent / 100))
memoryLimit := (int64(cm.capacity.Memory().Value() * DockerMemoryLimitThresholdPercent / 100))
if memoryLimit < MinDockerMemoryLimit {
glog.Warningf("Memory limit %d for container %s is too small, reset it to %d", memoryLimit, cm.RuntimeCgroupsName, MinDockerMemoryLimit)
memoryLimit = MinDockerMemoryLimit
@ -544,6 +576,10 @@ func (cm *containerManagerImpl) Start(node *v1.Node) error {
if err := cm.setupNode(); err != nil {
return err
}
// Ensure that node allocatable configuration is valid.
if err := cm.validateNodeAllocatable(); err != nil {
return err
}
// Don't run a background thread if there are no ensureStateFuncs.
hasEnsureStateFuncs := false
for _, cont := range cm.systemContainers {
@ -823,3 +859,7 @@ func getDockerVersion(cadvisor cadvisor.Interface) *utilversion.Version {
}
return dockerVersion
}
func (m *containerManagerImpl) GetCapacity() v1.ResourceList {
return m.capacity
}

View File

@ -50,6 +50,10 @@ func (cm *containerManagerStub) Status() Status {
return Status{}
}
func (cm *containerManagerStub) GetNodeAllocatableReservation() v1.ResourceList {
return nil
}
func (cm *containerManagerStub) NewPodContainerManager() PodContainerManager {
return &podContainerManagerStub{}
}

View File

@ -21,6 +21,7 @@ package cm
import (
"fmt"
"k8s.io/client-go/tools/record"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/kubelet/cadvisor"
"k8s.io/kubernetes/pkg/util/mount"
@ -55,10 +56,14 @@ func (cm *unsupportedContainerManager) Status() Status {
return Status{}
}
func (cm *unsupportedContainerManager) GetNodeAllocatableReservation() v1.ResourceList {
return nil
}
func (cm *unsupportedContainerManager) NewPodContainerManager() PodContainerManager {
return &unsupportedPodContainerManager{}
}
func NewContainerManager(_ mount.Interface, _ cadvisor.Interface, _ NodeConfig, failSwapOn bool) (ContainerManager, error) {
func NewContainerManager(_ mount.Interface, _ cadvisor.Interface, _ NodeConfig, failSwapOn bool, recorder record.EventRecorder) (ContainerManager, error) {
return &unsupportedContainerManager{}, nil
}

View File

@ -21,6 +21,7 @@ package cm
import (
"github.com/golang/glog"
"k8s.io/client-go/tools/record"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/kubelet/cadvisor"
"k8s.io/kubernetes/pkg/util/mount"
@ -37,6 +38,6 @@ func (cm *containerManagerImpl) Start(_ *v1.Node) error {
return nil
}
func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.Interface, nodeConfig NodeConfig, failSwapOn bool) (ContainerManager, error) {
func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.Interface, nodeConfig NodeConfig, failSwapOn bool, recorder record.EventRecorder) (ContainerManager, error) {
return &containerManagerImpl{}, nil
}

View File

@ -1,164 +0,0 @@
// +build linux
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cm
import (
"fmt"
"github.com/golang/glog"
"k8s.io/kubernetes/pkg/api/v1"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)
const (
defaultNodeAllocatableCgroupName = "/kubepods"
nodeAllocatableEnforcementKey = "pods"
systemReservedEnforcementKey = "system-reserved"
kubeReservedEnforcementKey = "kube-reserved"
)
func createNodeAllocatableCgroups(nc NodeAllocatableConfig, nodeAllocatable v1.ResourceList, cgroupManager CgroupManager) error {
cgroupConfig := &CgroupConfig{
Name: CgroupName(defaultNodeAllocatableCgroupName),
}
if err := cgroupManager.Create(cgroupConfig); err != nil {
glog.Errorf("Failed to create %q cgroup and apply limits")
return err
}
return nil
}
// Enforce Node Allocatable Cgroup settings.
func enforceNodeAllocatableCgroups(nc NodeAllocatableConfig, nodeAllocatable v1.ResourceList, cgroupManager CgroupManager) error {
glog.V(4).Infof("Attempting to enforce Node Allocatable with config: %+v", nc)
glog.V(4).Infof("Node Allocatable resources: %+v", nodeAllocatable)
// Create top level cgroups for all pods if necessary.
if nc.EnforceNodeAllocatable.Has(nodeAllocatableEnforcementKey) {
cgroupConfig := &CgroupConfig{
Name: CgroupName(defaultNodeAllocatableCgroupName),
ResourceParameters: getCgroupConfig(nodeAllocatable),
}
glog.V(4).Infof("Updating Node Allocatable cgroup with %d cpu shares and %d bytes of memory", cgroupConfig.ResourceParameters.CpuShares, cgroupConfig.ResourceParameters.Memory)
if err := cgroupManager.Update(cgroupConfig); err != nil {
glog.Errorf("Failed to create %q cgroup and apply limits")
return err
}
}
// Now apply kube reserved and system reserved limits if required.
if nc.EnforceNodeAllocatable.Has(systemReservedEnforcementKey) {
glog.V(2).Infof("Enforcing system reserved on cgroup %q with limits: %+v", nc.SystemReservedCgroupName, nc.SystemReserved)
if err := enforceExistingCgroup(cgroupManager, nc.SystemReservedCgroupName, nc.SystemReserved); err != nil {
return fmt.Errorf("failed to enforce System Reserved Cgroup Limits: %v", err)
}
}
if nc.EnforceNodeAllocatable.Has(kubeReservedEnforcementKey) {
glog.V(2).Infof("Enforcing kube reserved on cgroup %q with limits: %+v", nc.KubeReservedCgroupName, nc.KubeReserved)
if err := enforceExistingCgroup(cgroupManager, nc.KubeReservedCgroupName, nc.KubeReserved); err != nil {
return fmt.Errorf("failed to enforce Kube Reserved Cgroup Limits: %v", err)
}
}
return nil
}
func enforceExistingCgroup(cgroupManager CgroupManager, cName string, rl v1.ResourceList) error {
cgroupConfig := &CgroupConfig{
Name: CgroupName(cName),
ResourceParameters: getCgroupConfig(rl),
}
glog.V(4).Infof("Enforcing limits on cgroup %q with %d cpu shares and %d bytes of memory", cName, cgroupConfig.ResourceParameters.CpuShares, cgroupConfig.ResourceParameters.Memory)
if !cgroupManager.Exists(cgroupConfig.Name) {
return fmt.Errorf("%q cgroup does not exist", cgroupConfig.Name)
}
if err := cgroupManager.Update(cgroupConfig); err != nil {
return err
}
return nil
}
func getCgroupConfig(rl v1.ResourceList) *ResourceConfig {
// TODO(vishh): Set CPU Quota if necessary.
if rl == nil {
return nil
}
var rc ResourceConfig
if q, exists := rl[v1.ResourceMemory]; exists {
// Memory is defined in bytes.
val := q.Value()
rc.Memory = &val
}
if q, exists := rl[v1.ResourceCPU]; exists {
// CPU is defined in milli-cores.
val := MilliCPUToShares(q.MilliValue())
rc.CpuShares = &val
}
return &rc
}
func (cm *containerManagerImpl) getNodeAllocatableInternal(includeHardEviction bool) v1.ResourceList {
var evictionReservation v1.ResourceList
if includeHardEviction {
evictionReservation = hardEvictionReservation(cm.HardEvictionThresholds, cm.capacity)
}
result := make(v1.ResourceList)
for k, v := range cm.capacity {
value := *(v.Copy())
if cm.NodeConfig.SystemReserved != nil {
value.Sub(cm.NodeConfig.SystemReserved[k])
}
if cm.NodeConfig.KubeReserved != nil {
value.Sub(cm.NodeConfig.KubeReserved[k])
}
if evictionReservation != nil {
value.Sub(evictionReservation[k])
}
if value.Sign() < 0 {
// Negative Allocatable resources don't make sense.
value.Set(0)
}
result[k] = value
}
return result
}
// GetNodeAllocatable returns amount of compute resource available for pods.
func (cm *containerManagerImpl) GetNodeAllocatable() v1.ResourceList {
return cm.getNodeAllocatableInternal(!cm.NodeConfig.IgnoreHardEvictionThreshold)
}
// hardEvictionReservation returns a resourcelist that includes reservation of resources based on hard eviction thresholds.
func hardEvictionReservation(thresholds []evictionapi.Threshold, capacity v1.ResourceList) v1.ResourceList {
if len(thresholds) == 0 {
return nil
}
ret := v1.ResourceList{}
for _, threshold := range thresholds {
if threshold.Operator != evictionapi.OpLessThan {
continue
}
switch threshold.Signal {
case evictionapi.SignalMemoryAvailable:
memoryCapacity := capacity[v1.ResourceMemory]
value := evictionapi.GetThresholdQuantity(threshold.Value, &memoryCapacity)
ret[v1.ResourceMemory] = *value
}
}
return ret
}

View File

@ -0,0 +1,229 @@
// +build linux
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cm
import (
"fmt"
"strings"
"time"
"github.com/golang/glog"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/kubelet/events"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)
const (
defaultNodeAllocatableCgroupName = "kubepods"
)
func (cm *containerManagerImpl) createNodeAllocatableCgroups() error {
cgroupConfig := &CgroupConfig{
Name: CgroupName(cm.cgroupRoot),
// The default limits for cpu shares can be very low which can lead to CPU starvation for pods.
ResourceParameters: getCgroupConfig(cm.capacity),
}
if cm.cgroupManager.Exists(cgroupConfig.Name) {
return nil
}
if err := cm.cgroupManager.Create(cgroupConfig); err != nil {
glog.Errorf("Failed to create %q cgroup", cm.cgroupRoot)
return err
}
return nil
}
// Enforce Node Allocatable Cgroup settings.
func (cm *containerManagerImpl) enforceNodeAllocatableCgroups() error {
nc := cm.NodeConfig.NodeAllocatableConfig
// We need to update limits on node allocatable cgroup no matter what because
// default cpu shares on cgroups are low and can cause cpu starvation.
nodeAllocatable := cm.capacity
// Use Node Allocatable limits instead of capacity if the user requested enforcing node allocatable.
if cm.CgroupsPerQOS && nc.EnforceNodeAllocatable.Has(NodeAllocatableEnforcementKey) {
nodeAllocatable = cm.getNodeAllocatableAbsolute()
}
glog.V(4).Infof("Attempting to enforce Node Allocatable with config: %+v", nc)
cgroupConfig := &CgroupConfig{
Name: CgroupName(cm.cgroupRoot),
ResourceParameters: getCgroupConfig(nodeAllocatable),
}
// If Node Allocatable is enforced on a node that has not been drained, or is updated on an existing node to a lower value,
// existing memory usage across pods might be higher than the current Node Allocatable memory limits.
// Pod evictions are expected to bring memory usage back below Node Allocatable limits.
// Until those evictions happen, retry cgroup updates.
// Update limits on the non-root cgroup-root regardless, since the default limits for CPU can be too low.
if cm.cgroupRoot != "/" {
go func() {
for {
err := cm.cgroupManager.Update(cgroupConfig)
if err == nil {
cm.recorder.Event(cm.nodeInfo, v1.EventTypeNormal, events.SuccessfulNodeAllocatableEnforcement, "Updated Node Allocatable limit across pods")
return
}
message := fmt.Sprintf("Failed to update Node Allocatable Limits %q: %v", cm.cgroupRoot, err)
cm.recorder.Event(cm.nodeInfo, v1.EventTypeWarning, events.FailedNodeAllocatableEnforcement, message)
time.Sleep(time.Minute)
}
}()
}
// Now apply kube reserved and system reserved limits if required.
if nc.EnforceNodeAllocatable.Has(SystemReservedEnforcementKey) {
glog.V(2).Infof("Enforcing System reserved on cgroup %q with limits: %+v", nc.SystemReservedCgroupName, nc.SystemReserved)
if err := enforceExistingCgroup(cm.cgroupManager, nc.SystemReservedCgroupName, nc.SystemReserved); err != nil {
message := fmt.Sprintf("Failed to enforce System Reserved Cgroup Limits on %q: %v", nc.SystemReservedCgroupName, err)
cm.recorder.Event(cm.nodeInfo, v1.EventTypeWarning, events.FailedNodeAllocatableEnforcement, message)
return fmt.Errorf(message)
}
cm.recorder.Eventf(cm.nodeInfo, v1.EventTypeNormal, events.SuccessfulNodeAllocatableEnforcement, "Updated limits on system reserved cgroup %v", nc.SystemReservedCgroupName)
}
if nc.EnforceNodeAllocatable.Has(KubeReservedEnforcementKey) {
glog.V(2).Infof("Enforcing kube reserved on cgroup %q with limits: %+v", nc.KubeReservedCgroupName, nc.KubeReserved)
if err := enforceExistingCgroup(cm.cgroupManager, nc.KubeReservedCgroupName, nc.KubeReserved); err != nil {
message := fmt.Sprintf("Failed to enforce Kube Reserved Cgroup Limits on %q: %v", nc.KubeReservedCgroupName, err)
cm.recorder.Event(cm.nodeInfo, v1.EventTypeWarning, events.FailedNodeAllocatableEnforcement, message)
return fmt.Errorf(message)
}
cm.recorder.Eventf(cm.nodeInfo, v1.EventTypeNormal, events.SuccessfulNodeAllocatableEnforcement, "Updated limits on kube reserved cgroup %v", nc.KubeReservedCgroupName)
}
return nil
}
// enforceExistingCgroup updates the limits `rl` on existing cgroup `cName` using `cgroupManager` interface.
func enforceExistingCgroup(cgroupManager CgroupManager, cName string, rl v1.ResourceList) error {
cgroupConfig := &CgroupConfig{
Name: CgroupName(cName),
ResourceParameters: getCgroupConfig(rl),
}
glog.V(4).Infof("Enforcing limits on cgroup %q with %d cpu shares and %d bytes of memory", cName, cgroupConfig.ResourceParameters.CpuShares, cgroupConfig.ResourceParameters.Memory)
if !cgroupManager.Exists(cgroupConfig.Name) {
return fmt.Errorf("%q cgroup does not exist", cgroupConfig.Name)
}
if err := cgroupManager.Update(cgroupConfig); err != nil {
return err
}
return nil
}
// Returns a ResourceConfig object that can be used to create or update cgroups via CgroupManager interface.
func getCgroupConfig(rl v1.ResourceList) *ResourceConfig {
// TODO(vishh): Set CPU Quota if necessary.
if rl == nil {
return nil
}
var rc ResourceConfig
if q, exists := rl[v1.ResourceMemory]; exists {
// Memory is defined in bytes.
val := q.Value()
rc.Memory = &val
}
if q, exists := rl[v1.ResourceCPU]; exists {
// CPU is defined in milli-cores.
val := MilliCPUToShares(q.MilliValue())
rc.CpuShares = &val
}
return &rc
}
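
A worked sketch of the conversion above, assuming the usual 1024-cpu-shares-per-core convention; milliCPUToShares below is a local stand-in for the MilliCPUToShares helper used in the diff:

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func milliCPUToShares(milliCPU int64) int64 {
	const sharesPerCPU = 1024
	shares := milliCPU * sharesPerCPU / 1000
	if shares < 2 {
		shares = 2 // the kernel enforces a minimum of 2 cpu shares
	}
	return shares
}

func main() {
	// e.g. kube-reserved of cpu=200m,memory=150Mi
	cpu := resource.MustParse("200m")
	mem := resource.MustParse("150Mi")
	fmt.Println("cpu.shares:", milliCPUToShares(cpu.MilliValue())) // 204
	fmt.Println("memory limit in bytes:", mem.Value())             // 157286400
}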
// getNodeAllocatableAbsolute returns the absolute value of Node Allocatable which is primarily useful for enforcement.
// Note that not all resources that are available on the node are included in the returned list of resources.
// Returns a ResourceList.
func (cm *containerManagerImpl) getNodeAllocatableAbsolute() v1.ResourceList {
result := make(v1.ResourceList)
for k, v := range cm.capacity {
value := *(v.Copy())
if cm.NodeConfig.SystemReserved != nil {
value.Sub(cm.NodeConfig.SystemReserved[k])
}
if cm.NodeConfig.KubeReserved != nil {
value.Sub(cm.NodeConfig.KubeReserved[k])
}
if value.Sign() < 0 {
// Negative Allocatable resources don't make sense.
value.Set(0)
}
result[k] = value
}
return result
}
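
A worked sketch of that subtraction using only the apimachinery resource package: with 10 CPU / 10Gi capacity, 100m/100Mi kube-reserved and 50m/50Mi system-reserved, it yields 9850m CPU and 10090Mi memory, matching the enforcement test expectations further down.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	capacity := map[string]resource.Quantity{
		"cpu":    resource.MustParse("10"),
		"memory": resource.MustParse("10Gi"),
	}
	kubeReserved := map[string]resource.Quantity{
		"cpu":    resource.MustParse("100m"),
		"memory": resource.MustParse("100Mi"),
	}
	systemReserved := map[string]resource.Quantity{
		"cpu":    resource.MustParse("50m"),
		"memory": resource.MustParse("50Mi"),
	}
	for name, c := range capacity {
		allocatable := c.DeepCopy()
		allocatable.Sub(kubeReserved[name])
		allocatable.Sub(systemReserved[name])
		if allocatable.Sign() < 0 {
			allocatable.Set(0) // negative allocatable does not make sense
		}
		fmt.Printf("%s allocatable: %s\n", name, allocatable.String())
	}
}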
// GetNodeAllocatableReservation returns the amount of compute resources that have to be reserved from scheduling on this node.
func (cm *containerManagerImpl) GetNodeAllocatableReservation() v1.ResourceList {
evictionReservation := hardEvictionReservation(cm.HardEvictionThresholds, cm.capacity)
result := make(v1.ResourceList)
for k := range cm.capacity {
value := resource.NewQuantity(0, resource.DecimalSI)
if cm.NodeConfig.SystemReserved != nil {
value.Add(cm.NodeConfig.SystemReserved[k])
}
if cm.NodeConfig.KubeReserved != nil {
value.Add(cm.NodeConfig.KubeReserved[k])
}
if evictionReservation != nil {
value.Add(evictionReservation[k])
}
if !value.IsZero() {
result[k] = *value
}
}
return result
}
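
The scheduling-side reservation goes the other way: the reserved amounts and the hard eviction reservation are added up rather than subtracted from capacity. A small sketch for the memory dimension under the same assumptions (100Mi kube-reserved, 50Mi system-reserved, 100Mi hard eviction threshold), matching the 250Mi expectation in the tests below:

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	kubeReserved := resource.MustParse("100Mi")
	systemReserved := resource.MustParse("50Mi")
	hardEviction := resource.MustParse("100Mi")

	reservation := resource.NewQuantity(0, resource.BinarySI)
	reservation.Add(kubeReserved)
	reservation.Add(systemReserved)
	reservation.Add(hardEviction)

	fmt.Println("memory reservation:", reservation.String()) // 250Mi
}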
// hardEvictionReservation returns a resourcelist that includes reservation of resources based on hard eviction thresholds.
func hardEvictionReservation(thresholds []evictionapi.Threshold, capacity v1.ResourceList) v1.ResourceList {
if len(thresholds) == 0 {
return nil
}
ret := v1.ResourceList{}
for _, threshold := range thresholds {
if threshold.Operator != evictionapi.OpLessThan {
continue
}
switch threshold.Signal {
case evictionapi.SignalMemoryAvailable:
memoryCapacity := capacity[v1.ResourceMemory]
value := evictionapi.GetThresholdQuantity(threshold.Value, &memoryCapacity)
ret[v1.ResourceMemory] = *value
}
}
return ret
}
// validateNodeAllocatable ensures that the user specified Node Allocatable Configuration doesn't reserve more than the node capacity.
// Returns error if the configuration is invalid, nil otherwise.
func (cm *containerManagerImpl) validateNodeAllocatable() error {
var errors []string
na := cm.GetNodeAllocatableReservation()
for key, reserved := range na {
capacity := cm.capacity[key]
allocatable := *(capacity.Copy())
allocatable.Sub(reserved)
if allocatable.Sign() < 0 {
errors = append(errors, fmt.Sprintf("Resource %q has a reservation of %s which exceeds its capacity of %s.", key, reserved.String(), capacity.String()))
}
}
if len(errors) > 0 {
return fmt.Errorf("Invalid Node Allocatable configuration. %s", strings.Join(errors, " "))
}
return nil
}

View File

@ -0,0 +1,305 @@
// +build linux
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cm
import (
"testing"
"github.com/stretchr/testify/assert"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/kubernetes/pkg/api/v1"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)
func TestNodeAllocatableReservationForScheduling(t *testing.T) {
memoryEvictionThreshold := resource.MustParse("100Mi")
testCases := []struct {
kubeReserved v1.ResourceList
systemReserved v1.ResourceList
expected v1.ResourceList
capacity v1.ResourceList
hardThreshold evictionapi.ThresholdValue
}{
{
kubeReserved: getResourceList("100m", "100Mi"),
systemReserved: getResourceList("50m", "50Mi"),
capacity: getResourceList("10", "10Gi"),
expected: getResourceList("150m", "150Mi"),
},
{
kubeReserved: getResourceList("100m", "100Mi"),
systemReserved: getResourceList("50m", "50Mi"),
hardThreshold: evictionapi.ThresholdValue{
Quantity: &memoryEvictionThreshold,
},
capacity: getResourceList("10", "10Gi"),
expected: getResourceList("150m", "250Mi"),
},
{
kubeReserved: getResourceList("100m", "100Mi"),
systemReserved: getResourceList("50m", "50Mi"),
capacity: getResourceList("10", "10Gi"),
hardThreshold: evictionapi.ThresholdValue{
Percentage: 0.05,
},
expected: getResourceList("150m", "694157320"),
},
{
kubeReserved: v1.ResourceList{},
systemReserved: v1.ResourceList{},
capacity: getResourceList("10", "10Gi"),
expected: getResourceList("", ""),
},
{
kubeReserved: getResourceList("", "100Mi"),
systemReserved: getResourceList("50m", "50Mi"),
capacity: getResourceList("10", "10Gi"),
expected: getResourceList("50m", "150Mi"),
},
{
kubeReserved: getResourceList("50m", "100Mi"),
systemReserved: getResourceList("", "50Mi"),
capacity: getResourceList("10", "10Gi"),
expected: getResourceList("50m", "150Mi"),
},
{
kubeReserved: getResourceList("", "100Mi"),
systemReserved: getResourceList("", "50Mi"),
capacity: getResourceList("10", ""),
expected: getResourceList("", "150Mi"),
},
}
for idx, tc := range testCases {
nc := NodeConfig{
NodeAllocatableConfig: NodeAllocatableConfig{
KubeReserved: tc.kubeReserved,
SystemReserved: tc.systemReserved,
HardEvictionThresholds: []evictionapi.Threshold{
{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: tc.hardThreshold,
},
},
},
}
cm := &containerManagerImpl{
NodeConfig: nc,
capacity: tc.capacity,
}
for k, v := range cm.GetNodeAllocatableReservation() {
expected, exists := tc.expected[k]
assert.True(t, exists, "test case %d expected resource %q", idx+1, k)
assert.Equal(t, expected.MilliValue(), v.MilliValue(), "test case %d failed for resource %q", idx+1, k)
}
}
}
func TestNodeAllocatableWithNilHardThreshold(t *testing.T) {
nc := NodeConfig{
NodeAllocatableConfig: NodeAllocatableConfig{
KubeReserved: getResourceList("100m", "100Mi"),
SystemReserved: getResourceList("50m", "50Mi"),
},
}
cm := &containerManagerImpl{
NodeConfig: nc,
capacity: getResourceList("10", "10Gi"),
}
expected := getResourceList("150m", "150Mi")
for k, v := range cm.GetNodeAllocatableReservation() {
expected, exists := expected[k]
assert.True(t, exists)
assert.Equal(t, expected.MilliValue(), v.MilliValue(), "failed for resource %q", k)
}
}
func TestNodeAllocatableForEnforcement(t *testing.T) {
memoryEvictionThreshold := resource.MustParse("100Mi")
testCases := []struct {
kubeReserved v1.ResourceList
systemReserved v1.ResourceList
capacity v1.ResourceList
expected v1.ResourceList
hardThreshold evictionapi.ThresholdValue
}{
{
kubeReserved: getResourceList("100m", "100Mi"),
systemReserved: getResourceList("50m", "50Mi"),
capacity: getResourceList("10", "10Gi"),
expected: getResourceList("9850m", "10090Mi"),
},
{
kubeReserved: getResourceList("100m", "100Mi"),
systemReserved: getResourceList("50m", "50Mi"),
hardThreshold: evictionapi.ThresholdValue{
Quantity: &memoryEvictionThreshold,
},
capacity: getResourceList("10", "10Gi"),
expected: getResourceList("9850m", "10090Mi"),
},
{
kubeReserved: getResourceList("100m", "100Mi"),
systemReserved: getResourceList("50m", "50Mi"),
hardThreshold: evictionapi.ThresholdValue{
Percentage: 0.05,
},
capacity: getResourceList("10", "10Gi"),
expected: getResourceList("9850m", "10090Mi"),
},
{
kubeReserved: v1.ResourceList{},
systemReserved: v1.ResourceList{},
capacity: getResourceList("10", "10Gi"),
expected: getResourceList("10", "10Gi"),
},
{
kubeReserved: getResourceList("", "100Mi"),
systemReserved: getResourceList("50m", "50Mi"),
capacity: getResourceList("10", "10Gi"),
expected: getResourceList("9950m", "10090Mi"),
},
{
kubeReserved: getResourceList("50m", "100Mi"),
systemReserved: getResourceList("", "50Mi"),
capacity: getResourceList("10", "10Gi"),
expected: getResourceList("9950m", "10090Mi"),
},
{
kubeReserved: getResourceList("", "100Mi"),
systemReserved: getResourceList("", "50Mi"),
capacity: getResourceList("10", ""),
expected: getResourceList("10", ""),
},
}
for idx, tc := range testCases {
nc := NodeConfig{
NodeAllocatableConfig: NodeAllocatableConfig{
KubeReserved: tc.kubeReserved,
SystemReserved: tc.systemReserved,
HardEvictionThresholds: []evictionapi.Threshold{
{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: tc.hardThreshold,
},
},
},
}
cm := &containerManagerImpl{
NodeConfig: nc,
capacity: tc.capacity,
}
for k, v := range cm.getNodeAllocatableAbsolute() {
expected, exists := tc.expected[k]
assert.True(t, exists)
assert.Equal(t, expected.MilliValue(), v.MilliValue(), "test case %d failed for resource %q", idx+1, k)
}
}
}
func TestNodeAllocatableInputValidation(t *testing.T) {
memoryEvictionThreshold := resource.MustParse("100Mi")
highMemoryEvictionThreshold := resource.MustParse("2Gi")
testCases := []struct {
kubeReserved v1.ResourceList
systemReserved v1.ResourceList
capacity v1.ResourceList
hardThreshold evictionapi.ThresholdValue
invalidConfiguration bool
}{
{
kubeReserved: getResourceList("100m", "100Mi"),
systemReserved: getResourceList("50m", "50Mi"),
capacity: getResourceList("10", "10Gi"),
},
{
kubeReserved: getResourceList("100m", "100Mi"),
systemReserved: getResourceList("50m", "50Mi"),
hardThreshold: evictionapi.ThresholdValue{
Quantity: &memoryEvictionThreshold,
},
capacity: getResourceList("10", "10Gi"),
},
{
kubeReserved: getResourceList("100m", "100Mi"),
systemReserved: getResourceList("50m", "50Mi"),
hardThreshold: evictionapi.ThresholdValue{
Percentage: 0.05,
},
capacity: getResourceList("10", "10Gi"),
},
{
kubeReserved: v1.ResourceList{},
systemReserved: v1.ResourceList{},
capacity: getResourceList("10", "10Gi"),
},
{
kubeReserved: getResourceList("", "100Mi"),
systemReserved: getResourceList("50m", "50Mi"),
capacity: getResourceList("10", "10Gi"),
},
{
kubeReserved: getResourceList("50m", "100Mi"),
systemReserved: getResourceList("", "50Mi"),
capacity: getResourceList("10", "10Gi"),
},
{
kubeReserved: getResourceList("", "100Mi"),
systemReserved: getResourceList("", "50Mi"),
capacity: getResourceList("10", ""),
},
{
kubeReserved: getResourceList("5", "10Gi"),
systemReserved: getResourceList("5", "10Gi"),
hardThreshold: evictionapi.ThresholdValue{
Quantity: &highMemoryEvictionThreshold,
},
capacity: getResourceList("10", "11Gi"),
invalidConfiguration: true,
},
}
for _, tc := range testCases {
nc := NodeConfig{
NodeAllocatableConfig: NodeAllocatableConfig{
KubeReserved: tc.kubeReserved,
SystemReserved: tc.systemReserved,
HardEvictionThresholds: []evictionapi.Threshold{
{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: tc.hardThreshold,
},
},
},
}
cm := &containerManagerImpl{
NodeConfig: nc,
capacity: tc.capacity,
}
err := cm.validateNodeAllocatable()
if err == nil && tc.invalidConfiguration {
t.Errorf("Expected invalid node allocatable configuration to be rejected")
} else if err != nil && !tc.invalidConfiguration {
t.Errorf("Expected valid node allocatable configuration: %v", err)
}
}
}

View File

@ -41,28 +41,30 @@ const (
BackOffPullImage = "BackOff"
// kubelet event reason list
NodeReady = "NodeReady"
NodeNotReady = "NodeNotReady"
NodeSchedulable = "NodeSchedulable"
NodeNotSchedulable = "NodeNotSchedulable"
StartingKubelet = "Starting"
KubeletSetupFailed = "KubeletSetupFailed"
FailedDetachVolume = "FailedDetachVolume"
FailedMountVolume = "FailedMount"
FailedUnMountVolume = "FailedUnMount"
SuccessfulDetachVolume = "SuccessfulDetachVolume"
SuccessfulMountVolume = "SuccessfulMountVolume"
SuccessfulUnMountVolume = "SuccessfulUnMountVolume"
HostPortConflict = "HostPortConflict"
NodeSelectorMismatching = "NodeSelectorMismatching"
InsufficientFreeCPU = "InsufficientFreeCPU"
InsufficientFreeMemory = "InsufficientFreeMemory"
OutOfDisk = "OutOfDisk"
HostNetworkNotSupported = "HostNetworkNotSupported"
UndefinedShaper = "NilShaper"
NodeRebooted = "Rebooted"
ContainerGCFailed = "ContainerGCFailed"
ImageGCFailed = "ImageGCFailed"
NodeReady = "NodeReady"
NodeNotReady = "NodeNotReady"
NodeSchedulable = "NodeSchedulable"
NodeNotSchedulable = "NodeNotSchedulable"
StartingKubelet = "Starting"
KubeletSetupFailed = "KubeletSetupFailed"
FailedDetachVolume = "FailedDetachVolume"
FailedMountVolume = "FailedMount"
FailedUnMountVolume = "FailedUnMount"
SuccessfulDetachVolume = "SuccessfulDetachVolume"
SuccessfulMountVolume = "SuccessfulMountVolume"
SuccessfulUnMountVolume = "SuccessfulUnMountVolume"
HostPortConflict = "HostPortConflict"
NodeSelectorMismatching = "NodeSelectorMismatching"
InsufficientFreeCPU = "InsufficientFreeCPU"
InsufficientFreeMemory = "InsufficientFreeMemory"
OutOfDisk = "OutOfDisk"
HostNetworkNotSupported = "HostNetworkNotSupported"
UndefinedShaper = "NilShaper"
NodeRebooted = "Rebooted"
ContainerGCFailed = "ContainerGCFailed"
ImageGCFailed = "ImageGCFailed"
FailedNodeAllocatableEnforcement = "FailedNodeAllocatableEnforcement"
SuccessfulNodeAllocatableEnforcement = "NodeAllocatableEnforced"
// Image manager event reason list
InvalidDiskCapacity = "InvalidDiskCapacity"

View File

@ -33,6 +33,7 @@ go_test(
"//pkg/api:go_default_library",
"//pkg/api/v1:go_default_library",
"//pkg/kubelet/api/v1alpha1/stats:go_default_library",
"//pkg/kubelet/eviction/api:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
"//pkg/kubelet/types:go_default_library",
"//pkg/quota:go_default_library",
@ -62,6 +63,7 @@ go_library(
"//pkg/features:go_default_library",
"//pkg/kubelet/api/v1alpha1/stats:go_default_library",
"//pkg/kubelet/cm:go_default_library",
"//pkg/kubelet/eviction/api:go_default_library",
"//pkg/kubelet/lifecycle:go_default_library",
"//pkg/kubelet/pod:go_default_library",
"//pkg/kubelet/qos:go_default_library",
@ -90,6 +92,9 @@ filegroup(
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
srcs = [
":package-srcs",
"//pkg/kubelet/eviction/api:all-srcs",
],
tags = ["automanaged"],
)

View File

@ -0,0 +1,28 @@
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
go_library(
name = "go_default_library",
srcs = ["types.go"],
tags = ["automanaged"],
deps = ["//vendor:k8s.io/apimachinery/pkg/api/resource"],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)

View File

@ -0,0 +1,79 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package api
import (
"time"
"k8s.io/apimachinery/pkg/api/resource"
)
// Signal defines a signal that can trigger eviction of pods on a node.
type Signal string
const (
// SignalMemoryAvailable is memory available (i.e. capacity - workingSet), in bytes.
SignalMemoryAvailable Signal = "memory.available"
// SignalNodeFsAvailable is the amount of storage available on the filesystem that the kubelet uses for volumes, daemon logs, etc.
SignalNodeFsAvailable Signal = "nodefs.available"
// SignalNodeFsInodesFree is the number of inodes free on the filesystem that the kubelet uses for volumes, daemon logs, etc.
SignalNodeFsInodesFree Signal = "nodefs.inodesFree"
// SignalImageFsAvailable is the amount of storage available on the filesystem that the container runtime uses for storing images and container writable layers.
SignalImageFsAvailable Signal = "imagefs.available"
// SignalImageFsInodesFree is the number of inodes free on the filesystem that the container runtime uses for storing images and container writable layers.
SignalImageFsInodesFree Signal = "imagefs.inodesFree"
)
// ThresholdOperator is the operator used to express a Threshold.
type ThresholdOperator string
const (
// OpLessThan is the less-than comparison operator.
OpLessThan ThresholdOperator = "LessThan"
)
// ThresholdValue is a value holder that abstracts a literal quantity versus a percentage-based quantity.
type ThresholdValue struct {
// The following fields are exclusive. Only the topmost non-zero field is used.
// Quantity is a quantity associated with the signal that is evaluated against the specified operator.
Quantity *resource.Quantity
// Percentage represents the usage percentage over the total resource that is evaluated against the specified operator.
Percentage float32
}
// Threshold defines a metric for when eviction should occur.
type Threshold struct {
// Signal defines the entity that was measured.
Signal Signal
// Operator represents a relationship of a signal to a value.
Operator ThresholdOperator
// Value is the threshold the resource is evaluated against.
Value ThresholdValue
// GracePeriod represents the amount of time that a threshold must be met before eviction is triggered.
GracePeriod time.Duration
// MinReclaim represents the minimum amount of resource to reclaim if the threshold is met.
MinReclaim *ThresholdValue
}
// GetThresholdQuantity returns the expected quantity value for a thresholdValue
func GetThresholdQuantity(value ThresholdValue, capacity *resource.Quantity) *resource.Quantity {
if value.Quantity != nil {
return value.Quantity.Copy()
}
return resource.NewQuantity(int64(float64(capacity.Value())*float64(value.Percentage)), resource.BinarySI)
}
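
Since eviction thresholds now live in their own pkg/kubelet/eviction/api package, other kubelet components (such as the container manager above) can build them directly. A small usage sketch, assuming the import alias evictionapi used throughout this commit; the 8Gi capacity is made up:

package main

import (
    "fmt"

    "k8s.io/apimachinery/pkg/api/resource"
    evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)

func main() {
    capacity := resource.MustParse("8Gi")

    // Evict when less than 10% of memory is available.
    threshold := evictionapi.Threshold{
        Signal:   evictionapi.SignalMemoryAvailable,
        Operator: evictionapi.OpLessThan,
        Value:    evictionapi.ThresholdValue{Percentage: 0.1},
    }

    // GetThresholdQuantity resolves the percentage against the observed capacity.
    quantity := evictionapi.GetThresholdQuantity(threshold.Value, &capacity)
    fmt.Printf("%s %s %s of %s\n", threshold.Signal, threshold.Operator, quantity.String(), capacity.String())
}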

View File

@ -33,6 +33,7 @@ import (
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/cm"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
"k8s.io/kubernetes/pkg/kubelet/qos"
@ -66,7 +67,7 @@ type managerImpl struct {
// records when a threshold was first observed
thresholdsFirstObservedAt thresholdsObservedAt
// records the set of thresholds that have been met (including graceperiod) but not yet resolved
thresholdsMet []Threshold
thresholdsMet []evictionapi.Threshold
// resourceToRankFunc maps a resource to ranking function for that resource.
resourceToRankFunc map[v1.ResourceName]rankFunc
// resourceToNodeReclaimFuncs maps a resource to an ordered list of functions that know how to reclaim that resource.
@ -152,12 +153,12 @@ func (m *managerImpl) IsUnderDiskPressure() bool {
return hasNodeCondition(m.nodeConditions, v1.NodeDiskPressure)
}
func startMemoryThresholdNotifier(thresholds []Threshold, observations signalObservations, hard bool, handler thresholdNotifierHandlerFunc) error {
func startMemoryThresholdNotifier(thresholds []evictionapi.Threshold, observations signalObservations, hard bool, handler thresholdNotifierHandlerFunc) error {
for _, threshold := range thresholds {
if threshold.Signal != SignalMemoryAvailable || hard != isHardEvictionThreshold(threshold) {
if threshold.Signal != evictionapi.SignalMemoryAvailable || hard != isHardEvictionThreshold(threshold) {
continue
}
observed, found := observations[SignalMemoryAvailable]
observed, found := observations[evictionapi.SignalMemoryAvailable]
if !found {
continue
}
@ -171,7 +172,7 @@ func startMemoryThresholdNotifier(thresholds []Threshold, observations signalObs
return fmt.Errorf("memory cgroup mount point not found")
}
attribute := "memory.usage_in_bytes"
quantity := getThresholdQuantity(threshold.Value, observed.capacity)
quantity := evictionapi.GetThresholdQuantity(threshold.Value, observed.capacity)
usageThreshold := resource.NewQuantity(observed.capacity.Value(), resource.DecimalSI)
usageThreshold.Sub(*quantity)
description := fmt.Sprintf("<%s available", formatThresholdValue(threshold.Value))
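
To make the notifier arithmetic above concrete: the kernel fires on memory.usage_in_bytes, so the "available" threshold is flipped into a usage threshold of capacity minus the threshold quantity. A self-contained sketch with made-up numbers (an 8Gi node and a 1Gi hard threshold), not the manager's actual wiring:

package main

import (
    "fmt"

    "k8s.io/apimachinery/pkg/api/resource"
    evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)

func main() {
    capacity := resource.MustParse("8Gi")
    threshold := evictionapi.ThresholdValue{Quantity: resource.NewQuantity(1<<30, resource.BinarySI)} // 1Gi

    quantity := evictionapi.GetThresholdQuantity(threshold, &capacity)
    usageThreshold := resource.NewQuantity(capacity.Value(), resource.DecimalSI)
    usageThreshold.Sub(*quantity)

    // On an 8Gi node with a 1Gi threshold, the notifier is armed at 7Gi of usage.
    fmt.Printf("arm memory.usage_in_bytes notifier at %v bytes\n", usageThreshold.Value())
}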

View File

@ -29,6 +29,7 @@ import (
kubeapi "k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/v1"
statsapi "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
)
@ -180,18 +181,18 @@ func TestMemoryPressure(t *testing.T) {
config := Config{
MaxPodGracePeriodSeconds: 5,
PressureTransitionPeriod: time.Minute * 5,
Thresholds: []Threshold{
Thresholds: []evictionapi.Threshold{
{
Signal: SignalMemoryAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Gi"),
},
},
{
Signal: SignalMemoryAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("2Gi"),
},
GracePeriod: time.Minute * 2,
@ -397,18 +398,18 @@ func TestDiskPressureNodeFs(t *testing.T) {
config := Config{
MaxPodGracePeriodSeconds: 5,
PressureTransitionPeriod: time.Minute * 5,
Thresholds: []Threshold{
Thresholds: []evictionapi.Threshold{
{
Signal: SignalNodeFsAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalNodeFsAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Gi"),
},
},
{
Signal: SignalNodeFsAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalNodeFsAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("2Gi"),
},
GracePeriod: time.Minute * 2,
@ -594,14 +595,14 @@ func TestMinReclaim(t *testing.T) {
config := Config{
MaxPodGracePeriodSeconds: 5,
PressureTransitionPeriod: time.Minute * 5,
Thresholds: []Threshold{
Thresholds: []evictionapi.Threshold{
{
Signal: SignalMemoryAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Gi"),
},
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Quantity: quantityMustParse("500Mi"),
},
},
@ -733,14 +734,14 @@ func TestNodeReclaimFuncs(t *testing.T) {
config := Config{
MaxPodGracePeriodSeconds: 5,
PressureTransitionPeriod: time.Minute * 5,
Thresholds: []Threshold{
Thresholds: []evictionapi.Threshold{
{
Signal: SignalNodeFsAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalNodeFsAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Gi"),
},
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Quantity: quantityMustParse("500Mi"),
},
},
@ -925,18 +926,18 @@ func TestInodePressureNodeFsInodes(t *testing.T) {
config := Config{
MaxPodGracePeriodSeconds: 5,
PressureTransitionPeriod: time.Minute * 5,
Thresholds: []Threshold{
Thresholds: []evictionapi.Threshold{
{
Signal: SignalNodeFsInodesFree,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalNodeFsInodesFree,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Mi"),
},
},
{
Signal: SignalNodeFsInodesFree,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalNodeFsInodesFree,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("2Mi"),
},
GracePeriod: time.Minute * 2,
@ -1127,18 +1128,18 @@ func TestCriticalPodsAreNotEvicted(t *testing.T) {
config := Config{
MaxPodGracePeriodSeconds: 5,
PressureTransitionPeriod: time.Minute * 5,
Thresholds: []Threshold{
Thresholds: []evictionapi.Threshold{
{
Signal: SignalMemoryAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Gi"),
},
},
{
Signal: SignalMemoryAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("2Gi"),
},
GracePeriod: time.Minute * 2,

View File

@ -29,6 +29,7 @@ import (
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/v1"
statsapi "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
"k8s.io/kubernetes/pkg/kubelet/qos"
"k8s.io/kubernetes/pkg/kubelet/server/stats"
"k8s.io/kubernetes/pkg/quota/evaluator/core"
@ -56,44 +57,44 @@ const (
var (
// signalToNodeCondition maps a signal to the node condition to report if threshold is met.
signalToNodeCondition map[Signal]v1.NodeConditionType
signalToNodeCondition map[evictionapi.Signal]v1.NodeConditionType
// signalToResource maps a Signal to its associated Resource.
signalToResource map[Signal]v1.ResourceName
signalToResource map[evictionapi.Signal]v1.ResourceName
// resourceToSignal maps a Resource to its associated Signal
resourceToSignal map[v1.ResourceName]Signal
resourceToSignal map[v1.ResourceName]evictionapi.Signal
)
func init() {
// map eviction signals to node conditions
signalToNodeCondition = map[Signal]v1.NodeConditionType{}
signalToNodeCondition[SignalMemoryAvailable] = v1.NodeMemoryPressure
signalToNodeCondition[SignalImageFsAvailable] = v1.NodeDiskPressure
signalToNodeCondition[SignalNodeFsAvailable] = v1.NodeDiskPressure
signalToNodeCondition[SignalImageFsInodesFree] = v1.NodeDiskPressure
signalToNodeCondition[SignalNodeFsInodesFree] = v1.NodeDiskPressure
signalToNodeCondition = map[evictionapi.Signal]v1.NodeConditionType{}
signalToNodeCondition[evictionapi.SignalMemoryAvailable] = v1.NodeMemoryPressure
signalToNodeCondition[evictionapi.SignalImageFsAvailable] = v1.NodeDiskPressure
signalToNodeCondition[evictionapi.SignalNodeFsAvailable] = v1.NodeDiskPressure
signalToNodeCondition[evictionapi.SignalImageFsInodesFree] = v1.NodeDiskPressure
signalToNodeCondition[evictionapi.SignalNodeFsInodesFree] = v1.NodeDiskPressure
// map signals to resources (and vice-versa)
signalToResource = map[Signal]v1.ResourceName{}
signalToResource[SignalMemoryAvailable] = v1.ResourceMemory
signalToResource[SignalImageFsAvailable] = resourceImageFs
signalToResource[SignalImageFsInodesFree] = resourceImageFsInodes
signalToResource[SignalNodeFsAvailable] = resourceNodeFs
signalToResource[SignalNodeFsInodesFree] = resourceNodeFsInodes
resourceToSignal = map[v1.ResourceName]Signal{}
signalToResource = map[evictionapi.Signal]v1.ResourceName{}
signalToResource[evictionapi.SignalMemoryAvailable] = v1.ResourceMemory
signalToResource[evictionapi.SignalImageFsAvailable] = resourceImageFs
signalToResource[evictionapi.SignalImageFsInodesFree] = resourceImageFsInodes
signalToResource[evictionapi.SignalNodeFsAvailable] = resourceNodeFs
signalToResource[evictionapi.SignalNodeFsInodesFree] = resourceNodeFsInodes
resourceToSignal = map[v1.ResourceName]evictionapi.Signal{}
for key, value := range signalToResource {
resourceToSignal[value] = key
}
}
// validSignal returns true if the signal is supported.
func validSignal(signal Signal) bool {
func validSignal(signal evictionapi.Signal) bool {
_, found := signalToResource[signal]
return found
}
// ParseThresholdConfig parses the flags for thresholds.
func ParseThresholdConfig(evictionHard, evictionSoft, evictionSoftGracePeriod, evictionMinimumReclaim string) ([]Threshold, error) {
results := []Threshold{}
func ParseThresholdConfig(evictionHard, evictionSoft, evictionSoftGracePeriod, evictionMinimumReclaim string) ([]evictionapi.Threshold, error) {
results := []evictionapi.Threshold{}
hardThresholds, err := parseThresholdStatements(evictionHard)
if err != nil {
@ -134,11 +135,11 @@ func ParseThresholdConfig(evictionHard, evictionSoft, evictionSoftGracePeriod, e
}
// parseThresholdStatements parses the input statements into a list of Threshold objects.
func parseThresholdStatements(expr string) ([]Threshold, error) {
func parseThresholdStatements(expr string) ([]evictionapi.Threshold, error) {
if len(expr) == 0 {
return nil, nil
}
results := []Threshold{}
results := []evictionapi.Threshold{}
statements := strings.Split(expr, ",")
signalsFound := sets.NewString()
for _, statement := range statements {
@ -156,12 +157,12 @@ func parseThresholdStatements(expr string) ([]Threshold, error) {
}
// parseThresholdStatement parses a threshold statement.
func parseThresholdStatement(statement string) (Threshold, error) {
tokens2Operator := map[string]ThresholdOperator{
"<": OpLessThan,
func parseThresholdStatement(statement string) (evictionapi.Threshold, error) {
tokens2Operator := map[string]evictionapi.ThresholdOperator{
"<": evictionapi.OpLessThan,
}
var (
operator ThresholdOperator
operator evictionapi.ThresholdOperator
parts []string
)
for token := range tokens2Operator {
@ -173,41 +174,41 @@ func parseThresholdStatement(statement string) (Threshold, error) {
}
}
if len(operator) == 0 || len(parts) != 2 {
return Threshold{}, fmt.Errorf("invalid eviction threshold syntax %v, expected <signal><operator><value>", statement)
return evictionapi.Threshold{}, fmt.Errorf("invalid eviction threshold syntax %v, expected <signal><operator><value>", statement)
}
signal := Signal(parts[0])
signal := evictionapi.Signal(parts[0])
if !validSignal(signal) {
return Threshold{}, fmt.Errorf(unsupportedEvictionSignal, signal)
return evictionapi.Threshold{}, fmt.Errorf(unsupportedEvictionSignal, signal)
}
quantityValue := parts[1]
if strings.HasSuffix(quantityValue, "%") {
percentage, err := parsePercentage(quantityValue)
if err != nil {
return Threshold{}, err
return evictionapi.Threshold{}, err
}
if percentage <= 0 {
return Threshold{}, fmt.Errorf("eviction percentage threshold %v must be positive: %s", signal, quantityValue)
return evictionapi.Threshold{}, fmt.Errorf("eviction percentage threshold %v must be positive: %s", signal, quantityValue)
}
return Threshold{
return evictionapi.Threshold{
Signal: signal,
Operator: operator,
Value: ThresholdValue{
Value: evictionapi.ThresholdValue{
Percentage: percentage,
},
}, nil
}
quantity, err := resource.ParseQuantity(quantityValue)
if err != nil {
return Threshold{}, err
return evictionapi.Threshold{}, err
}
if quantity.Sign() < 0 || quantity.IsZero() {
return Threshold{}, fmt.Errorf("eviction threshold %v must be positive: %s", signal, &quantity)
return evictionapi.Threshold{}, fmt.Errorf("eviction threshold %v must be positive: %s", signal, &quantity)
}
return Threshold{
return evictionapi.Threshold{
Signal: signal,
Operator: operator,
Value: ThresholdValue{
Value: evictionapi.ThresholdValue{
Quantity: &quantity,
},
}, nil
@ -223,18 +224,18 @@ func parsePercentage(input string) (float32, error) {
}
// parseGracePeriods parses the grace period statements
func parseGracePeriods(expr string) (map[Signal]time.Duration, error) {
func parseGracePeriods(expr string) (map[evictionapi.Signal]time.Duration, error) {
if len(expr) == 0 {
return nil, nil
}
results := map[Signal]time.Duration{}
results := map[evictionapi.Signal]time.Duration{}
statements := strings.Split(expr, ",")
for _, statement := range statements {
parts := strings.Split(statement, "=")
if len(parts) != 2 {
return nil, fmt.Errorf("invalid eviction grace period syntax %v, expected <signal>=<duration>", statement)
}
signal := Signal(parts[0])
signal := evictionapi.Signal(parts[0])
if !validSignal(signal) {
return nil, fmt.Errorf(unsupportedEvictionSignal, signal)
}
@ -257,18 +258,18 @@ func parseGracePeriods(expr string) (map[Signal]time.Duration, error) {
}
// parseMinimumReclaims parses the minimum reclaim statements
func parseMinimumReclaims(expr string) (map[Signal]ThresholdValue, error) {
func parseMinimumReclaims(expr string) (map[evictionapi.Signal]evictionapi.ThresholdValue, error) {
if len(expr) == 0 {
return nil, nil
}
results := map[Signal]ThresholdValue{}
results := map[evictionapi.Signal]evictionapi.ThresholdValue{}
statements := strings.Split(expr, ",")
for _, statement := range statements {
parts := strings.Split(statement, "=")
if len(parts) != 2 {
return nil, fmt.Errorf("invalid eviction minimum reclaim syntax: %v, expected <signal>=<value>", statement)
}
signal := Signal(parts[0])
signal := evictionapi.Signal(parts[0])
if !validSignal(signal) {
return nil, fmt.Errorf(unsupportedEvictionSignal, signal)
}
@ -286,7 +287,7 @@ func parseMinimumReclaims(expr string) (map[Signal]ThresholdValue, error) {
if _, found := results[signal]; found {
return nil, fmt.Errorf("duplicate eviction minimum reclaim specified for %v", signal)
}
results[signal] = ThresholdValue{
results[signal] = evictionapi.ThresholdValue{
Percentage: percentage,
}
continue
@ -302,7 +303,7 @@ func parseMinimumReclaims(expr string) (map[Signal]ThresholdValue, error) {
if err != nil {
return nil, err
}
results[signal] = ThresholdValue{
results[signal] = evictionapi.ThresholdValue{
Quantity: &quantity,
}
}
@ -402,12 +403,12 @@ func podMemoryUsage(podStats statsapi.PodStats) (v1.ResourceList, error) {
}
// formatThreshold formats a threshold for logging.
func formatThreshold(threshold Threshold) string {
return fmt.Sprintf("threshold(signal=%v, operator=%v, value=%v, gracePeriod=%v)", threshold.Signal, formatThresholdValue(threshold.Value), threshold.Operator, threshold.GracePeriod)
func formatThreshold(threshold evictionapi.Threshold) string {
return fmt.Sprintf("threshold(signal=%v, operator=%v, value=%v, gracePeriod=%v)", threshold.Signal, threshold.Operator, evictionapi.ThresholdValue(threshold.Value), threshold.GracePeriod)
}
// formatThresholdValue formats a thresholdValue for logging.
func formatThresholdValue(value ThresholdValue) string {
// formatThresholdValue formats a thresholdValue for logging.
func formatThresholdValue(value evictionapi.ThresholdValue) string {
if value.Quantity != nil {
return value.Quantity.String()
}
@ -622,7 +623,7 @@ func makeSignalObservations(summaryProvider stats.SummaryProvider) (signalObserv
result := signalObservations{}
if memory := summary.Node.Memory; memory != nil && memory.AvailableBytes != nil && memory.WorkingSetBytes != nil {
result[SignalMemoryAvailable] = signalObservation{
result[evictionapi.SignalMemoryAvailable] = signalObservation{
available: resource.NewQuantity(int64(*memory.AvailableBytes), resource.BinarySI),
capacity: resource.NewQuantity(int64(*memory.AvailableBytes+*memory.WorkingSetBytes), resource.BinarySI),
time: memory.Time,
@ -630,14 +631,14 @@ func makeSignalObservations(summaryProvider stats.SummaryProvider) (signalObserv
}
if nodeFs := summary.Node.Fs; nodeFs != nil {
if nodeFs.AvailableBytes != nil && nodeFs.CapacityBytes != nil {
result[SignalNodeFsAvailable] = signalObservation{
result[evictionapi.SignalNodeFsAvailable] = signalObservation{
available: resource.NewQuantity(int64(*nodeFs.AvailableBytes), resource.BinarySI),
capacity: resource.NewQuantity(int64(*nodeFs.CapacityBytes), resource.BinarySI),
// TODO: add timestamp to stat (see memory stat)
}
}
if nodeFs.InodesFree != nil && nodeFs.Inodes != nil {
result[SignalNodeFsInodesFree] = signalObservation{
result[evictionapi.SignalNodeFsInodesFree] = signalObservation{
available: resource.NewQuantity(int64(*nodeFs.InodesFree), resource.BinarySI),
capacity: resource.NewQuantity(int64(*nodeFs.Inodes), resource.BinarySI),
// TODO: add timestamp to stat (see memory stat)
@ -647,13 +648,13 @@ func makeSignalObservations(summaryProvider stats.SummaryProvider) (signalObserv
if summary.Node.Runtime != nil {
if imageFs := summary.Node.Runtime.ImageFs; imageFs != nil {
if imageFs.AvailableBytes != nil && imageFs.CapacityBytes != nil {
result[SignalImageFsAvailable] = signalObservation{
result[evictionapi.SignalImageFsAvailable] = signalObservation{
available: resource.NewQuantity(int64(*imageFs.AvailableBytes), resource.BinarySI),
capacity: resource.NewQuantity(int64(*imageFs.CapacityBytes), resource.BinarySI),
// TODO: add timestamp to stat (see memory stat)
}
if imageFs.InodesFree != nil && imageFs.Inodes != nil {
result[SignalImageFsInodesFree] = signalObservation{
result[evictionapi.SignalImageFsInodesFree] = signalObservation{
available: resource.NewQuantity(int64(*imageFs.InodesFree), resource.BinarySI),
capacity: resource.NewQuantity(int64(*imageFs.Inodes), resource.BinarySI),
// TODO: add timestamp to stat (see memory stat)
@ -666,8 +667,8 @@ func makeSignalObservations(summaryProvider stats.SummaryProvider) (signalObserv
}
// thresholdsMet returns the set of thresholds that were met independent of grace period
func thresholdsMet(thresholds []Threshold, observations signalObservations, enforceMinReclaim bool) []Threshold {
results := []Threshold{}
func thresholdsMet(thresholds []evictionapi.Threshold, observations signalObservations, enforceMinReclaim bool) []evictionapi.Threshold {
results := []evictionapi.Threshold{}
for i := range thresholds {
threshold := thresholds[i]
observed, found := observations[threshold.Signal]
@ -677,14 +678,14 @@ func thresholdsMet(thresholds []Threshold, observations signalObservations, enfo
}
// determine if we have met the specified threshold
thresholdMet := false
quantity := getThresholdQuantity(threshold.Value, observed.capacity)
quantity := evictionapi.GetThresholdQuantity(threshold.Value, observed.capacity)
// if enforceMinReclaim is specified, we compare relative to value - minreclaim
if enforceMinReclaim && threshold.MinReclaim != nil {
quantity.Add(*getThresholdQuantity(*threshold.MinReclaim, observed.capacity))
quantity.Add(*evictionapi.GetThresholdQuantity(*threshold.MinReclaim, observed.capacity))
}
thresholdResult := quantity.Cmp(*observed.available)
switch threshold.Operator {
case OpLessThan:
case evictionapi.OpLessThan:
thresholdMet = thresholdResult > 0
}
if thresholdMet {
@ -704,12 +705,12 @@ func debugLogObservations(logPrefix string, observations signalObservations) {
}
}
func debugLogThresholdsWithObservation(logPrefix string, thresholds []Threshold, observations signalObservations) {
func debugLogThresholdsWithObservation(logPrefix string, thresholds []evictionapi.Threshold, observations signalObservations) {
for i := range thresholds {
threshold := thresholds[i]
observed, found := observations[threshold.Signal]
if found {
quantity := getThresholdQuantity(threshold.Value, observed.capacity)
quantity := evictionapi.GetThresholdQuantity(threshold.Value, observed.capacity)
glog.V(3).Infof("eviction manager: %v: threshold [signal=%v, quantity=%v] observed %v", logPrefix, threshold.Signal, quantity, observed.available)
} else {
glog.V(3).Infof("eviction manager: %v: threshold [signal=%v] had no observation", logPrefix, threshold.Signal)
@ -717,8 +718,8 @@ func debugLogThresholdsWithObservation(logPrefix string, thresholds []Threshold,
}
}
func thresholdsUpdatedStats(thresholds []Threshold, observations, lastObservations signalObservations) []Threshold {
results := []Threshold{}
func thresholdsUpdatedStats(thresholds []evictionapi.Threshold, observations, lastObservations signalObservations) []evictionapi.Threshold {
results := []evictionapi.Threshold{}
for i := range thresholds {
threshold := thresholds[i]
observed, found := observations[threshold.Signal]
@ -734,16 +735,8 @@ func thresholdsUpdatedStats(thresholds []Threshold, observations, lastObservatio
return results
}
// getThresholdQuantity returns the expected quantity value for a thresholdValue
func getThresholdQuantity(value ThresholdValue, capacity *resource.Quantity) *resource.Quantity {
if value.Quantity != nil {
return value.Quantity.Copy()
}
return resource.NewQuantity(int64(float64(capacity.Value())*float64(value.Percentage)), resource.BinarySI)
}
// thresholdsFirstObservedAt merges the input set of thresholds with the previous observation to determine when the active set of thresholds was initially met.
func thresholdsFirstObservedAt(thresholds []Threshold, lastObservedAt thresholdsObservedAt, now time.Time) thresholdsObservedAt {
func thresholdsFirstObservedAt(thresholds []evictionapi.Threshold, lastObservedAt thresholdsObservedAt, now time.Time) thresholdsObservedAt {
results := thresholdsObservedAt{}
for i := range thresholds {
observedAt, found := lastObservedAt[thresholds[i]]
@ -756,8 +749,8 @@ func thresholdsFirstObservedAt(thresholds []Threshold, lastObservedAt thresholds
}
// thresholdsMetGracePeriod returns the set of thresholds that have satisfied the associated grace period
func thresholdsMetGracePeriod(observedAt thresholdsObservedAt, now time.Time) []Threshold {
results := []Threshold{}
func thresholdsMetGracePeriod(observedAt thresholdsObservedAt, now time.Time) []evictionapi.Threshold {
results := []evictionapi.Threshold{}
for threshold, at := range observedAt {
duration := now.Sub(at)
if duration < threshold.GracePeriod {
@ -770,7 +763,7 @@ func thresholdsMetGracePeriod(observedAt thresholdsObservedAt, now time.Time) []
}
// nodeConditions returns the set of node conditions associated with a threshold
func nodeConditions(thresholds []Threshold) []v1.NodeConditionType {
func nodeConditions(thresholds []evictionapi.Threshold) []v1.NodeConditionType {
results := []v1.NodeConditionType{}
for _, threshold := range thresholds {
if nodeCondition, found := signalToNodeCondition[threshold.Signal]; found {
@ -832,7 +825,7 @@ func hasNodeCondition(inputs []v1.NodeConditionType, item v1.NodeConditionType)
}
// mergeThresholds will merge both threshold lists eliminating duplicates.
func mergeThresholds(inputsA []Threshold, inputsB []Threshold) []Threshold {
func mergeThresholds(inputsA []evictionapi.Threshold, inputsB []evictionapi.Threshold) []evictionapi.Threshold {
results := inputsA
for _, threshold := range inputsB {
if !hasThreshold(results, threshold) {
@ -843,7 +836,7 @@ func mergeThresholds(inputsA []Threshold, inputsB []Threshold) []Threshold {
}
// hasThreshold returns true if the threshold is in the input list
func hasThreshold(inputs []Threshold, item Threshold) bool {
func hasThreshold(inputs []evictionapi.Threshold, item evictionapi.Threshold) bool {
for _, input := range inputs {
if input.GracePeriod == item.GracePeriod && input.Operator == item.Operator && input.Signal == item.Signal && compareThresholdValue(input.Value, item.Value) {
return true
@ -852,8 +845,8 @@ func hasThreshold(inputs []Threshold, item Threshold) bool {
return false
}
// compareThresholdValue returns true if the two thresholdValue objects are logically the same
func compareThresholdValue(a ThresholdValue, b ThresholdValue) bool {
// compareThresholdValue returns true if the two thresholdValue objects are logically the same
func compareThresholdValue(a evictionapi.ThresholdValue, b evictionapi.ThresholdValue) bool {
if a.Quantity != nil {
if b.Quantity == nil {
return false
@ -867,7 +860,7 @@ func compareThresholdValue(a ThresholdValue, b ThresholdValue) bool {
}
// getStarvedResources returns the set of resources that are starved based on thresholds met.
func getStarvedResources(thresholds []Threshold) []v1.ResourceName {
func getStarvedResources(thresholds []evictionapi.Threshold) []v1.ResourceName {
results := []v1.ResourceName{}
for _, threshold := range thresholds {
if starvedResource, found := signalToResource[threshold.Signal]; found {
@ -878,7 +871,7 @@ func getStarvedResources(thresholds []Threshold) []v1.ResourceName {
}
// isSoftEvictionThresholds returns true if the thresholds met for the starved resource are only soft thresholds
func isSoftEvictionThresholds(thresholds []Threshold, starvedResource v1.ResourceName) bool {
func isSoftEvictionThresholds(thresholds []evictionapi.Threshold, starvedResource v1.ResourceName) bool {
for _, threshold := range thresholds {
if resourceToCheck := signalToResource[threshold.Signal]; resourceToCheck != starvedResource {
continue
@ -891,7 +884,7 @@ func isSoftEvictionThresholds(thresholds []Threshold, starvedResource v1.Resourc
}
// isHardEvictionThreshold returns true if the given threshold has no grace period, i.e. it is a hard eviction threshold
func isHardEvictionThreshold(threshold Threshold) bool {
func isHardEvictionThreshold(threshold evictionapi.Threshold) bool {
return threshold.GracePeriod == time.Duration(0)
}
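
End-to-end, the exported ParseThresholdConfig above is what turns the kubelet's eviction flags into evictionapi.Threshold values. A usage sketch; the flag strings are examples, not defaults, and the argument order matches the signature shown in this hunk:

package main

import (
    "fmt"

    "k8s.io/kubernetes/pkg/kubelet/eviction"
)

func main() {
    thresholds, err := eviction.ParseThresholdConfig(
        "memory.available<150Mi", // --eviction-hard
        "memory.available<300Mi", // --eviction-soft
        "memory.available=30s",   // --eviction-soft-grace-period
        "memory.available=0",     // --eviction-minimum-reclaim
    )
    if err != nil {
        panic(err)
    }
    for _, t := range thresholds {
        fmt.Printf("%s %s grace=%s\n", t.Signal, t.Operator, t.GracePeriod)
    }
}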

View File

@ -28,6 +28,7 @@ import (
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/v1"
statsapi "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
"k8s.io/kubernetes/pkg/quota"
)
@ -44,7 +45,7 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod string
evictionMinReclaim string
expectErr bool
expectThresholds []Threshold
expectThresholds []evictionapi.Threshold
}{
"no values": {
evictionHard: "",
@ -52,7 +53,7 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod: "",
evictionMinReclaim: "",
expectErr: false,
expectThresholds: []Threshold{},
expectThresholds: []evictionapi.Threshold{},
},
"all flag values": {
evictionHard: "memory.available<150Mi",
@ -60,25 +61,25 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod: "memory.available=30s",
evictionMinReclaim: "memory.available=0",
expectErr: false,
expectThresholds: []Threshold{
expectThresholds: []evictionapi.Threshold{
{
Signal: SignalMemoryAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("150Mi"),
},
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Quantity: quantityMustParse("0"),
},
},
{
Signal: SignalMemoryAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("300Mi"),
},
GracePeriod: gracePeriod,
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Quantity: quantityMustParse("0"),
},
},
@ -90,25 +91,25 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod: "memory.available=30s",
evictionMinReclaim: "memory.available=5%",
expectErr: false,
expectThresholds: []Threshold{
expectThresholds: []evictionapi.Threshold{
{
Signal: SignalMemoryAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Percentage: 0.1,
},
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Percentage: 0.05,
},
},
{
Signal: SignalMemoryAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Percentage: 0.3,
},
GracePeriod: gracePeriod,
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Percentage: 0.05,
},
},
@ -120,46 +121,46 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod: "imagefs.available=30s,nodefs.available=30s",
evictionMinReclaim: "imagefs.available=2Gi,nodefs.available=1Gi",
expectErr: false,
expectThresholds: []Threshold{
expectThresholds: []evictionapi.Threshold{
{
Signal: SignalImageFsAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalImageFsAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("150Mi"),
},
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Quantity: quantityMustParse("2Gi"),
},
},
{
Signal: SignalNodeFsAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalNodeFsAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("100Mi"),
},
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Gi"),
},
},
{
Signal: SignalImageFsAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalImageFsAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("300Mi"),
},
GracePeriod: gracePeriod,
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Quantity: quantityMustParse("2Gi"),
},
},
{
Signal: SignalNodeFsAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalNodeFsAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("200Mi"),
},
GracePeriod: gracePeriod,
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Gi"),
},
},
@ -171,46 +172,46 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod: "imagefs.available=30s,nodefs.available=30s",
evictionMinReclaim: "imagefs.available=10%,nodefs.available=5%",
expectErr: false,
expectThresholds: []Threshold{
expectThresholds: []evictionapi.Threshold{
{
Signal: SignalImageFsAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalImageFsAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Percentage: 0.15,
},
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Percentage: 0.1,
},
},
{
Signal: SignalNodeFsAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalNodeFsAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Percentage: 0.105,
},
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Percentage: 0.05,
},
},
{
Signal: SignalImageFsAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalImageFsAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Percentage: 0.3,
},
GracePeriod: gracePeriod,
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Percentage: 0.1,
},
},
{
Signal: SignalNodeFsAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalNodeFsAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Percentage: 0.205,
},
GracePeriod: gracePeriod,
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Percentage: 0.05,
},
},
@ -222,46 +223,46 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod: "imagefs.inodesFree=30s,nodefs.inodesFree=30s",
evictionMinReclaim: "imagefs.inodesFree=2Gi,nodefs.inodesFree=1Gi",
expectErr: false,
expectThresholds: []Threshold{
expectThresholds: []evictionapi.Threshold{
{
Signal: SignalImageFsInodesFree,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalImageFsInodesFree,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("150Mi"),
},
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Quantity: quantityMustParse("2Gi"),
},
},
{
Signal: SignalNodeFsInodesFree,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalNodeFsInodesFree,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("100Mi"),
},
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Gi"),
},
},
{
Signal: SignalImageFsInodesFree,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalImageFsInodesFree,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("300Mi"),
},
GracePeriod: gracePeriod,
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Quantity: quantityMustParse("2Gi"),
},
},
{
Signal: SignalNodeFsInodesFree,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalNodeFsInodesFree,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("200Mi"),
},
GracePeriod: gracePeriod,
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Gi"),
},
},
@ -273,7 +274,7 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod: "",
evictionMinReclaim: "",
expectErr: true,
expectThresholds: []Threshold{},
expectThresholds: []evictionapi.Threshold{},
},
"hard-signal-negative": {
evictionHard: "memory.available<-150Mi",
@ -281,7 +282,7 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod: "",
evictionMinReclaim: "",
expectErr: true,
expectThresholds: []Threshold{},
expectThresholds: []evictionapi.Threshold{},
},
"hard-signal-negative-percentage": {
evictionHard: "memory.available<-15%",
@ -289,7 +290,7 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod: "",
evictionMinReclaim: "",
expectErr: true,
expectThresholds: []Threshold{},
expectThresholds: []evictionapi.Threshold{},
},
"soft-signal-negative": {
evictionHard: "",
@ -297,7 +298,7 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod: "",
evictionMinReclaim: "",
expectErr: true,
expectThresholds: []Threshold{},
expectThresholds: []evictionapi.Threshold{},
},
"duplicate-signal": {
evictionHard: "memory.available<150Mi,memory.available<100Mi",
@ -305,7 +306,7 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod: "",
evictionMinReclaim: "",
expectErr: true,
expectThresholds: []Threshold{},
expectThresholds: []evictionapi.Threshold{},
},
"valid-and-invalid-signal": {
evictionHard: "memory.available<150Mi,invalid.foo<150Mi",
@ -313,7 +314,7 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod: "",
evictionMinReclaim: "",
expectErr: true,
expectThresholds: []Threshold{},
expectThresholds: []evictionapi.Threshold{},
},
"soft-no-grace-period": {
evictionHard: "",
@ -321,7 +322,7 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod: "",
evictionMinReclaim: "",
expectErr: true,
expectThresholds: []Threshold{},
expectThresholds: []evictionapi.Threshold{},
},
"soft-neg-grace-period": {
evictionHard: "",
@ -329,7 +330,7 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod: "memory.available=-30s",
evictionMinReclaim: "",
expectErr: true,
expectThresholds: []Threshold{},
expectThresholds: []evictionapi.Threshold{},
},
"neg-reclaim": {
evictionHard: "",
@ -337,7 +338,7 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod: "",
evictionMinReclaim: "memory.available=-300Mi",
expectErr: true,
expectThresholds: []Threshold{},
expectThresholds: []evictionapi.Threshold{},
},
"duplicate-reclaim": {
evictionHard: "",
@ -345,7 +346,7 @@ func TestParseThresholdConfig(t *testing.T) {
evictionSoftGracePeriod: "",
evictionMinReclaim: "memory.available=-300Mi,memory.available=-100Mi",
expectErr: true,
expectThresholds: []Threshold{},
expectThresholds: []evictionapi.Threshold{},
},
}
for testName, testCase := range testCases {
@ -359,7 +360,7 @@ func TestParseThresholdConfig(t *testing.T) {
}
}
func thresholdsEqual(expected []Threshold, actual []Threshold) bool {
func thresholdsEqual(expected []evictionapi.Threshold, actual []evictionapi.Threshold) bool {
if len(expected) != len(actual) {
return false
}
@ -388,7 +389,7 @@ func thresholdsEqual(expected []Threshold, actual []Threshold) bool {
return true
}
func thresholdEqual(a Threshold, b Threshold) bool {
func thresholdEqual(a evictionapi.Threshold, b evictionapi.Threshold) bool {
return a.GracePeriod == b.GracePeriod &&
a.Operator == b.Operator &&
a.Signal == b.Signal &&
@ -746,7 +747,7 @@ func TestMakeSignalObservations(t *testing.T) {
if err != nil {
t.Errorf("Unexpected err: %v", err)
}
memQuantity, found := actualObservations[SignalMemoryAvailable]
memQuantity, found := actualObservations[evictionapi.SignalMemoryAvailable]
if !found {
t.Errorf("Expected available memory observation: %v", err)
}
@ -756,7 +757,7 @@ func TestMakeSignalObservations(t *testing.T) {
if expectedBytes := int64(nodeWorkingSetBytes + nodeAvailableBytes); memQuantity.capacity.Value() != expectedBytes {
t.Errorf("Expected %v, actual: %v", expectedBytes, memQuantity.capacity.Value())
}
nodeFsQuantity, found := actualObservations[SignalNodeFsAvailable]
nodeFsQuantity, found := actualObservations[evictionapi.SignalNodeFsAvailable]
if !found {
t.Errorf("Expected available nodefs observation: %v", err)
}
@ -766,7 +767,7 @@ func TestMakeSignalObservations(t *testing.T) {
if expectedBytes := int64(nodeFsCapacityBytes); nodeFsQuantity.capacity.Value() != expectedBytes {
t.Errorf("Expected %v, actual: %v", expectedBytes, nodeFsQuantity.capacity.Value())
}
nodeFsInodesQuantity, found := actualObservations[SignalNodeFsInodesFree]
nodeFsInodesQuantity, found := actualObservations[evictionapi.SignalNodeFsInodesFree]
if !found {
t.Errorf("Expected inodes free nodefs observation: %v", err)
}
@ -776,7 +777,7 @@ func TestMakeSignalObservations(t *testing.T) {
if expected := int64(nodeFsInodes); nodeFsInodesQuantity.capacity.Value() != expected {
t.Errorf("Expected %v, actual: %v", expected, nodeFsInodesQuantity.capacity.Value())
}
imageFsQuantity, found := actualObservations[SignalImageFsAvailable]
imageFsQuantity, found := actualObservations[evictionapi.SignalImageFsAvailable]
if !found {
t.Errorf("Expected available imagefs observation: %v", err)
}
@ -786,7 +787,7 @@ func TestMakeSignalObservations(t *testing.T) {
if expectedBytes := int64(imageFsCapacityBytes); imageFsQuantity.capacity.Value() != expectedBytes {
t.Errorf("Expected %v, actual: %v", expectedBytes, imageFsQuantity.capacity.Value())
}
imageFsInodesQuantity, found := actualObservations[SignalImageFsInodesFree]
imageFsInodesQuantity, found := actualObservations[evictionapi.SignalImageFsInodesFree]
if !found {
t.Errorf("Expected inodes free imagefs observation: %v", err)
}
@ -811,67 +812,67 @@ func TestMakeSignalObservations(t *testing.T) {
}
func TestThresholdsMet(t *testing.T) {
hardThreshold := Threshold{
Signal: SignalMemoryAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
hardThreshold := evictionapi.Threshold{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Gi"),
},
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Quantity: quantityMustParse("500Mi"),
},
}
testCases := map[string]struct {
enforceMinReclaim bool
thresholds []Threshold
thresholds []evictionapi.Threshold
observations signalObservations
result []Threshold
result []evictionapi.Threshold
}{
"empty": {
enforceMinReclaim: false,
thresholds: []Threshold{},
thresholds: []evictionapi.Threshold{},
observations: signalObservations{},
result: []Threshold{},
result: []evictionapi.Threshold{},
},
"threshold-met-memory": {
enforceMinReclaim: false,
thresholds: []Threshold{hardThreshold},
thresholds: []evictionapi.Threshold{hardThreshold},
observations: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
available: quantityMustParse("500Mi"),
},
},
result: []Threshold{hardThreshold},
result: []evictionapi.Threshold{hardThreshold},
},
"threshold-not-met": {
enforceMinReclaim: false,
thresholds: []Threshold{hardThreshold},
thresholds: []evictionapi.Threshold{hardThreshold},
observations: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
available: quantityMustParse("2Gi"),
},
},
result: []Threshold{},
result: []evictionapi.Threshold{},
},
"threshold-met-with-min-reclaim": {
enforceMinReclaim: true,
thresholds: []Threshold{hardThreshold},
thresholds: []evictionapi.Threshold{hardThreshold},
observations: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
available: quantityMustParse("1.05Gi"),
},
},
result: []Threshold{hardThreshold},
result: []evictionapi.Threshold{hardThreshold},
},
"threshold-not-met-with-min-reclaim": {
enforceMinReclaim: true,
thresholds: []Threshold{hardThreshold},
thresholds: []evictionapi.Threshold{hardThreshold},
observations: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
available: quantityMustParse("2Gi"),
},
},
result: []Threshold{},
result: []evictionapi.Threshold{},
},
}
for testName, testCase := range testCases {
@ -883,8 +884,8 @@ func TestThresholdsMet(t *testing.T) {
}
func TestThresholdsUpdatedStats(t *testing.T) {
updatedThreshold := Threshold{
Signal: SignalMemoryAvailable,
updatedThreshold := evictionapi.Threshold{
Signal: evictionapi.SignalMemoryAvailable,
}
locationUTC, err := time.LoadLocation("UTC")
if err != nil {
@ -892,76 +893,76 @@ func TestThresholdsUpdatedStats(t *testing.T) {
return
}
testCases := map[string]struct {
thresholds []Threshold
thresholds []evictionapi.Threshold
observations signalObservations
last signalObservations
result []Threshold
result []evictionapi.Threshold
}{
"empty": {
thresholds: []Threshold{},
thresholds: []evictionapi.Threshold{},
observations: signalObservations{},
last: signalObservations{},
result: []Threshold{},
result: []evictionapi.Threshold{},
},
"no-time": {
thresholds: []Threshold{updatedThreshold},
thresholds: []evictionapi.Threshold{updatedThreshold},
observations: signalObservations{
SignalMemoryAvailable: signalObservation{},
evictionapi.SignalMemoryAvailable: signalObservation{},
},
last: signalObservations{},
result: []Threshold{updatedThreshold},
result: []evictionapi.Threshold{updatedThreshold},
},
"no-last-observation": {
thresholds: []Threshold{updatedThreshold},
thresholds: []evictionapi.Threshold{updatedThreshold},
observations: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
time: metav1.Date(2016, 1, 1, 0, 0, 0, 0, locationUTC),
},
},
last: signalObservations{},
result: []Threshold{updatedThreshold},
result: []evictionapi.Threshold{updatedThreshold},
},
"time-machine": {
thresholds: []Threshold{updatedThreshold},
thresholds: []evictionapi.Threshold{updatedThreshold},
observations: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
time: metav1.Date(2016, 1, 1, 0, 0, 0, 0, locationUTC),
},
},
last: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
time: metav1.Date(2016, 1, 1, 0, 1, 0, 0, locationUTC),
},
},
result: []Threshold{},
result: []evictionapi.Threshold{},
},
"same-observation": {
thresholds: []Threshold{updatedThreshold},
thresholds: []evictionapi.Threshold{updatedThreshold},
observations: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
time: metav1.Date(2016, 1, 1, 0, 0, 0, 0, locationUTC),
},
},
last: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
time: metav1.Date(2016, 1, 1, 0, 0, 0, 0, locationUTC),
},
},
result: []Threshold{},
result: []evictionapi.Threshold{},
},
"new-observation": {
thresholds: []Threshold{updatedThreshold},
thresholds: []evictionapi.Threshold{updatedThreshold},
observations: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
time: metav1.Date(2016, 1, 1, 0, 1, 0, 0, locationUTC),
},
},
last: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
time: metav1.Date(2016, 1, 1, 0, 0, 0, 0, locationUTC),
},
},
result: []Threshold{updatedThreshold},
result: []evictionapi.Threshold{updatedThreshold},
},
}
for testName, testCase := range testCases {
@ -973,21 +974,21 @@ func TestThresholdsUpdatedStats(t *testing.T) {
}
func TestPercentageThresholdsMet(t *testing.T) {
specificThresholds := []Threshold{
specificThresholds := []evictionapi.Threshold{
{
Signal: SignalMemoryAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Percentage: 0.2,
},
MinReclaim: &ThresholdValue{
MinReclaim: &evictionapi.ThresholdValue{
Percentage: 0.05,
},
},
{
Signal: SignalNodeFsAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
Signal: evictionapi.SignalNodeFsAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Percentage: 0.3,
},
},
@ -995,19 +996,19 @@ func TestPercentageThresholdsMet(t *testing.T) {
testCases := map[string]struct {
enforceMinRelaim bool
thresholds []Threshold
thresholds []evictionapi.Threshold
observations signalObservations
result []Threshold
result []evictionapi.Threshold
}{
"BothMet": {
enforceMinRelaim: false,
thresholds: specificThresholds,
observations: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
available: quantityMustParse("100Mi"),
capacity: quantityMustParse("1000Mi"),
},
SignalNodeFsAvailable: signalObservation{
evictionapi.SignalNodeFsAvailable: signalObservation{
available: quantityMustParse("100Gi"),
capacity: quantityMustParse("1000Gi"),
},
@ -1018,68 +1019,68 @@ func TestPercentageThresholdsMet(t *testing.T) {
enforceMinRelaim: false,
thresholds: specificThresholds,
observations: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
available: quantityMustParse("300Mi"),
capacity: quantityMustParse("1000Mi"),
},
SignalNodeFsAvailable: signalObservation{
evictionapi.SignalNodeFsAvailable: signalObservation{
available: quantityMustParse("400Gi"),
capacity: quantityMustParse("1000Gi"),
},
},
result: []Threshold{},
result: []evictionapi.Threshold{},
},
"DiskMet": {
enforceMinRelaim: false,
thresholds: specificThresholds,
observations: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
available: quantityMustParse("300Mi"),
capacity: quantityMustParse("1000Mi"),
},
SignalNodeFsAvailable: signalObservation{
evictionapi.SignalNodeFsAvailable: signalObservation{
available: quantityMustParse("100Gi"),
capacity: quantityMustParse("1000Gi"),
},
},
result: []Threshold{specificThresholds[1]},
result: []evictionapi.Threshold{specificThresholds[1]},
},
"MemoryMet": {
enforceMinRelaim: false,
thresholds: specificThresholds,
observations: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
available: quantityMustParse("100Mi"),
capacity: quantityMustParse("1000Mi"),
},
SignalNodeFsAvailable: signalObservation{
evictionapi.SignalNodeFsAvailable: signalObservation{
available: quantityMustParse("400Gi"),
capacity: quantityMustParse("1000Gi"),
},
},
result: []Threshold{specificThresholds[0]},
result: []evictionapi.Threshold{specificThresholds[0]},
},
"MemoryMetWithMinReclaim": {
enforceMinRelaim: true,
thresholds: specificThresholds,
observations: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
available: quantityMustParse("225Mi"),
capacity: quantityMustParse("1000Mi"),
},
},
result: []Threshold{specificThresholds[0]},
result: []evictionapi.Threshold{specificThresholds[0]},
},
"MemoryNotMetWithMinReclaim": {
enforceMinRelaim: true,
thresholds: specificThresholds,
observations: signalObservations{
SignalMemoryAvailable: signalObservation{
evictionapi.SignalMemoryAvailable: signalObservation{
available: quantityMustParse("300Mi"),
capacity: quantityMustParse("1000Mi"),
},
},
result: []Threshold{},
result: []evictionapi.Threshold{},
},
}
for testName, testCase := range testCases {
@ -1091,29 +1092,29 @@ func TestPercentageThresholdsMet(t *testing.T) {
}
func TestThresholdsFirstObservedAt(t *testing.T) {
hardThreshold := Threshold{
Signal: SignalMemoryAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
hardThreshold := evictionapi.Threshold{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Gi"),
},
}
now := metav1.Now()
oldTime := metav1.NewTime(now.Time.Add(-1 * time.Minute))
testCases := map[string]struct {
thresholds []Threshold
thresholds []evictionapi.Threshold
lastObservedAt thresholdsObservedAt
now time.Time
result thresholdsObservedAt
}{
"empty": {
thresholds: []Threshold{},
thresholds: []evictionapi.Threshold{},
lastObservedAt: thresholdsObservedAt{},
now: now.Time,
result: thresholdsObservedAt{},
},
"no-previous-observation": {
thresholds: []Threshold{hardThreshold},
thresholds: []evictionapi.Threshold{hardThreshold},
lastObservedAt: thresholdsObservedAt{},
now: now.Time,
result: thresholdsObservedAt{
@ -1121,7 +1122,7 @@ func TestThresholdsFirstObservedAt(t *testing.T) {
},
},
"previous-observation": {
thresholds: []Threshold{hardThreshold},
thresholds: []evictionapi.Threshold{hardThreshold},
lastObservedAt: thresholdsObservedAt{
hardThreshold: oldTime.Time,
},
@ -1141,17 +1142,17 @@ func TestThresholdsFirstObservedAt(t *testing.T) {
func TestThresholdsMetGracePeriod(t *testing.T) {
now := metav1.Now()
hardThreshold := Threshold{
Signal: SignalMemoryAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
hardThreshold := evictionapi.Threshold{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Gi"),
},
}
softThreshold := Threshold{
Signal: SignalMemoryAvailable,
Operator: OpLessThan,
Value: ThresholdValue{
softThreshold := evictionapi.Threshold{
Signal: evictionapi.SignalMemoryAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("2Gi"),
},
GracePeriod: 1 * time.Minute,
@ -1160,33 +1161,33 @@ func TestThresholdsMetGracePeriod(t *testing.T) {
testCases := map[string]struct {
observedAt thresholdsObservedAt
now time.Time
result []Threshold
result []evictionapi.Threshold
}{
"empty": {
observedAt: thresholdsObservedAt{},
now: now.Time,
result: []Threshold{},
result: []evictionapi.Threshold{},
},
"hard-threshold-met": {
observedAt: thresholdsObservedAt{
hardThreshold: now.Time,
},
now: now.Time,
result: []Threshold{hardThreshold},
result: []evictionapi.Threshold{hardThreshold},
},
"soft-threshold-not-met": {
observedAt: thresholdsObservedAt{
softThreshold: now.Time,
},
now: now.Time,
result: []Threshold{},
result: []evictionapi.Threshold{},
},
"soft-threshold-met": {
observedAt: thresholdsObservedAt{
softThreshold: oldTime.Time,
},
now: now.Time,
result: []Threshold{softThreshold},
result: []evictionapi.Threshold{softThreshold},
},
}
for testName, testCase := range testCases {
@ -1199,16 +1200,16 @@ func TestThresholdsMetGracePeriod(t *testing.T) {
func TestNodeConditions(t *testing.T) {
testCases := map[string]struct {
inputs []Threshold
inputs []evictionapi.Threshold
result []v1.NodeConditionType
}{
"empty-list": {
inputs: []Threshold{},
inputs: []evictionapi.Threshold{},
result: []v1.NodeConditionType{},
},
"memory.available": {
inputs: []Threshold{
{Signal: SignalMemoryAvailable},
inputs: []evictionapi.Threshold{
{Signal: evictionapi.SignalMemoryAvailable},
},
result: []v1.NodeConditionType{v1.NodeMemoryPressure},
},
@ -1327,24 +1328,24 @@ func TestHasNodeConditions(t *testing.T) {
func TestGetStarvedResources(t *testing.T) {
testCases := map[string]struct {
inputs []Threshold
inputs []evictionapi.Threshold
result []v1.ResourceName
}{
"memory.available": {
inputs: []Threshold{
{Signal: SignalMemoryAvailable},
inputs: []evictionapi.Threshold{
{Signal: evictionapi.SignalMemoryAvailable},
},
result: []v1.ResourceName{v1.ResourceMemory},
},
"imagefs.available": {
inputs: []Threshold{
{Signal: SignalImageFsAvailable},
inputs: []evictionapi.Threshold{
{Signal: evictionapi.SignalImageFsAvailable},
},
result: []v1.ResourceName{resourceImageFs},
},
"nodefs.available": {
inputs: []Threshold{
{Signal: SignalNodeFsAvailable},
inputs: []evictionapi.Threshold{
{Signal: evictionapi.SignalNodeFsAvailable},
},
result: []v1.ResourceName{resourceNodeFs},
},
@ -1397,50 +1398,50 @@ func testParsePercentage(t *testing.T) {
func testCompareThresholdValue(t *testing.T) {
testCases := []struct {
a, b ThresholdValue
a, b evictionapi.ThresholdValue
equal bool
}{
{
a: ThresholdValue{
a: evictionapi.ThresholdValue{
Quantity: resource.NewQuantity(123, resource.BinarySI),
},
b: ThresholdValue{
b: evictionapi.ThresholdValue{
Quantity: resource.NewQuantity(123, resource.BinarySI),
},
equal: true,
},
{
a: ThresholdValue{
a: evictionapi.ThresholdValue{
Quantity: resource.NewQuantity(123, resource.BinarySI),
},
b: ThresholdValue{
b: evictionapi.ThresholdValue{
Quantity: resource.NewQuantity(456, resource.BinarySI),
},
equal: false,
},
{
a: ThresholdValue{
a: evictionapi.ThresholdValue{
Quantity: resource.NewQuantity(123, resource.BinarySI),
},
b: ThresholdValue{
b: evictionapi.ThresholdValue{
Percentage: 0.1,
},
equal: false,
},
{
a: ThresholdValue{
a: evictionapi.ThresholdValue{
Percentage: 0.1,
},
b: ThresholdValue{
b: evictionapi.ThresholdValue{
Percentage: 0.1,
},
equal: true,
},
{
a: ThresholdValue{
a: evictionapi.ThresholdValue{
Percentage: 0.2,
},
b: ThresholdValue{
b: evictionapi.ThresholdValue{
Percentage: 0.1,
},
equal: false,
@ -1601,7 +1602,7 @@ func (s1 nodeConditionList) Equal(s2 nodeConditionList) bool {
}
// thresholdList is a simple alias to support equality checking independent of order
type thresholdList []Threshold
type thresholdList []evictionapi.Threshold
// Equal adds the ability to check equality between two lists of node conditions.
func (s1 thresholdList) Equal(s2 thresholdList) bool {

View File

@ -23,22 +23,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/kubernetes/pkg/api/v1"
statsapi "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats"
)
// Signal defines a signal that can trigger eviction of pods on a node.
type Signal string
const (
// SignalMemoryAvailable is memory available (i.e. capacity - workingSet), in bytes.
SignalMemoryAvailable Signal = "memory.available"
// SignalNodeFsAvailable is amount of storage available on filesystem that kubelet uses for volumes, daemon logs, etc.
SignalNodeFsAvailable Signal = "nodefs.available"
// SignalNodeFsInodesFree is amount of inodes available on filesystem that kubelet uses for volumes, daemon logs, etc.
SignalNodeFsInodesFree Signal = "nodefs.inodesFree"
// SignalImageFsAvailable is amount of storage available on filesystem that container runtime uses for storing images and container writable layers.
SignalImageFsAvailable Signal = "imagefs.available"
// SignalImageFsInodesFree is amount of inodes available on filesystem that container runtime uses for storing images and container writeable layers.
SignalImageFsInodesFree Signal = "imagefs.inodesFree"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)
// fsStatsType defines the types of filesystem stats to collect.
@ -53,14 +38,6 @@ const (
fsStatsRoot fsStatsType = "root"
)
// ThresholdOperator is the operator used to express a Threshold.
type ThresholdOperator string
const (
// OpLessThan is the operator that expresses a less than operator.
OpLessThan ThresholdOperator = "LessThan"
)
// Config holds information about how eviction is configured.
type Config struct {
// PressureTransitionPeriod is the duration the kubelet has to wait before transitioning out of a pressure condition.
@ -68,35 +45,11 @@ type Config struct {
// Maximum allowed grace period (in seconds) to use when terminating pods in response to a soft eviction threshold being met.
MaxPodGracePeriodSeconds int64
// Thresholds define the set of conditions monitored to trigger eviction.
Thresholds []Threshold
Thresholds []evictionapi.Threshold
// KernelMemcgNotification if true will integrate with the kernel memcg notification to determine if memory thresholds are crossed.
KernelMemcgNotification bool
}
// ThresholdValue is a value holder that abstracts literal versus percentage based quantity
type ThresholdValue struct {
// The following fields are exclusive. Only the topmost non-zero field is used.
// Quantity is a quantity associated with the signal that is evaluated against the specified operator.
Quantity *resource.Quantity
// Percentage represents the usage percentage over the total resource that is evaluated against the specified operator.
Percentage float32
}
// Threshold defines a metric for when eviction should occur.
type Threshold struct {
// Signal defines the entity that was measured.
Signal Signal
// Operator represents a relationship of a signal to a value.
Operator ThresholdOperator
// Value is the threshold the resource is evaluated against.
Value ThresholdValue
// GracePeriod represents the amount of time that a threshold must be met before eviction is triggered.
GracePeriod time.Duration
// MinReclaim represents the minimum amount of resource to reclaim if the threshold is met.
MinReclaim *ThresholdValue
}
// Manager evaluates when an eviction threshold for node stability has been met on the node.
type Manager interface {
// Start starts the control loop to monitor eviction thresholds at specified interval.
@ -150,10 +103,10 @@ type signalObservation struct {
}
// signalObservations maps a signal to an observed quantity
type signalObservations map[Signal]signalObservation
type signalObservations map[evictionapi.Signal]signalObservation
// thresholdsObservedAt maps a threshold to a time that it was observed
type thresholdsObservedAt map[Threshold]time.Time
type thresholdsObservedAt map[evictionapi.Threshold]time.Time
// nodeConditionsObservedAt maps a node condition to a time that it was observed
type nodeConditionsObservedAt map[v1.NodeConditionType]time.Time
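The Signal, ThresholdOperator, ThresholdValue, and Threshold definitions removed above now live in the dedicated evictionapi package (k8s.io/kubernetes/pkg/kubelet/eviction/api), which is why every reference in the tests gains the evictionapi qualifier. A minimal sketch of building a threshold against the relocated types, mirroring the soft memory threshold the tests construct; the 500Mi quantity and the one-minute grace period are illustrative values, not defaults.

package main

import (
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/api/resource"
	evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)

func main() {
	quantity := resource.MustParse("500Mi")
	threshold := evictionapi.Threshold{
		// Trigger when memory.available drops below 500Mi...
		Signal:   evictionapi.SignalMemoryAvailable,
		Operator: evictionapi.OpLessThan,
		Value: evictionapi.ThresholdValue{
			Quantity: &quantity,
		},
		// ...and only after the condition has held for a minute (a soft threshold).
		GracePeriod: 1 * time.Minute,
	}
	fmt.Printf("%+v\n", threshold)
}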

View File

@ -34,7 +34,6 @@ import (
clientgoclientset "k8s.io/client-go/kubernetes"
cadvisorapi "github.com/google/cadvisor/info/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
@ -359,11 +358,6 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
KernelMemcgNotification: kubeCfg.ExperimentalKernelMemcgNotification,
}
reservation, err := ParseReservation(kubeCfg.KubeReserved, kubeCfg.SystemReserved)
if err != nil {
return nil, err
}
var dockerExecHandler dockertools.ExecHandler
switch kubeCfg.DockerExecHandlerName {
case "native":
@ -465,7 +459,6 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
nodeIP: net.ParseIP(kubeCfg.NodeIP),
clock: clock.RealClock{},
outOfDiskTransitionFrequency: kubeCfg.OutOfDiskTransitionFrequency.Duration,
reservation: *reservation,
enableCustomMetrics: kubeCfg.EnableCustomMetrics,
babysitDaemons: kubeCfg.BabysitDaemons,
enableControllerAttachDetach: kubeCfg.EnableControllerAttachDetach,
@ -1034,10 +1027,6 @@ type Kubelet struct {
// getting rescheduled onto the node.
outOfDiskTransitionFrequency time.Duration
// reservation specifies resources which are reserved for non-pod usage, including kubernetes and
// non-kubernetes system processes.
reservation kubetypes.Reservation
// support gathering custom metrics.
enableCustomMetrics bool
@ -2119,47 +2108,6 @@ func isSyncPodWorthy(event *pleg.PodLifecycleEvent) bool {
return event.Type != pleg.ContainerRemoved
}
// parseResourceList parses the given configuration map into an API
// ResourceList or returns an error.
func parseResourceList(m componentconfig.ConfigurationMap) (v1.ResourceList, error) {
rl := make(v1.ResourceList)
for k, v := range m {
switch v1.ResourceName(k) {
// Only CPU and memory resources are supported.
case v1.ResourceCPU, v1.ResourceMemory:
q, err := resource.ParseQuantity(v)
if err != nil {
return nil, err
}
if q.Sign() == -1 {
return nil, fmt.Errorf("resource quantity for %q cannot be negative: %v", k, v)
}
rl[v1.ResourceName(k)] = q
default:
return nil, fmt.Errorf("cannot reserve %q resource", k)
}
}
return rl, nil
}
// ParseReservation parses the given kubelet- and system- reservations
// configuration maps into an internal Reservation instance or returns an
// error.
func ParseReservation(kubeReserved, systemReserved componentconfig.ConfigurationMap) (*kubetypes.Reservation, error) {
reservation := new(kubetypes.Reservation)
if rl, err := parseResourceList(kubeReserved); err != nil {
return nil, err
} else {
reservation.Kubernetes = rl
}
if rl, err := parseResourceList(systemReserved); err != nil {
return nil, err
} else {
reservation.System = rl
}
return reservation, nil
}
// Gets the streaming server configuration to use with in-process CRI shims.
func getStreamingConfig(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *KubeletDeps) *streaming.Config {
config := &streaming.Config{

View File

@ -522,18 +522,14 @@ func (kl *Kubelet) setNodeStatusMachineInfo(node *v1.Node) {
}
// Set Allocatable.
node.Status.Allocatable = make(v1.ResourceList)
if node.Status.Allocatable == nil {
node.Status.Allocatable = make(v1.ResourceList)
}
allocatableReservation := kl.containerManager.GetNodeAllocatableReservation()
for k, v := range node.Status.Capacity {
value := *(v.Copy())
if kl.reservation.System != nil {
value.Sub(kl.reservation.System[k])
}
if kl.reservation.Kubernetes != nil {
value.Sub(kl.reservation.Kubernetes[k])
}
if value.Sign() < 0 {
// Negative Allocatable resources don't make sense.
value.Set(0)
if res, exists := allocatableReservation[k]; exists {
value.Sub(res)
}
node.Status.Allocatable[k] = value
}
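With the kubelet's reservation field gone, Allocatable is now capacity minus whatever the container manager reports through GetNodeAllocatableReservation (kube-reserved, system-reserved, and hard eviction thresholds combined). A self-contained sketch of the subtraction the loop above performs; the capacity and reservation quantities below are hypothetical.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/kubernetes/pkg/api/v1"
)

// allocatableFrom subtracts a combined reservation from capacity, resource by
// resource, the way setNodeStatusMachineInfo does above.
func allocatableFrom(capacity, reservation v1.ResourceList) v1.ResourceList {
	allocatable := make(v1.ResourceList)
	for k, v := range capacity {
		value := *(v.Copy())
		if res, exists := reservation[k]; exists {
			value.Sub(res)
		}
		allocatable[k] = value
	}
	return allocatable
}

func main() {
	capacity := v1.ResourceList{
		v1.ResourceCPU:    resource.MustParse("2"), // 2000m
		v1.ResourceMemory: resource.MustParse("8Gi"),
	}
	// Hypothetical combined reservation: 100m/100Mi kube-reserved,
	// 100m/100Mi system-reserved, plus a 100Mi hard eviction threshold.
	reservation := v1.ResourceList{
		v1.ResourceCPU:    resource.MustParse("200m"),
		v1.ResourceMemory: resource.MustParse("300Mi"),
	}
	allocatable := allocatableFrom(capacity, reservation)
	cpu := allocatable[v1.ResourceCPU]
	mem := allocatable[v1.ResourceMemory]
	fmt.Println(cpu.String(), mem.String()) // 1800m 7892Mi
}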

View File

@ -41,6 +41,7 @@ import (
core "k8s.io/client-go/testing"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/client/clientset_generated/clientset/fake"
"k8s.io/kubernetes/pkg/kubelet/cm"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/util/sliceutils"
"k8s.io/kubernetes/pkg/version"
@ -109,6 +110,15 @@ func applyNodeStatusPatch(originalNode *v1.Node, patch []byte) (*v1.Node, error)
return updatedNode, nil
}
type localCM struct {
cm.ContainerManager
allocatable v1.ResourceList
}
func (lcm *localCM) GetNodeAllocatableReservation() v1.ResourceList {
return lcm.allocatable
}
func TestUpdateNewNodeStatus(t *testing.T) {
// generate one more than maxImagesInNodeStatus in inputImageList
inputImageList, expectedImageList := generateTestingImageList(maxImagesInNodeStatus + 1)
@ -116,6 +126,13 @@ func TestUpdateNewNodeStatus(t *testing.T) {
t, inputImageList, false /* controllerAttachDetachEnabled */)
defer testKubelet.Cleanup()
kubelet := testKubelet.kubelet
kubelet.containerManager = &localCM{
ContainerManager: cm.NewStubContainerManager(),
allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(200, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(100E6, resource.BinarySI),
},
}
kubeClient := testKubelet.fakeKubeClient
existingNode := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: testKubeletHostname}}
kubeClient.ReactionChain = fake.NewSimpleClientset(&v1.NodeList{Items: []v1.Node{existingNode}}).ReactionChain
@ -332,6 +349,14 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
defer testKubelet.Cleanup()
kubelet := testKubelet.kubelet
kubelet.containerManager = &localCM{
ContainerManager: cm.NewStubContainerManager(),
allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(200, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(100E6, resource.BinarySI),
},
}
kubeClient := testKubelet.fakeKubeClient
existingNode := v1.Node{
ObjectMeta: metav1.ObjectMeta{Name: testKubeletHostname},
@ -377,9 +402,10 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
v1.ResourcePods: *resource.NewQuantity(0, resource.DecimalSI),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2800, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(19900E6, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(0, resource.DecimalSI),
v1.ResourceCPU: *resource.NewMilliQuantity(2800, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(19900E6, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(0, resource.DecimalSI),
v1.ResourceNvidiaGPU: *resource.NewQuantity(0, resource.DecimalSI),
},
},
}
@ -687,6 +713,14 @@ func TestUpdateNodeStatusWithRuntimeStateError(t *testing.T) {
testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
defer testKubelet.Cleanup()
kubelet := testKubelet.kubelet
kubelet.containerManager = &localCM{
ContainerManager: cm.NewStubContainerManager(),
allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(200, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(100E6, resource.BinarySI),
},
}
clock := testKubelet.fakeClock
kubeClient := testKubelet.fakeKubeClient
existingNode := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: testKubeletHostname}}

View File

@ -19,6 +19,8 @@ package kubelet
import (
"fmt"
"github.com/golang/glog"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/fieldpath"
@ -41,7 +43,7 @@ func (kl *Kubelet) defaultPodLimitsForDownwardApi(pod *v1.Pod, container *v1.Con
return nil, nil, fmt.Errorf("failed to find node object, expected a node")
}
allocatable := node.Status.Allocatable
glog.Errorf("allocatable: %v", allocatable)
podCopy, err := api.Scheme.Copy(pod)
if err != nil {
return nil, nil, fmt.Errorf("failed to perform a deep copy of pod object: %v", err)

View File

@ -25,8 +25,8 @@ import (
cadvisorapiv2 "github.com/google/cadvisor/info/v2"
apiequality "k8s.io/apimachinery/pkg/api/equality"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/kubernetes/pkg/api/v1"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
)
func TestPodResourceLimitsDefaulting(t *testing.T) {
@ -41,18 +41,21 @@ func TestPodResourceLimitsDefaulting(t *testing.T) {
}, nil)
tk.fakeCadvisor.On("ImagesFsInfo").Return(cadvisorapiv2.FsInfo{}, nil)
tk.fakeCadvisor.On("RootFsInfo").Return(cadvisorapiv2.FsInfo{}, nil)
tk.kubelet.reservation = kubetypes.Reservation{
Kubernetes: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("3"),
v1.ResourceMemory: resource.MustParse("4Gi"),
},
System: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1"),
v1.ResourceMemory: resource.MustParse("2Gi"),
tk.kubelet.nodeInfo = &testNodeInfo{
nodes: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: string(tk.kubelet.nodeName),
},
Status: v1.NodeStatus{
Allocatable: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("6"),
v1.ResourceMemory: resource.MustParse("4Gi"),
},
},
},
},
}
cases := []struct {
pod *v1.Pod
expected *v1.Pod

View File

@ -222,12 +222,6 @@ func newTestKubeletWithImageList(
kubelet.backOff.Clock = fakeClock
kubelet.podKillingCh = make(chan *kubecontainer.PodPair, 20)
kubelet.resyncInterval = 10 * time.Second
kubelet.reservation = kubetypes.Reservation{
Kubernetes: v1.ResourceList{
v1.ResourceCPU: resource.MustParse(testReservationCPU),
v1.ResourceMemory: resource.MustParse(testReservationMemory),
},
}
kubelet.workQueue = queue.NewBasicWorkQueue(fakeClock)
// Relist period does not affect the tests.
kubelet.pleg = pleg.NewGenericPLEG(fakeRuntime, 100, time.Hour, nil, clock.RealClock{})

View File

@ -50,7 +50,6 @@ go_test(
name = "go_default_test",
srcs = [
"apparmor_test.go",
"cgroup_manager_test.go",
"container_manager_test.go",
"critical_pod_test.go",
"density_test.go",
@ -65,6 +64,8 @@ go_test(
"log_path_test.go",
"memory_eviction_test.go",
"mirror_pod_test.go",
"node_container_manager_test.go",
"pods_container_manager_test.go",
"resource_usage_test.go",
"restart_test.go",
"runtime_conformance_test.go",
@ -117,6 +118,7 @@ go_test(
"//vendor:k8s.io/apimachinery/pkg/util/intstr",
"//vendor:k8s.io/apimachinery/pkg/util/uuid",
"//vendor:k8s.io/apimachinery/pkg/watch",
"//vendor:k8s.io/client-go/pkg/api",
"//vendor:k8s.io/client-go/tools/cache",
],
)

View File

@ -70,9 +70,8 @@ func validateOOMScoreAdjSettingIsInRange(pid int, expectedMinOOMScoreAdj, expect
return nil
}
var _ = framework.KubeDescribe("Kubelet Container Manager [Serial]", func() {
var _ = framework.KubeDescribe("Container Manager Misc [Serial]", func() {
f := framework.NewDefaultFramework("kubelet-container-manager")
Describe("Validate OOM score adjustments", func() {
Context("once the node is setup", func() {
It("docker daemon's oom-score-adj should be -999", func() {

View File

@ -0,0 +1,247 @@
// +build linux
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package e2e_node
import (
"fmt"
"io/ioutil"
"path"
"path/filepath"
"strconv"
"strings"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/pkg/api"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/apis/componentconfig"
"k8s.io/kubernetes/pkg/kubelet/cm"
"k8s.io/kubernetes/test/e2e/framework"
. "github.com/onsi/ginkgo"
)
func setDesiredConfiguration(initialConfig *componentconfig.KubeletConfiguration) {
initialConfig.EnforceNodeAllocatable = []string{"pods", "kube-reserved", "system-reserved"}
initialConfig.SystemReserved = componentconfig.ConfigurationMap{
"cpu": "100m",
"memory": "100Mi",
}
initialConfig.KubeReserved = componentconfig.ConfigurationMap{
"cpu": "100m",
"memory": "100Mi",
}
initialConfig.EvictionHard = "memory.available<100Mi"
// Necessary for allocatable cgroup creation.
initialConfig.CgroupsPerQOS = true
initialConfig.KubeReservedCgroup = kubeReservedCgroup
initialConfig.SystemReservedCgroup = systemReservedCgroup
}
var _ = framework.KubeDescribe("Node Container Manager [Serial]", func() {
f := framework.NewDefaultFramework("node-container-manager")
Describe("Validate Node Allocatable", func() {
It("set's up the node and runs the test", func() {
framework.ExpectNoError(runTest(f))
})
})
})
func expectFileValToEqual(filePath string, expectedValue, delta int64) error {
out, err := ioutil.ReadFile(filePath)
if err != nil {
return fmt.Errorf("failed to read file %q", filePath)
}
actual, err := strconv.ParseInt(strings.TrimSpace(string(out)), 10, 64)
if err != nil {
return fmt.Errorf("failed to parse output %v", err)
}
	// Ensure that values are within a delta range to work around rounding errors.
if (actual < (expectedValue - delta)) || (actual > (expectedValue + delta)) {
return fmt.Errorf("Expected value at %q to be between %d and %d. Got %d", filePath, (expectedValue - delta), (expectedValue + delta), actual)
}
return nil
}
func getAllocatableLimits(cpu, memory string, capacity v1.ResourceList) (*resource.Quantity, *resource.Quantity) {
var allocatableCPU, allocatableMemory *resource.Quantity
	// Subtract the requested cpu and memory reservations from the node's capacity.
for k, v := range capacity {
if k == v1.ResourceCPU {
allocatableCPU = v.Copy()
allocatableCPU.Sub(resource.MustParse(cpu))
}
if k == v1.ResourceMemory {
allocatableMemory = v.Copy()
allocatableMemory.Sub(resource.MustParse(memory))
}
}
return allocatableCPU, allocatableMemory
}
const (
kubeReservedCgroup = "/kube_reserved"
systemReservedCgroup = "/system_reserved"
)
func createIfNotExists(cm cm.CgroupManager, cgroupConfig *cm.CgroupConfig) error {
if !cm.Exists(cgroupConfig.Name) {
if err := cm.Create(cgroupConfig); err != nil {
return err
}
}
return nil
}
func createTemporaryCgroupsForReservation(cgroupManager cm.CgroupManager) error {
// Create kube reserved cgroup
cgroupConfig := &cm.CgroupConfig{
Name: cm.CgroupName(kubeReservedCgroup),
}
if err := createIfNotExists(cgroupManager, cgroupConfig); err != nil {
return err
}
// Create system reserved cgroup
cgroupConfig.Name = cm.CgroupName(systemReservedCgroup)
return createIfNotExists(cgroupManager, cgroupConfig)
}
func destroyTemporaryCgroupsForReservation(cgroupManager cm.CgroupManager) error {
	// Destroy the kube reserved cgroup
cgroupConfig := &cm.CgroupConfig{
Name: cm.CgroupName(kubeReservedCgroup),
}
if err := cgroupManager.Destroy(cgroupConfig); err != nil {
return err
}
cgroupConfig.Name = cm.CgroupName(systemReservedCgroup)
return cgroupManager.Destroy(cgroupConfig)
}
func runTest(f *framework.Framework) error {
var oldCfg *componentconfig.KubeletConfiguration
subsystems, err := cm.GetCgroupSubsystems()
if err != nil {
return err
}
// Get current kubelet configuration
oldCfg, err = getCurrentKubeletConfig()
if err != nil {
return err
}
// Create a cgroup manager object for manipulating cgroups.
cgroupManager := cm.NewCgroupManager(subsystems, oldCfg.CgroupDriver)
defer destroyTemporaryCgroupsForReservation(cgroupManager)
defer func() {
if oldCfg != nil {
framework.ExpectNoError(setKubeletConfiguration(f, oldCfg))
}
}()
if err := createTemporaryCgroupsForReservation(cgroupManager); err != nil {
return err
}
clone, err := api.Scheme.DeepCopy(oldCfg)
if err != nil {
return err
}
newCfg := clone.(*componentconfig.KubeletConfiguration)
// Change existing kubelet configuration
setDesiredConfiguration(newCfg)
// Set the new kubelet configuration.
err = setKubeletConfiguration(f, newCfg)
if err != nil {
return err
}
// Set new config and current config.
currentConfig := newCfg
expectedNAPodCgroup := path.Join(currentConfig.CgroupRoot, "kubepods")
if !cgroupManager.Exists(cm.CgroupName(expectedNAPodCgroup)) {
return fmt.Errorf("Expected Node Allocatable Cgroup Does not exist")
}
// TODO: Update cgroupManager to expose a Status interface to get current Cgroup Settings.
nodeList, err := f.ClientSet.Core().Nodes().List(metav1.ListOptions{})
if err != nil {
return err
}
if len(nodeList.Items) != 1 {
return fmt.Errorf("Unexpected number of node objects for node e2e. Expects only one node: %+V", nodeList)
}
node := nodeList.Items[0]
capacity := node.Status.Capacity
allocatableCPU, allocatableMemory := getAllocatableLimits("200m", "200Mi", capacity)
// Total Memory reservation is 200Mi excluding eviction thresholds.
// Expect CPU shares on node allocatable cgroup to equal allocatable.
if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["cpu"], "kubepods", "cpu.shares"), cm.MilliCPUToShares(allocatableCPU.MilliValue()), 10); err != nil {
return err
}
// Expect Memory limit on node allocatable cgroup to equal allocatable.
if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["memory"], "kubepods", "memory.limit_in_bytes"), allocatableMemory.Value(), 0); err != nil {
return err
}
// Check that Allocatable reported to scheduler includes eviction thresholds.
schedulerAllocatable := node.Status.Allocatable
// Memory allocatable should take into account eviction thresholds.
allocatableCPU, allocatableMemory = getAllocatableLimits("200m", "300Mi", capacity)
// Expect allocatable to include all resources in capacity.
if len(schedulerAllocatable) != len(capacity) {
return fmt.Errorf("Expected all resources in capacity to be found in allocatable")
}
// CPU based evictions are not supported.
if allocatableCPU.Cmp(schedulerAllocatable["cpu"]) != 0 {
return fmt.Errorf("Unexpected cpu allocatable value exposed by the node. Expected: %v, got: %v, capacity: %v", allocatableCPU, schedulerAllocatable["cpu"], capacity["cpu"])
}
if allocatableMemory.Cmp(schedulerAllocatable["memory"]) != 0 {
return fmt.Errorf("Unexpected cpu allocatable value exposed by the node. Expected: %v, got: %v, capacity: %v", allocatableCPU, schedulerAllocatable["cpu"], capacity["memory"])
}
if !cgroupManager.Exists(cm.CgroupName(kubeReservedCgroup)) {
return fmt.Errorf("Expected kube reserved cgroup Does not exist")
}
	// Expect CPU shares on the kube reserved cgroup to equal its reservation, which is `100m`.
kubeReservedCPU := resource.MustParse(currentConfig.KubeReserved["cpu"])
if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["cpu"], kubeReservedCgroup, "cpu.shares"), cm.MilliCPUToShares(kubeReservedCPU.MilliValue()), 10); err != nil {
return err
}
	// Expect the memory limit on the kube reserved cgroup to equal the configured value of `100Mi`.
kubeReservedMemory := resource.MustParse(currentConfig.KubeReserved["memory"])
if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["memory"], kubeReservedCgroup, "memory.limit_in_bytes"), kubeReservedMemory.Value(), 0); err != nil {
return err
}
if !cgroupManager.Exists(cm.CgroupName(systemReservedCgroup)) {
return fmt.Errorf("Expected system reserved cgroup Does not exist")
}
	// Expect CPU shares on the system reserved cgroup to equal its reservation, which is `100m`.
systemReservedCPU := resource.MustParse(currentConfig.SystemReserved["cpu"])
if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["cpu"], systemReservedCgroup, "cpu.shares"), cm.MilliCPUToShares(systemReservedCPU.MilliValue()), 10); err != nil {
return err
}
	// Expect the memory limit on the system reserved cgroup to equal the configured value of `100Mi`.
systemReservedMemory := resource.MustParse(currentConfig.SystemReserved["memory"])
if err := expectFileValToEqual(filepath.Join(subsystems.MountPoints["memory"], systemReservedCgroup, "memory.limit_in_bytes"), systemReservedMemory.Value(), 0); err != nil {
return err
}
return nil
}
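The cpu.shares assertions in runTest allow a delta of 10 because shares are derived from millicores with integer arithmetic. A sketch of that conversion, assuming the usual cgroup convention of 1024 shares per CPU with a floor of 2 that cm.MilliCPUToShares follows; the helper below is illustrative, not the kubelet's code.

package main

import "fmt"

// Illustrative reimplementation of the millicore-to-cpu.shares mapping the
// cpu.shares assertions above depend on; 1024 shares per CPU with a floor of
// 2 (the kernel minimum) is the convention assumed here.
const (
	sharesPerCPU  = 1024
	milliCPUToCPU = 1000
	minShares     = 2
)

func milliCPUToShares(milliCPU int64) int64 {
	if milliCPU == 0 {
		return minShares
	}
	shares := (milliCPU * sharesPerCPU) / milliCPUToCPU
	if shares < minShares {
		return minShares
	}
	return shares
}

func main() {
	// On a 2-CPU node with a 200m total reservation, the node allocatable
	// cgroup is left with 1800m, which maps to 1843 shares.
	fmt.Println(milliCPUToShares(1800)) // 1843
	// Each 100m reserved cgroup gets 102 shares.
	fmt.Println(milliCPUToShares(100)) // 102
}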

View File

@ -17,6 +17,8 @@ limitations under the License.
package e2e_node
import (
"path"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/uuid"
@ -24,6 +26,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/cm"
"k8s.io/kubernetes/test/e2e/framework"
"github.com/golang/glog"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
)
@ -49,18 +52,23 @@ func getResourceRequirements(requests, limits v1.ResourceList) v1.ResourceRequir
return res
}
// Kubelet internal cgroup name for node allocatable cgroup.
const defaultNodeAllocatableCgroup = "kubepods"
// makePodToVerifyCgroups returns a pod that verifies the existence of the specified cgroups.
func makePodToVerifyCgroups(cgroupNames []cm.CgroupName) *v1.Pod {
// convert the names to their literal cgroupfs forms...
cgroupFsNames := []string{}
for _, cgroupName := range cgroupNames {
// Add top level cgroup used to enforce node allocatable.
cgroupName = cm.CgroupName(path.Join(defaultNodeAllocatableCgroup, string(cgroupName)))
if framework.TestContext.KubeletConfig.CgroupDriver == "systemd" {
cgroupFsNames = append(cgroupFsNames, cm.ConvertCgroupNameToSystemd(cgroupName, true))
} else {
cgroupFsNames = append(cgroupFsNames, string(cgroupName))
}
}
glog.Infof("expecting %v cgroups to be found", cgroupFsNames)
// build the pod command to either verify cgroups exist
command := ""
for _, cgroupFsName := range cgroupFsNames {

View File

@ -95,6 +95,7 @@ func tempSetEvictionHard(f *framework.Framework, evictionHard string) {
// Must be called within a Context. Allows the function to modify the KubeletConfiguration during the BeforeEach of the context.
// The change is reverted in the AfterEach of the context.
// Returns true on success.
func tempSetCurrentKubeletConfig(f *framework.Framework, updateFunction func(initialConfig *componentconfig.KubeletConfiguration)) {
var oldCfg *componentconfig.KubeletConfiguration
BeforeEach(func() {
@ -292,3 +293,9 @@ func logNodeEvents(f *framework.Framework) {
err := framework.ListNamespaceEvents(f.ClientSet, "")
framework.ExpectNoError(err)
}
func getLocalNode(f *framework.Framework) *v1.Node {
nodeList := framework.GetReadySchedulableNodesOrDie(f.ClientSet)
	Expect(len(nodeList.Items)).To(Equal(1), "Unexpected number of node objects for node e2e. Expected exactly one node.")
return &nodeList.Items[0]
}