Merge pull request #20687 from vishh/systemd-support

Auto commit by PR queue bot
pull/6/head
k8s-merge-robot 2016-02-11 13:06:29 -08:00
commit f6ae6b47ae
18 changed files with 229 additions and 7784 deletions

View File

@ -156,7 +156,9 @@ script
--cluster-domain=${DNS_DOMAIN} \
--configure-cbr0=${ALLOCATE_NODE_CIDRS} \
--cgroup-root=/ \
--system-container=/system \
--system-cgroups=/system \
--runtime-cgroups=/docker-daemon \
--kubelet-cgroups=/kubelet \
--nosystemd=true \
${ARGS} 1>>/var/log/kubelet.log 2>&1
end script

View File

@ -141,7 +141,9 @@ script
--cluster-domain=${DNS_DOMAIN} \
--configure-cbr0=true \
--cgroup-root=/ \
--system-container=/system \
--system-cgroups=/system \
--runtime-cgroups=/docker-daemon \
--kubelet-cgroups=/kubelet \
--nosystemd=true \
${ARGS} 1>>/var/log/kubelet.log 2>&1
end script

View File

@ -102,15 +102,19 @@
{% set experimental_flannel_overlay = "--experimental-flannel-overlay=true" %}
{% endif -%}
# Run containers under the root cgroup and create a system container.
{% set system_container = "" -%}
# Setup cgroups hierarchies.
{% set cgroup_root = "" -%}
{% if grains['os_family'] == 'Debian' -%}
{% set system_container = "--system-container=/system" -%}
{% set system_container = "" -%}
{% set kubelet_container = "" -%}
{% set runtime_container = "" -%}
{% if grains['os_family'] == 'Debian' -%}
{% if pillar.get('is_systemd') %}
{% set cgroup_root = "--cgroup-root=docker" -%}
{% else %}
{% set cgroup_root = "--cgroup-root=/" -%}
{% set system_container = "--system-cgroups=/system" -%}
{% set runtime_container = "--runtime-cgroups=/docker-daemon" -%}
{% set kubelet_container= "--kubelet-cgroups=/kubelet" -%}
{% endif %}
{% endif -%}
{% if grains['oscodename'] == 'vivid' -%}

View File

@ -47,8 +47,7 @@ type KubeletServer struct {
KubeConfig util.StringFlag
APIServerList []string
DockerDaemonContainer string
RunOnce bool
RunOnce bool
// Insert a probability of random errors during calls to the master.
ChaosChance float64
@ -61,9 +60,8 @@ type KubeletServer struct {
// NewKubeletServer will create a new KubeletServer with default values.
func NewKubeletServer() *KubeletServer {
return &KubeletServer{
AuthPath: util.NewStringFlag("/var/lib/kubelet/kubernetes_auth"), // deprecated
KubeConfig: util.NewStringFlag("/var/lib/kubelet/kubeconfig"),
DockerDaemonContainer: "/docker-daemon",
AuthPath: util.NewStringFlag("/var/lib/kubelet/kubernetes_auth"), // deprecated
KubeConfig: util.NewStringFlag("/var/lib/kubelet/kubeconfig"),
SystemReserved: make(util.ConfigurationMap),
KubeReserved: make(util.ConfigurationMap),
@ -113,14 +111,15 @@ func NewKubeletServer() *KubeletServer {
RegisterSchedulable: true,
RegistryBurst: 10,
RegistryPullQPS: 5.0,
ResourceContainer: "/kubelet",
KubeletCgroups: "",
RktPath: "",
RktStage1Image: "",
RootDirectory: defaultRootDir,
RuntimeCgroups: "",
SerializeImagePulls: true,
StreamingConnectionIdleTimeout: unversioned.Duration{4 * time.Hour},
SyncFrequency: unversioned.Duration{1 * time.Minute},
SystemContainer: "",
SystemCgroups: "",
ReconcileCIDR: true,
KubeAPIQPS: 5.0,
KubeAPIBurst: 10,
@ -192,13 +191,20 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
fs.StringVar(&s.VolumePluginDir, "volume-plugin-dir", s.VolumePluginDir, "<Warning: Alpha feature> The full path of the directory in which to search for additional third party volume plugins")
fs.StringVar(&s.CloudProvider, "cloud-provider", s.CloudProvider, "The provider for cloud services. Empty string for no provider.")
fs.StringVar(&s.CloudConfigFile, "cloud-config", s.CloudConfigFile, "The path to the cloud provider configuration file. Empty string for no configuration file.")
fs.StringVar(&s.ResourceContainer, "resource-container", s.ResourceContainer, "Absolute name of the resource-only container to create and run the Kubelet in (Default: /kubelet).")
fs.StringVar(&s.KubeletCgroups, "resource-container", s.KubeletCgroups, "Optional absolute name of the resource-only container to create and run the Kubelet in.")
fs.MarkDeprecated("resource-container", "Use --kubelet-cgroups instead. Will be removed in a future version.")
fs.StringVar(&s.KubeletCgroups, "kubelet-cgroups", s.KubeletCgroups, "Optional absolute name of cgroups to create and run the Kubelet in.")
fs.StringVar(&s.SystemCgroups, "system-container", s.SystemCgroups, "Optional resource-only container in which to place all non-kernel processes that are not already in a container. Empty for no container. Rolling back the flag requires a reboot. (Default: \"\").")
fs.MarkDeprecated("system-container", "Use --system-cgroups instead. Will be removed in a future version.")
fs.StringVar(&s.SystemCgroups, "system-cgroups", s.SystemCgroups, "Optional absolute name of cgroups in which to place all non-kernel processes that are not already inside a cgroup under `/`. Empty for no container. Rolling back the flag requires a reboot. (Default: \"\").")
fs.StringVar(&s.CgroupRoot, "cgroup-root", s.CgroupRoot, "Optional root cgroup to use for pods. This is handled by the container runtime on a best effort basis. Default: '', which means use the container runtime default.")
fs.StringVar(&s.ContainerRuntime, "container-runtime", s.ContainerRuntime, "The container runtime to use. Possible values: 'docker', 'rkt'. Default: 'docker'.")
fs.StringVar(&s.LockFilePath, "lock-file", s.LockFilePath, "<Warning: Alpha feature> The path to file for kubelet to use as a lock file.")
fs.StringVar(&s.RktPath, "rkt-path", s.RktPath, "Path of rkt binary. Leave empty to use the first rkt in $PATH. Only used if --container-runtime='rkt'")
fs.StringVar(&s.RktStage1Image, "rkt-stage1-image", s.RktStage1Image, "image to use as stage1. Local paths and http/https URLs are supported. If empty, the 'stage1.aci' in the same directory as '--rkt-path' will be used")
fs.StringVar(&s.SystemContainer, "system-container", s.SystemContainer, "Optional resource-only container in which to place all non-kernel processes that are not already in a container. Empty for no container. Rolling back the flag requires a reboot. (Default: \"\").")
fs.BoolVar(&s.ConfigureCBR0, "configure-cbr0", s.ConfigureCBR0, "If true, kubelet will configure cbr0 based on Node.Spec.PodCIDR.")
fs.BoolVar(&s.HairpinMode, "configure-hairpin-mode", s.HairpinMode, "If true, kubelet will set the hairpin mode flag on container interfaces. This allows endpoints of a Service to loadbalance back to themselves if they should try to access their own Service.")
fs.IntVar(&s.MaxPods, "max-pods", s.MaxPods, "Number of Pods that can run on this Kubelet.")
@ -223,4 +229,5 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
fs.DurationVar(&s.OutOfDiskTransitionFrequency.Duration, "outofdisk-transition-frequency", s.OutOfDiskTransitionFrequency.Duration, "Duration for which the kubelet has to wait before transitioning out of out-of-disk node condition status. Default: 5m0s")
fs.StringVar(&s.NodeIP, "node-ip", s.NodeIP, "IP address of the node. If set, kubelet will use this IP address for the node")
fs.BoolVar(&s.EnableCustomMetrics, "enable-custom-metrics", s.EnableCustomMetrics, "Support for gathering custom metrics.")
fs.StringVar(&s.RuntimeCgroups, "runtime-cgroups", s.RuntimeCgroups, "Optional absolute name of cgroups to create and run the runtime in.")
}

View File

@ -194,7 +194,7 @@ func UnsecuredKubeletConfig(s *options.KubeletServer) (*KubeletConfig, error) {
CPUCFSQuota: s.CPUCFSQuota,
DiskSpacePolicy: diskSpacePolicy,
DockerClient: dockertools.ConnectToDockerOrDie(s.DockerEndpoint),
DockerDaemonContainer: s.DockerDaemonContainer,
RuntimeCgroups: s.RuntimeCgroups,
DockerExecHandler: dockerExecHandler,
EnableCustomMetrics: s.EnableCustomMetrics,
EnableDebuggingHandlers: s.EnableDebuggingHandlers,
@ -236,7 +236,7 @@ func UnsecuredKubeletConfig(s *options.KubeletServer) (*KubeletConfig, error) {
RegistryPullQPS: s.RegistryPullQPS,
ResolverConfig: s.ResolverConfig,
Reservation: *reservation,
ResourceContainer: s.ResourceContainer,
KubeletCgroups: s.KubeletCgroups,
RktPath: s.RktPath,
RktStage1Image: s.RktStage1Image,
RootDirectory: s.RootDirectory,
@ -245,7 +245,7 @@ func UnsecuredKubeletConfig(s *options.KubeletServer) (*KubeletConfig, error) {
StandaloneMode: (len(s.APIServerList) == 0),
StreamingConnectionIdleTimeout: s.StreamingConnectionIdleTimeout.Duration,
SyncFrequency: s.SyncFrequency.Duration,
SystemContainer: s.SystemContainer,
SystemCgroups: s.SystemCgroups,
TLSOptions: tlsOptions,
Writer: writer,
VolumePlugins: ProbeVolumePlugins(s.VolumePluginDir),
@ -306,7 +306,16 @@ func Run(s *options.KubeletServer, kcfg *KubeletConfig) error {
}
if kcfg.ContainerManager == nil {
kcfg.ContainerManager, err = cm.NewContainerManager(kcfg.Mounter, kcfg.CAdvisorInterface)
if kcfg.SystemCgroups != "" && kcfg.CgroupRoot == "" {
return fmt.Errorf("invalid configuration: system container was specified and cgroup root was not specified")
}
kcfg.ContainerManager, err = cm.NewContainerManager(kcfg.Mounter, kcfg.CAdvisorInterface, cm.NodeConfig{
RuntimeCgroupsName: kcfg.RuntimeCgroups,
SystemCgroupsName: kcfg.SystemCgroups,
KubeletCgroupsName: kcfg.KubeletCgroups,
ContainerRuntime: kcfg.ContainerRuntime,
})
if err != nil {
return err
}
@ -501,7 +510,7 @@ func SimpleKubelet(client *clientset.Clientset,
CPUCFSQuota: true,
DiskSpacePolicy: diskSpacePolicy,
DockerClient: dockerClient,
DockerDaemonContainer: "/docker-daemon",
RuntimeCgroups: "",
DockerExecHandler: &dockertools.NativeExecHandler{},
EnableCustomMetrics: false,
EnableDebuggingHandlers: true,
@ -530,11 +539,11 @@ func SimpleKubelet(client *clientset.Clientset,
RegistryBurst: 10,
RegistryPullQPS: 5.0,
ResolverConfig: kubetypes.ResolvConfDefault,
ResourceContainer: "/kubelet",
KubeletCgroups: "/kubelet",
RootDirectory: rootDir,
SerializeImagePulls: true,
SyncFrequency: syncFrequency,
SystemContainer: "",
SystemCgroups: "",
TLSOptions: tlsOptions,
VolumePlugins: volumePlugins,
Writer: &io.StdWriter{},
@ -677,7 +686,7 @@ type KubeletConfig struct {
CPUCFSQuota bool
DiskSpacePolicy kubelet.DiskSpacePolicy
DockerClient dockertools.DockerInterface
DockerDaemonContainer string
RuntimeCgroups string
DockerExecHandler dockertools.ExecHandler
EnableCustomMetrics bool
EnableDebuggingHandlers bool
@ -724,7 +733,7 @@ type KubeletConfig struct {
RegistryPullQPS float64
Reservation kubetypes.Reservation
ResolverConfig string
ResourceContainer string
KubeletCgroups string
RktPath string
RktStage1Image string
RootDirectory string
@ -733,7 +742,7 @@ type KubeletConfig struct {
StandaloneMode bool
StreamingConnectionIdleTimeout time.Duration
SyncFrequency time.Duration
SystemContainer string
SystemCgroups string
TLSOptions *server.TLSOptions
Writer io.Writer
VolumePlugins []volume.VolumePlugin
@ -802,7 +811,6 @@ func CreateAndInitKubelet(kc *KubeletConfig) (k KubeletBootstrap, pc *config.Pod
kc.Cloud,
kc.NodeLabels,
kc.NodeStatusUpdateFrequency,
kc.ResourceContainer,
kc.OSInterface,
kc.CgroupRoot,
kc.ContainerRuntime,
@ -810,8 +818,6 @@ func CreateAndInitKubelet(kc *KubeletConfig) (k KubeletBootstrap, pc *config.Pod
kc.RktStage1Image,
kc.Mounter,
kc.Writer,
kc.DockerDaemonContainer,
kc.SystemContainer,
kc.ConfigureCBR0,
kc.NonMasqueradeCIDR,
kc.PodCIDR,

View File

@ -180,7 +180,7 @@ func (s *KubeletExecutorServer) runKubelet(
return decorated, pc, nil
}
kcfg.DockerDaemonContainer = "" // don't move the docker daemon into a cgroup
kcfg.RuntimeCgroups = "" // don't move the docker daemon into a cgroup
kcfg.Hostname = kcfg.HostnameOverride
kcfg.KubeClient = apiclient
@ -201,7 +201,7 @@ func (s *KubeletExecutorServer) runKubelet(
kcfg.NodeName = kcfg.HostnameOverride
kcfg.PodConfig = kconfig.NewPodConfig(kconfig.PodConfigNotificationIncremental, kcfg.Recorder) // override the default pod source
kcfg.StandaloneMode = false
kcfg.SystemContainer = "" // don't take control over other system processes.
kcfg.SystemCgroups = "" // don't take control over other system processes.
if kcfg.Cloud != nil {
// fail early and hard because having the cloud provider loaded would go unnoticed,
// but break bigger cluster because accessing the state.json from every slave kills the master.
@ -216,7 +216,12 @@ func (s *KubeletExecutorServer) runKubelet(
}
kcfg.CAdvisorInterface = cAdvisorInterface
kcfg.ContainerManager, err = cm.NewContainerManager(kcfg.Mounter, cAdvisorInterface)
kcfg.ContainerManager, err = cm.NewContainerManager(kcfg.Mounter, cAdvisorInterface, cm.NodeConfig{
RuntimeCgroupsName: kcfg.RuntimeCgroups,
SystemCgroupsName: kcfg.SystemCgroups,
KubeletCgroupsName: kcfg.KubeletCgroups,
ContainerRuntime: kcfg.ContainerRuntime,
})
if err != nil {
return err
}

View File

@ -104,6 +104,7 @@ kubelet
--kube-api-qps=5: QPS to use while talking with kubernetes apiserver
--kube-reserved=: A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for kubernetes system components. Currently only cpu and memory are supported. See http://releases.k8s.io/HEAD/docs/user-guide/compute-resources.html for more detail. [default=none]
--kubeconfig="/var/lib/kubelet/kubeconfig": Path to a kubeconfig file, specifying how to authenticate to API server (the master location is set by the api-servers flag).
--kubelet-cgroups="": Optional absolute name of cgroups to create and run the Kubelet in.
--lock-file="/var/run/lock/kubelet.lock": <Warning: Alpha feature> The path to file for kubelet to use as a lock file.
--log-flush-frequency=5s: Maximum number of seconds between log flushes
--low-diskspace-threshold-mb=256: The absolute free disk space, in MB, to maintain. When disk space falls below this threshold, new pods would be rejected. Default: 256
@ -134,15 +135,15 @@ kubelet
--registry-burst=10: Maximum size of a bursty pulls, temporarily allows pulls to burst to this number, while still not exceeding registry-qps. Only used if --registry-qps > 0
--registry-qps=5: If > 0, limit registry pull QPS to this value. If 0, unlimited. [default=5.0]
--resolv-conf="/etc/resolv.conf": Resolver configuration file used as the basis for the container DNS resolution configuration.
--resource-container="/kubelet": Absolute name of the resource-only container to create and run the Kubelet in (Default: /kubelet).
--rkt-path="": Path of rkt binary. Leave empty to use the first rkt in $PATH. Only used if --container-runtime='rkt'
--rkt-stage1-image="": image to use as stage1. Local paths and http/https URLs are supported. If empty, the 'stage1.aci' in the same directory as '--rkt-path' will be used
--root-dir="/var/lib/kubelet": Directory path for managing kubelet files (volume mounts,etc).
--runonce[=false]: If true, exit after spawning pods from local manifests or remote urls. Exclusive with --api-servers, and --enable-server
--runtime-cgroups="": Optional absolute name of cgroups to create and run the runtime in.
--serialize-image-pulls[=true]: Pull images one at a time. We recommend *not* changing the default value on nodes that run docker daemon with version < 1.9 or an Aufs storage backend. Issue #10959 has more details. [default=true]
--streaming-connection-idle-timeout=4h0m0s: Maximum time a streaming connection can be idle before the connection is automatically closed. 0 indicates no timeout. Example: '5m'
--sync-frequency=1m0s: Max period between synchronizing running containers and config
--system-container="": Optional resource-only container in which to place all non-kernel processes that are not already in a container. Empty for no container. Rolling back the flag requires a reboot. (Default: "").
--system-cgroups="": Optional absolute name of cgroups in which to place all non-kernel processes that are not already inside a cgroup under `/`. Empty for no container. Rolling back the flag requires a reboot. (Default: "").
--system-reserved=: A set of ResourceName=ResourceQuantity (e.g. cpu=200m,memory=150G) pairs that describe resources reserved for non-kubernetes components. Currently only cpu and memory are supported. See http://releases.k8s.io/HEAD/docs/user-guide/compute-resources.html for more detail. [default=none]
--tls-cert-file="": File containing x509 Certificate for HTTPS. (CA cert, if any, concatenated after server cert). If --tls-cert-file and --tls-private-key-file are not provided, a self-signed certificate and key are generated for the public address and saved to the directory passed to --cert-dir.
--tls-private-key-file="": File containing x509 private key matching --tls-cert-file.

View File

@ -171,6 +171,7 @@ kubectl-path
kubelet-address
kubelet-cadvisor-port
kubelet-certificate-authority
kubelet-cgroups
kubelet-client-certificate
kubelet-client-key
kubelet-docker-endpoint
@ -311,6 +312,7 @@ root-ca-file
root-dir
run-proxy
runtime-config
runtime-cgroups
save-config
scheduler-config
scheduler-name
@ -347,6 +349,7 @@ storage-versions
streaming-connection-idle-timeout
suicide-timeout
sync-frequency
system-cgroups
system-container
system-reserved
target-port

File diff suppressed because it is too large Load Diff

View File

@ -52,9 +52,9 @@ type KubeProxyConfiguration struct {
// portRange is the range of host ports (beginPort-endPort, inclusive) that may be consumed
// in order to proxy service traffic. If unspecified (0-0) then ports will be randomly chosen.
PortRange string `json:"portRange"`
// resourceContainer is the bsolute name of the resource-only container to create and run
// resourceContainer is the absolute name of the resource-only container to create and run
// the Kube-proxy in (Default: /kube-proxy).
ResourceContainer string `json:"resourceContainer"`
ResourceContainer string `json:"kubeletCgroups"`
// udpIdleTimeout is how long an idle UDP connection will be kept open (e.g. '250ms', '2s').
// Must be greater than 0. Only applicable for proxyMode=userspace.
UDPIdleTimeout unversioned.Duration `json:"udpTimeoutMilliseconds"`
@ -223,9 +223,14 @@ type KubeletConfiguration struct {
CloudProvider string `json:"cloudProvider,omitempty"`
// cloudConfigFile is the path to the cloud provider configuration file.
CloudConfigFile string `json:"cloudConfigFile,omitempty"`
// resourceContainer is the absolute name of the resource-only container
// to create and run the Kubelet in.
ResourceContainer string `json:"resourceContainer,omitempty"`
// KubeletCgroups is the absolute name of cgroups to isolate the kubelet in.
KubeletCgroups string `json:"kubeletCgroups,omitempty"`
// RuntimeCgroups is the cgroups that the container runtime is expected to be isolated in.
RuntimeCgroups string `json:"runtimeCgroups,omitempty"`
// SystemCgroups is the absolute name of cgroups in which to place
// all non-kernel processes that are not already in a container. Empty
// for no container. Rolling back the flag requires a reboot.
SystemCgroups string `json:"systemContainer,omitempty"`
// cgroupRoot is the root cgroup to use for pods. This is handled by the
// container runtime on a best effort basis.
CgroupRoot string `json:"cgroupRoot,omitempty"`
@ -241,10 +246,6 @@ type KubeletConfiguration struct {
// rktStage1Image is the image to use as stage1. Local paths and
// http/https URLs are supported.
RktStage1Image string `json:"rktStage1Image,omitempty"`
// systemContainer is the resource-only container in which to place
// all non-kernel processes that are not already in a container. Empty
// for no container. Rolling back the flag requires a reboot.
SystemContainer string `json:"systemContainer"`
// configureCBR0 enables the kubelet to configure cbr0 based on
// Node.Spec.PodCIDR.
ConfigureCBR0 bool `json:"configureCbr0"`

File diff suppressed because it is too large Load Diff

View File

@ -25,15 +25,19 @@ type ContainerManager interface {
// Runs the container manager's housekeeping.
// - Ensures that the Docker daemon is in a container.
// - Creates the system container where all non-containerized processes run.
Start(NodeConfig) error
Start() error
// Returns resources allocated to system containers in the machine.
// These containers include the system and Kubernetes services.
SystemContainersLimit() api.ResourceList
// Returns resources allocated to system cgroups in the machine.
// These cgroups include the system and Kubernetes services.
SystemCgroupsLimit() api.ResourceList
// Returns a NodeConfig that is being used by the container manager.
GetNodeConfig() NodeConfig
}
type NodeConfig struct {
DockerDaemonContainerName string
SystemContainerName string
KubeletContainerName string
RuntimeCgroupsName string
SystemCgroupsName string
KubeletCgroupsName string
ContainerRuntime string
}

View File

@ -66,7 +66,7 @@ type systemContainer struct {
manager *fs.Manager
}
func newSystemContainer(containerName string) *systemContainer {
func newSystemCgroups(containerName string) *systemContainer {
return &systemContainer{
name: containerName,
manager: createManager(containerName),
@ -114,11 +114,11 @@ func validateSystemRequirements(mountUtil mount.Interface) error {
// TODO(vmarmol): Add limits to the system containers.
// Takes the absolute name of the specified containers.
// Empty container name disables use of the specified container.
func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.Interface) (ContainerManager, error) {
func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.Interface, nodeConfig NodeConfig) (ContainerManager, error) {
return &containerManagerImpl{
cadvisorInterface: cadvisorInterface,
mountUtil: mountUtil,
NodeConfig: NodeConfig{},
NodeConfig: nodeConfig,
}, nil
}
@ -192,70 +192,113 @@ func (cm *containerManagerImpl) setupNode() error {
}
systemContainers := []*systemContainer{}
if cm.DockerDaemonContainerName != "" {
cont := newSystemContainer(cm.DockerDaemonContainerName)
if cm.ContainerRuntime == "docker" {
if cm.RuntimeCgroupsName != "" {
cont := newSystemCgroups(cm.RuntimeCgroupsName)
info, err := cm.cadvisorInterface.MachineInfo()
var capacity = api.ResourceList{}
if err != nil {
} else {
capacity = cadvisor.CapacityFromMachineInfo(info)
}
memoryLimit := (int64(capacity.Memory().Value() * DockerMemoryLimitThresholdPercent / 100))
if memoryLimit < MinDockerMemoryLimit {
glog.Warningf("Memory limit %d for container %s is too small, reset it to %d", memoryLimit, cm.RuntimeCgroupsName, MinDockerMemoryLimit)
memoryLimit = MinDockerMemoryLimit
}
info, err := cm.cadvisorInterface.MachineInfo()
var capacity = api.ResourceList{}
if err != nil {
} else {
capacity = cadvisor.CapacityFromMachineInfo(info)
}
memoryLimit := (int64(capacity.Memory().Value() * DockerMemoryLimitThresholdPercent / 100))
if memoryLimit < MinDockerMemoryLimit {
glog.Warningf("Memory limit %d for container %s is too small, reset it to %d", memoryLimit, cm.DockerDaemonContainerName, MinDockerMemoryLimit)
memoryLimit = MinDockerMemoryLimit
}
glog.V(2).Infof("Configure resource-only container %s with memory limit: %d", cm.RuntimeCgroupsName, memoryLimit)
glog.V(2).Infof("Configure resource-only container %s with memory limit: %d", cm.DockerDaemonContainerName, memoryLimit)
dockerContainer := &fs.Manager{
Cgroups: &configs.Cgroup{
Parent: "/",
Name: cm.DockerDaemonContainerName,
Resources: &configs.Resources{
Memory: memoryLimit,
MemorySwap: -1,
AllowAllDevices: true,
dockerContainer := &fs.Manager{
Cgroups: &configs.Cgroup{
Parent: "/",
Name: cm.RuntimeCgroupsName,
Resources: &configs.Resources{
Memory: memoryLimit,
MemorySwap: -1,
AllowAllDevices: true,
},
},
},
}
cont.ensureStateFunc = func(manager *fs.Manager) error {
return ensureDockerInContainer(cm.cadvisorInterface, -900, dockerContainer)
}
systemContainers = append(systemContainers, cont)
} else {
cont, err := getContainerNameForProcess("docker")
if err != nil {
glog.Error(err)
} else {
cm.RuntimeCgroupsName = cont
}
}
cont.ensureStateFunc = func(manager *fs.Manager) error {
return ensureDockerInContainer(cm.cadvisorInterface, -900, dockerContainer)
}
systemContainers = append(systemContainers, cont)
}
if cm.SystemContainerName != "" {
if cm.SystemContainerName == "/" {
if cm.SystemCgroupsName != "" {
if cm.SystemCgroupsName == "/" {
return fmt.Errorf("system container cannot be root (\"/\")")
}
cont := newSystemCgroups(cm.SystemCgroupsName)
rootContainer := &fs.Manager{
Cgroups: &configs.Cgroup{
Parent: "/",
Name: "/",
},
}
manager := createManager(cm.SystemContainerName)
err := ensureSystemContainer(rootContainer, manager)
if err != nil {
return err
cont.ensureStateFunc = func(manager *fs.Manager) error {
return ensureSystemCgroups(rootContainer, manager)
}
systemContainers = append(systemContainers, newSystemContainer(cm.SystemContainerName))
systemContainers = append(systemContainers, cont)
}
if cm.KubeletContainerName != "" {
systemContainers = append(systemContainers, newSystemContainer(cm.KubeletContainerName))
if cm.KubeletCgroupsName != "" {
cont := newSystemCgroups(cm.KubeletCgroupsName)
manager := fs.Manager{
Cgroups: &configs.Cgroup{
Parent: "/",
Name: cm.KubeletCgroupsName,
Resources: &configs.Resources{
AllowAllDevices: true,
},
},
}
cont.ensureStateFunc = func(_ *fs.Manager) error {
return manager.Apply(os.Getpid())
}
systemContainers = append(systemContainers, cont)
} else {
cont, err := getContainer(os.Getpid())
if err != nil {
glog.Error("failed to find cgroups of kubelet - %v", err)
} else {
cm.KubeletCgroupsName = cont
}
}
cm.systemContainers = systemContainers
return nil
}
func (cm *containerManagerImpl) Start(nodeConfig NodeConfig) error {
cm.NodeConfig = nodeConfig
// getContainerNameForProcess resolves the container name reported by
// getContainer for the first pid that getPidsForProcess finds for name.
//
// It returns an empty name with a nil error when no pid is reported, and a
// wrapped error when the pid lookup itself fails.
func getContainerNameForProcess(name string) (string, error) {
	processIDs, lookupErr := getPidsForProcess(name)
	switch {
	case lookupErr != nil:
		return "", fmt.Errorf("failed to detect process id for %q - %v", name, lookupErr)
	case len(processIDs) == 0:
		// Nothing to inspect; callers treat an empty name as "unknown".
		return "", nil
	}

	// Only the first reported pid is consulted.
	containerName, containerErr := getContainer(processIDs[0])
	if containerErr != nil {
		return "", containerErr
	}
	return containerName, nil
}
// GetNodeConfig returns the NodeConfig currently held by this container manager.
func (cm *containerManagerImpl) GetNodeConfig() NodeConfig {
return cm.NodeConfig
}
func (cm *containerManagerImpl) Start() error {
// Setup the node
if err := cm.setupNode(); err != nil {
return err
@ -285,7 +328,7 @@ func (cm *containerManagerImpl) Start(nodeConfig NodeConfig) error {
return nil
}
func (cm *containerManagerImpl) SystemContainersLimit() api.ResourceList {
func (cm *containerManagerImpl) SystemCgroupsLimit() api.ResourceList {
cpuLimit := int64(0)
// Sum up resources of all external containers.
@ -313,16 +356,13 @@ func isProcessRunningInHost(pid int) (bool, error) {
return initMntNs == processMntNs, nil
}
// Ensures that the Docker daemon is in the desired container.
func ensureDockerInContainer(cadvisor cadvisor.Interface, oomScoreAdj int, manager *fs.Manager) error {
// What container is Docker in?
out, err := exec.Command("pidof", "docker").Output()
// getPidsForProcess returns the pids that the `pidof` utility reports for the
// process called name.
//
// An error is returned when `pidof` cannot be executed or exits with a
// non-zero status; the error message includes the requested process name.
// NOTE(review): pidof conventionally exits non-zero when nothing matches, so
// "no such process" surfaces as an error here — confirm on target distros.
func getPidsForProcess(name string) ([]int, error) {
	// Pass the caller-supplied process name through to pidof; a quoted
	// literal here would always search for a process literally called "name".
	out, err := exec.Command("pidof", name).Output()
	if err != nil {
		return []int{}, fmt.Errorf("failed to find pid of %q: %v", name, err)
	}

	// The output of pidof is a whitespace-separated list of pids.
	// The process may be forking and thus there can be more than one result.
	pids := []int{}
	for _, pidStr := range strings.Fields(string(out)) {
		pid, err := strconv.Atoi(pidStr)
		if err != nil {
			// Ignore any token that is not a valid pid.
			continue
		}
		pids = append(pids, pid)
	}
	return pids, nil
}
// Ensures that the Docker daemon is in the desired container.
func ensureDockerInContainer(cadvisor cadvisor.Interface, oomScoreAdj int, manager *fs.Manager) error {
pids, err := getPidsForProcess("docker")
if err != nil {
return err
}
// Move if the pid is not already in the desired container.
errs := []error{}
for _, pid := range pids {
@ -387,7 +435,7 @@ func getContainer(pid int) (string, error) {
// The reason of leaving kernel threads at root cgroup is that we don't want to tie the
// execution of these threads with to-be defined /system quota and create priority inversions.
//
func ensureSystemContainer(rootContainer *fs.Manager, manager *fs.Manager) error {
func ensureSystemCgroups(rootContainer *fs.Manager, manager *fs.Manager) error {
// Move non-kernel PIDs to the system container.
attemptsRemaining := 10
var errs []error

View File

@ -25,15 +25,19 @@ type containerManagerStub struct{}
var _ ContainerManager = &containerManagerStub{}
func (cm *containerManagerStub) Start(_ NodeConfig) error {
// Start logs at verbosity level 2 that the stub container manager is starting
// and always returns nil.
func (cm *containerManagerStub) Start() error {
glog.V(2).Infof("Starting stub container manager")
return nil
}
func (cm *containerManagerStub) SystemContainersLimit() api.ResourceList {
// SystemCgroupsLimit always returns an empty api.ResourceList for the stub.
func (cm *containerManagerStub) SystemCgroupsLimit() api.ResourceList {
return api.ResourceList{}
}
// GetNodeConfig always returns a zero-value NodeConfig for the stub.
func (cm *containerManagerStub) GetNodeConfig() NodeConfig {
return NodeConfig{}
}
// NewStubContainerManager returns a ContainerManager backed by a new
// containerManagerStub.
func NewStubContainerManager() ContainerManager {
return &containerManagerStub{}
}

View File

@ -31,14 +31,18 @@ type unsupportedContainerManager struct {
var _ ContainerManager = &unsupportedContainerManager{}
func (unsupportedContainerManager) Start(_ NodeConfig) error {
// Start always fails with an error stating that the container manager is
// unsupported in this build.
func (unsupportedContainerManager) Start() error {
return fmt.Errorf("Container Manager is unsupported in this build")
}
func (unsupportedContainerManager) SystemContainersLimit() api.ResourceList {
// SystemCgroupsLimit always returns an empty api.ResourceList.
func (unsupportedContainerManager) SystemCgroupsLimit() api.ResourceList {
return api.ResourceList{}
}
func NewContainerManager(mounter mount.Interface, cadvisorInterface cadvisor.Interface) (ContainerManager, error) {
// GetNodeConfig always returns a zero-value NodeConfig.
func (unsupportedContainerManager) GetNodeConfig() NodeConfig {
return NodeConfig{}
}
// NewContainerManager ignores all of its arguments and returns an
// unsupportedContainerManager together with a nil error.
func NewContainerManager(_ mount.Interface, _ cadvisor.Interface, _ NodeConfig) (ContainerManager, error) {
return &unsupportedContainerManager{}, nil
}

View File

@ -178,7 +178,6 @@ func NewMainKubelet(
cloud cloudprovider.Interface,
nodeLabels map[string]string,
nodeStatusUpdateFrequency time.Duration,
resourceContainer string,
osInterface kubecontainer.OSInterface,
cgroupRoot string,
containerRuntime string,
@ -186,8 +185,6 @@ func NewMainKubelet(
rktStage1Image string,
mounter mount.Interface,
writer kubeio.Writer,
dockerDaemonContainer string,
systemContainer string,
configureCBR0 bool,
nonMasqueradeCIDR string,
podCIDR string,
@ -215,9 +212,6 @@ func NewMainKubelet(
if resyncInterval <= 0 {
return nil, fmt.Errorf("invalid sync frequency %d", resyncInterval)
}
if systemContainer != "" && cgroupRoot == "" {
return nil, fmt.Errorf("invalid configuration: system container was specified and cgroup root was not specified")
}
dockerClient = dockertools.NewInstrumentedDockerInterface(dockerClient)
serviceStore := cache.NewStore(cache.MetaNamespaceKeyFunc)
@ -311,25 +305,24 @@ func NewMainKubelet(
nodeRef: nodeRef,
nodeLabels: nodeLabels,
nodeStatusUpdateFrequency: nodeStatusUpdateFrequency,
resourceContainer: resourceContainer,
os: osInterface,
oomWatcher: oomWatcher,
cgroupRoot: cgroupRoot,
mounter: mounter,
writer: writer,
configureCBR0: configureCBR0,
nonMasqueradeCIDR: nonMasqueradeCIDR,
reconcileCIDR: reconcileCIDR,
maxPods: maxPods,
syncLoopMonitor: atomic.Value{},
resolverConfig: resolverConfig,
cpuCFSQuota: cpuCFSQuota,
daemonEndpoints: daemonEndpoints,
containerManager: containerManager,
flannelExperimentalOverlay: flannelExperimentalOverlay,
flannelHelper: NewFlannelHelper(),
nodeIP: nodeIP,
clock: util.RealClock{},
os: osInterface,
oomWatcher: oomWatcher,
cgroupRoot: cgroupRoot,
mounter: mounter,
writer: writer,
configureCBR0: configureCBR0,
nonMasqueradeCIDR: nonMasqueradeCIDR,
reconcileCIDR: reconcileCIDR,
maxPods: maxPods,
syncLoopMonitor: atomic.Value{},
resolverConfig: resolverConfig,
cpuCFSQuota: cpuCFSQuota,
daemonEndpoints: daemonEndpoints,
containerManager: containerManager,
flannelExperimentalOverlay: flannelExperimentalOverlay,
flannelHelper: NewFlannelHelper(),
nodeIP: nodeIP,
clock: util.RealClock{},
outOfDiskTransitionFrequency: outOfDiskTransitionFrequency,
reservation: reservation,
enableCustomMetrics: enableCustomMetrics,
@ -414,8 +407,6 @@ func NewMainKubelet(
return nil, err
}
klet.containerRuntime = rktRuntime
// No Docker daemon to put in a container.
dockerDaemonContainer = ""
default:
return nil, fmt.Errorf("unsupported container runtime %q specified", containerRuntime)
}
@ -438,13 +429,6 @@ func NewMainKubelet(
}
klet.imageManager = imageManager
// Setup container manager, can fail if the devices hierarchy is not mounted
// (it is required by Docker however).
klet.nodeConfig = cm.NodeConfig{
DockerDaemonContainerName: dockerDaemonContainer,
SystemContainerName: systemContainer,
KubeletContainerName: resourceContainer,
}
klet.runtimeState.setRuntimeSync(klet.clock.Now())
klet.runner = klet.containerRuntime
@ -613,10 +597,6 @@ type Kubelet struct {
// Store kubecontainer.PodStatus for all pods.
podCache kubecontainer.Cache
// The name of the resource-only container to run the Kubelet in (empty for no container).
// Name must be absolute.
resourceContainer string
os kubecontainer.OSInterface
// Watcher of out of memory events.
@ -913,42 +893,32 @@ func (kl *Kubelet) StartGarbageCollection() {
// initializeModules will initialize internal modules that do not require the container runtime to be up.
// Note that the modules here must not depend on modules that are not initialized here.
func (kl *Kubelet) initializeModules() error {
// Step 1: Move Kubelet to a container, if required.
if kl.resourceContainer != "" {
// Fixme: I need to reside inside ContainerManager interface.
err := util.RunInResourceContainer(kl.resourceContainer)
if err != nil {
glog.Warningf("Failed to move Kubelet to container %q: %v", kl.resourceContainer, err)
}
glog.Infof("Running in container %q", kl.resourceContainer)
}
// Step 2: Promethues metrics.
// Step 1: Prometheus metrics.
metrics.Register(kl.runtimeCache)
// Step 3: Setup filesystem directories.
// Step 2: Setup filesystem directories.
if err := kl.setupDataDirs(); err != nil {
return err
}
// Step 4: If the container logs directory does not exist, create it.
// Step 3: If the container logs directory does not exist, create it.
if _, err := os.Stat(containerLogsDir); err != nil {
if err := kl.os.Mkdir(containerLogsDir, 0755); err != nil {
glog.Errorf("Failed to create directory %q: %v", containerLogsDir, err)
}
}
// Step 5: Start the image manager.
// Step 4: Start the image manager.
if err := kl.imageManager.Start(); err != nil {
return fmt.Errorf("Failed to start ImageManager, images may not be garbage collected: %v", err)
}
// Step 6: Start container manager.
if err := kl.containerManager.Start(kl.nodeConfig); err != nil {
// Step 5: Start container manager.
if err := kl.containerManager.Start(); err != nil {
return fmt.Errorf("Failed to start ContainerManager %v", err)
}
// Step 7: Start out of memory watcher.
// Step 6: Start out of memory watcher.
if err := kl.oomWatcher.Start(kl.nodeRef); err != nil {
return fmt.Errorf("Failed to start OOM watcher %v", err)
}
@ -3513,7 +3483,7 @@ func (kl *Kubelet) updatePodCIDR(cidr string) {
}
}
func (kl *Kubelet) GetNodeConfig() cm.NodeConfig {
return kl.nodeConfig
return kl.containerManager.GetNodeConfig()
}
var minRsrc = resource.MustParse("1k")

View File

@ -119,9 +119,9 @@ func (sb *summaryBuilder) build() (*Summary, error) {
}
systemContainers := map[string]string{
SystemContainerKubelet: sb.nodeConfig.KubeletContainerName,
SystemContainerRuntime: sb.nodeConfig.DockerDaemonContainerName, // TODO: add support for other runtimes
SystemContainerMisc: sb.nodeConfig.SystemContainerName,
SystemContainerKubelet: sb.nodeConfig.KubeletCgroupsName,
SystemContainerRuntime: sb.nodeConfig.RuntimeCgroupsName,
SystemContainerMisc: sb.nodeConfig.SystemCgroupsName,
}
for sys, name := range systemContainers {
if info, ok := sb.infos[name]; ok {

View File

@ -48,9 +48,9 @@ func TestBuildSummary(t *testing.T) {
node := api.Node{}
node.Name = "FooNode"
nodeConfig := cm.NodeConfig{
DockerDaemonContainerName: "/docker-daemon",
SystemContainerName: "/system",
KubeletContainerName: "/kubelet",
RuntimeCgroupsName: "/docker-daemon",
SystemCgroupsName: "/system",
KubeletCgroupsName: "/kubelet",
}
const (
namespace0 = "test0"