dockershim: move docker to the given cgruop

This change add a container manager inside the dockershim to move docker daemon
and associated processes to a specified cgroup. The original kubelet container
manager will continue checking the name of the cgroup, so that kubelet know how
to report runtime stats.
pull/6/head
Yu-Ju Hong 2016-10-10 13:56:53 -07:00
parent 37122c2636
commit 87aaf4c0ac
10 changed files with 297 additions and 21 deletions

View File

@ -424,13 +424,14 @@ func run(s *options.KubeletServer, kubeDeps *kubelet.KubeletDeps) (err error) {
return fmt.Errorf("invalid configuration: system container was specified and cgroup root was not specified")
}
kubeDeps.ContainerManager, err = cm.NewContainerManager(kubeDeps.Mounter, kubeDeps.CAdvisorInterface, cm.NodeConfig{
RuntimeCgroupsName: s.RuntimeCgroups,
SystemCgroupsName: s.SystemCgroups,
KubeletCgroupsName: s.KubeletCgroups,
ContainerRuntime: s.ContainerRuntime,
CgroupsPerQOS: s.CgroupsPerQOS,
CgroupRoot: s.CgroupRoot,
ProtectKernelDefaults: s.ProtectKernelDefaults,
RuntimeCgroupsName: s.RuntimeCgroups,
SystemCgroupsName: s.SystemCgroups,
KubeletCgroupsName: s.KubeletCgroups,
ContainerRuntime: s.ContainerRuntime,
CgroupsPerQOS: s.CgroupsPerQOS,
CgroupRoot: s.CgroupRoot,
ProtectKernelDefaults: s.ProtectKernelDefaults,
RuntimeIntegrationType: s.ExperimentalRuntimeIntegrationType,
})
if err != nil {
return err

View File

@ -49,13 +49,14 @@ type ContainerManager interface {
}
type NodeConfig struct {
RuntimeCgroupsName string
SystemCgroupsName string
KubeletCgroupsName string
ContainerRuntime string
CgroupsPerQOS bool
CgroupRoot string
ProtectKernelDefaults bool
RuntimeCgroupsName string
SystemCgroupsName string
KubeletCgroupsName string
ContainerRuntime string
CgroupsPerQOS bool
CgroupRoot string
ProtectKernelDefaults bool
RuntimeIntegrationType string
}
type Status struct {

View File

@ -327,7 +327,27 @@ func (cm *containerManagerImpl) setupNode() error {
systemContainers := []*systemContainer{}
if cm.ContainerRuntime == "docker" {
dockerVersion := getDockerVersion(cm.cadvisorInterface)
if cm.RuntimeCgroupsName != "" {
if cm.RuntimeIntegrationType == "cri" {
// If kubelet uses CRI, dockershim will manage the cgroups and oom
// score for the docker processes.
// In the future, NodeSpec should mandate the cgroup that the
// runtime processes need to be in. For now, we still check the
// cgroup for docker periodically, so that kubelet can recognize
// the cgroup for docker and serve stats for the runtime.
// TODO(#27097): Fix this after NodeSpec is clearly defined.
cm.periodicTasks = append(cm.periodicTasks, func() {
glog.V(4).Infof("[ContainerManager]: Adding periodic tasks for docker CRI integration")
cont, err := getContainerNameForProcess(dockerProcessName, dockerPidFile)
if err != nil {
glog.Error(err)
return
}
glog.V(2).Infof("[ContainerManager]: Discovered runtime cgroups name: %s", cont)
cm.Lock()
defer cm.Unlock()
cm.RuntimeCgroupsName = cont
})
} else if cm.RuntimeCgroupsName != "" {
cont := newSystemCgroups(cm.RuntimeCgroupsName)
var capacity = api.ResourceList{}
if info, err := cm.cadvisorInterface.MachineInfo(); err == nil {
@ -353,13 +373,13 @@ func (cm *containerManagerImpl) setupNode() error {
},
}
cont.ensureStateFunc = func(manager *fs.Manager) error {
return ensureDockerInContainer(dockerVersion, qos.DockerOOMScoreAdj, dockerContainer)
return EnsureDockerInContainer(dockerVersion, qos.DockerOOMScoreAdj, dockerContainer)
}
systemContainers = append(systemContainers, cont)
} else {
cm.periodicTasks = append(cm.periodicTasks, func() {
glog.V(10).Infof("Adding docker daemon periodic tasks")
if err := ensureDockerInContainer(dockerVersion, qos.DockerOOMScoreAdj, nil); err != nil {
if err := EnsureDockerInContainer(dockerVersion, qos.DockerOOMScoreAdj, nil); err != nil {
glog.Error(err)
return
}
@ -572,7 +592,10 @@ func getPidsForProcess(name, pidFile string) ([]int, error) {
}
// Ensures that the Docker daemon is in the desired container.
func ensureDockerInContainer(dockerVersion semver.Version, oomScoreAdj int, manager *fs.Manager) error {
// Temporarily export the function to be used by dockershim.
// TODO(yujuhong): Move this function to dockershim once kubelet migrates to
// dockershim as the default.
func EnsureDockerInContainer(dockerVersion semver.Version, oomScoreAdj int, manager *fs.Manager) error {
type process struct{ name, file string }
dockerProcs := []process{{dockerProcessName, dockerPidFile}}
if dockerVersion.GTE(containerdVersion) {

View File

@ -31,6 +31,7 @@ go_library(
"//pkg/kubelet/api:go_default_library",
"//pkg/kubelet/api/v1alpha1/runtime:go_default_library",
"//pkg/kubelet/container:go_default_library",
"//pkg/kubelet/dockershim/cm:go_default_library",
"//pkg/kubelet/dockertools:go_default_library",
"//pkg/kubelet/leaky:go_default_library",
"//pkg/kubelet/network:go_default_library",

View File

@ -0,0 +1,30 @@
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_binary",
"go_library",
"go_test",
"cgo_library",
)
go_library(
name = "go_default_library",
srcs = [
"container_manager.go",
"container_manager_linux.go",
],
tags = ["automanaged"],
deps = [
"//pkg/kubelet/cm:go_default_library",
"//pkg/kubelet/dockertools:go_default_library",
"//pkg/kubelet/qos:go_default_library",
"//pkg/util/wait:go_default_library",
"//vendor:github.com/blang/semver",
"//vendor:github.com/golang/glog",
"//vendor:github.com/opencontainers/runc/libcontainer/cgroups/fs",
"//vendor:github.com/opencontainers/runc/libcontainer/configs",
],
)

View File

@ -0,0 +1,21 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cm
type ContainerManager interface {
Start() error
}

View File

@ -0,0 +1,147 @@
// +build linux
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cm
import (
"fmt"
"io/ioutil"
"regexp"
"strconv"
"time"
"github.com/blang/semver"
"github.com/golang/glog"
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/configs"
kubecm "k8s.io/kubernetes/pkg/kubelet/cm"
"k8s.io/kubernetes/pkg/kubelet/dockertools"
"k8s.io/kubernetes/pkg/kubelet/qos"
"k8s.io/kubernetes/pkg/util/wait"
)
const (
// The percent of the machine memory capacity.
dockerMemoryLimitThresholdPercent = kubecm.DockerMemoryLimitThresholdPercent
// The minimum memory limit allocated to docker container.
minDockerMemoryLimit = kubecm.MinDockerMemoryLimit
// The Docker OOM score adjustment.
dockerOOMScoreAdj = qos.DockerOOMScoreAdj
)
var (
memoryCapacityRegexp = regexp.MustCompile(`MemTotal:\s*([0-9]+) kB`)
)
func NewContainerManager(cgroupsName string, client dockertools.DockerInterface) ContainerManager {
return &containerManager{
cgroupsName: cgroupsName,
client: client,
}
}
type containerManager struct {
// Docker client.
client dockertools.DockerInterface
// Name of the cgroups.
cgroupsName string
// Manager for the cgroups.
cgroupsManager *fs.Manager
}
func (m *containerManager) Start() error {
// TODO: check if the required cgroups are mounted.
if len(m.cgroupsName) != 0 {
manager, err := createCgroupManager(m.cgroupsName)
if err != nil {
return err
}
m.cgroupsManager = manager
}
go wait.Until(m.doWork, 5*time.Minute, wait.NeverStop)
return nil
}
func (m *containerManager) doWork() {
v, err := m.client.Version()
if err != nil {
glog.Errorf("Unable to get docker version: %v", err)
return
}
version, err := semver.Parse(v.Version)
if err != nil {
glog.Errorf("Unable to parse docker version %q: %v", v.Version, err)
return
}
// EnsureDockerInConatiner does two things.
// 1. Ensure processes run in the cgroups if m.cgroupsManager is not nil.
// 2. Ensure processes have the OOM score applied.
if err := kubecm.EnsureDockerInContainer(version, dockerOOMScoreAdj, m.cgroupsManager); err != nil {
glog.Errorf("Unable to ensure the docker processes run in the desired containers")
}
}
func createCgroupManager(name string) (*fs.Manager, error) {
var memoryLimit uint64
memoryCapacity, err := getMemoryCapacity()
if err != nil || memoryCapacity*dockerMemoryLimitThresholdPercent/100 < minDockerMemoryLimit {
memoryLimit = minDockerMemoryLimit
}
glog.V(2).Infof("Configure resource-only container %q with memory limit: %d", name, memoryLimit)
allowAllDevices := true
cm := &fs.Manager{
Cgroups: &configs.Cgroup{
Parent: "/",
Name: name,
Resources: &configs.Resources{
Memory: int64(memoryLimit),
MemorySwap: -1,
AllowAllDevices: &allowAllDevices,
},
},
}
return cm, nil
}
// getMemoryCapacity returns the memory capacity on the machine in bytes.
func getMemoryCapacity() (uint64, error) {
out, err := ioutil.ReadFile("/proc/meminfo")
if err != nil {
return 0, err
}
return parseCapacity(out, memoryCapacityRegexp)
}
// parseCapacity matches a Regexp in a []byte, returning the resulting value in bytes.
// Assumes that the value matched by the Regexp is in KB.
func parseCapacity(b []byte, r *regexp.Regexp) (uint64, error) {
matches := r.FindSubmatch(b)
if len(matches) != 2 {
return 0, fmt.Errorf("failed to match regexp in output: %q", string(b))
}
m, err := strconv.ParseUint(string(matches[1]), 10, 64)
if err != nil {
return 0, err
}
// Convert to bytes.
return m * 1024, err
}

View File

@ -0,0 +1,34 @@
// +build !linux
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cm
import (
"k8s.io/kubernetes/pkg/kubelet/dockertools"
)
type unsupportedContainerManager struct {
}
func NewContainerManager(_ string, _ dockertools.DockerInterface) ContainerManager {
return &unsupportedContainerManager{}
}
func (m *unsupportedContainerManager) Start() error {
return fmt.Errorf("Container Manager is unsupported in this build")
}

View File

@ -26,6 +26,7 @@ import (
internalApi "k8s.io/kubernetes/pkg/kubelet/api"
runtimeApi "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/dockershim/cm"
"k8s.io/kubernetes/pkg/kubelet/dockertools"
"k8s.io/kubernetes/pkg/kubelet/network"
"k8s.io/kubernetes/pkg/kubelet/network/cni"
@ -92,10 +93,11 @@ type NetworkPluginSettings struct {
var internalLabelKeys []string = []string{containerTypeLabelKey, containerLogPathLabelKey, sandboxIDLabelKey}
// NOTE: Anything passed to DockerService should be eventually handled in another way when we switch to running the shim as a different process.
func NewDockerService(client dockertools.DockerInterface, seccompProfileRoot string, podSandboxImage string, streamingConfig *streaming.Config, pluginSettings *NetworkPluginSettings) (DockerService, error) {
func NewDockerService(client dockertools.DockerInterface, seccompProfileRoot string, podSandboxImage string, streamingConfig *streaming.Config, pluginSettings *NetworkPluginSettings, cgroupsName string) (DockerService, error) {
c := dockertools.NewInstrumentedDockerInterface(client)
ds := &dockerService{
seccompProfileRoot: seccompProfileRoot,
client: dockertools.NewInstrumentedDockerInterface(client),
client: c,
os: kubecontainer.RealOS{},
podSandboxImage: podSandboxImage,
streamingRuntime: &streamingRuntime{
@ -104,6 +106,7 @@ func NewDockerService(client dockertools.DockerInterface, seccompProfileRoot str
// TODO(#35747) - Either deprecate nsenter exec handling, or add support for it here.
execHandler: &dockertools.NativeExecHandler{},
},
containerManager: cm.NewContainerManager(cgroupsName, client),
}
if streamingConfig != nil {
var err error
@ -135,6 +138,7 @@ type DockerService interface {
internalApi.RuntimeService
internalApi.ImageManagerService
DockerLegacyService
Start() error
}
// DockerLegacyService is an interface that embeds all legacy methods for
@ -142,6 +146,7 @@ type DockerService interface {
type DockerLegacyService interface {
// Supporting legacy methods for docker.
GetContainerLogs(pod *api.Pod, containerID kubecontainer.ContainerID, logOptions *api.PodLogOptions, stdout, stderr io.Writer) (err error)
LegacyExec(containerID kubecontainer.ContainerID, cmd []string, stdin io.Reader, stdout, stderr io.WriteCloser, tty bool, resize <-chan term.Size) error
LegacyAttach(id kubecontainer.ContainerID, stdin io.Reader, stdout, stderr io.WriteCloser, tty bool, resize <-chan term.Size) error
LegacyPortForward(sandboxID string, port uint16, stream io.ReadWriteCloser) error
@ -155,6 +160,7 @@ type dockerService struct {
streamingRuntime *streamingRuntime
streamingServer streaming.Server
networkPlugin network.NetworkPlugin
containerManager cm.ContainerManager
}
// Version returns the runtime name, runtime version and runtime API version
@ -214,3 +220,8 @@ type dockerNetworkHost struct {
network.LegacyHost
*namespaceGetter
}
// Start initializes and starts components in dockerService.
func (ds *dockerService) Start() error {
return ds.containerManager.Start()
}

View File

@ -509,7 +509,10 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
case "cri":
// Use the new CRI shim for docker. This is needed for testing the
// docker integration through CRI, and may be removed in the future.
dockerService, err := dockershim.NewDockerService(klet.dockerClient, kubeCfg.SeccompProfileRoot, kubeCfg.PodInfraContainerImage, nil, &pluginSettings)
dockerService, err := dockershim.NewDockerService(klet.dockerClient, kubeCfg.SeccompProfileRoot, kubeCfg.PodInfraContainerImage, nil, &pluginSettings, kubeCfg.RuntimeCgroups)
if err != nil {
return nil, err
}
runtimeService := dockerService.(internalApi.RuntimeService)
imageService := dockerService.(internalApi.ImageManagerService)
@ -537,6 +540,10 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
return nil, err
}
}
// TODO: Find a better place to start the service.
if err := dockerService.Start(); err != nil {
return nil, err
}
// kubelet defers to the runtime shim to setup networking. Setting
// this to nil will prevent it from trying to invoke the plugin.