mirror of https://github.com/k3s-io/k3s
dockershim: move docker to the given cgruop
This change add a container manager inside the dockershim to move docker daemon and associated processes to a specified cgroup. The original kubelet container manager will continue checking the name of the cgroup, so that kubelet know how to report runtime stats.pull/6/head
parent
37122c2636
commit
87aaf4c0ac
|
@ -424,13 +424,14 @@ func run(s *options.KubeletServer, kubeDeps *kubelet.KubeletDeps) (err error) {
|
|||
return fmt.Errorf("invalid configuration: system container was specified and cgroup root was not specified")
|
||||
}
|
||||
kubeDeps.ContainerManager, err = cm.NewContainerManager(kubeDeps.Mounter, kubeDeps.CAdvisorInterface, cm.NodeConfig{
|
||||
RuntimeCgroupsName: s.RuntimeCgroups,
|
||||
SystemCgroupsName: s.SystemCgroups,
|
||||
KubeletCgroupsName: s.KubeletCgroups,
|
||||
ContainerRuntime: s.ContainerRuntime,
|
||||
CgroupsPerQOS: s.CgroupsPerQOS,
|
||||
CgroupRoot: s.CgroupRoot,
|
||||
ProtectKernelDefaults: s.ProtectKernelDefaults,
|
||||
RuntimeCgroupsName: s.RuntimeCgroups,
|
||||
SystemCgroupsName: s.SystemCgroups,
|
||||
KubeletCgroupsName: s.KubeletCgroups,
|
||||
ContainerRuntime: s.ContainerRuntime,
|
||||
CgroupsPerQOS: s.CgroupsPerQOS,
|
||||
CgroupRoot: s.CgroupRoot,
|
||||
ProtectKernelDefaults: s.ProtectKernelDefaults,
|
||||
RuntimeIntegrationType: s.ExperimentalRuntimeIntegrationType,
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
|
|
|
@ -49,13 +49,14 @@ type ContainerManager interface {
|
|||
}
|
||||
|
||||
type NodeConfig struct {
|
||||
RuntimeCgroupsName string
|
||||
SystemCgroupsName string
|
||||
KubeletCgroupsName string
|
||||
ContainerRuntime string
|
||||
CgroupsPerQOS bool
|
||||
CgroupRoot string
|
||||
ProtectKernelDefaults bool
|
||||
RuntimeCgroupsName string
|
||||
SystemCgroupsName string
|
||||
KubeletCgroupsName string
|
||||
ContainerRuntime string
|
||||
CgroupsPerQOS bool
|
||||
CgroupRoot string
|
||||
ProtectKernelDefaults bool
|
||||
RuntimeIntegrationType string
|
||||
}
|
||||
|
||||
type Status struct {
|
||||
|
|
|
@ -327,7 +327,27 @@ func (cm *containerManagerImpl) setupNode() error {
|
|||
systemContainers := []*systemContainer{}
|
||||
if cm.ContainerRuntime == "docker" {
|
||||
dockerVersion := getDockerVersion(cm.cadvisorInterface)
|
||||
if cm.RuntimeCgroupsName != "" {
|
||||
if cm.RuntimeIntegrationType == "cri" {
|
||||
// If kubelet uses CRI, dockershim will manage the cgroups and oom
|
||||
// score for the docker processes.
|
||||
// In the future, NodeSpec should mandate the cgroup that the
|
||||
// runtime processes need to be in. For now, we still check the
|
||||
// cgroup for docker periodically, so that kubelet can recognize
|
||||
// the cgroup for docker and serve stats for the runtime.
|
||||
// TODO(#27097): Fix this after NodeSpec is clearly defined.
|
||||
cm.periodicTasks = append(cm.periodicTasks, func() {
|
||||
glog.V(4).Infof("[ContainerManager]: Adding periodic tasks for docker CRI integration")
|
||||
cont, err := getContainerNameForProcess(dockerProcessName, dockerPidFile)
|
||||
if err != nil {
|
||||
glog.Error(err)
|
||||
return
|
||||
}
|
||||
glog.V(2).Infof("[ContainerManager]: Discovered runtime cgroups name: %s", cont)
|
||||
cm.Lock()
|
||||
defer cm.Unlock()
|
||||
cm.RuntimeCgroupsName = cont
|
||||
})
|
||||
} else if cm.RuntimeCgroupsName != "" {
|
||||
cont := newSystemCgroups(cm.RuntimeCgroupsName)
|
||||
var capacity = api.ResourceList{}
|
||||
if info, err := cm.cadvisorInterface.MachineInfo(); err == nil {
|
||||
|
@ -353,13 +373,13 @@ func (cm *containerManagerImpl) setupNode() error {
|
|||
},
|
||||
}
|
||||
cont.ensureStateFunc = func(manager *fs.Manager) error {
|
||||
return ensureDockerInContainer(dockerVersion, qos.DockerOOMScoreAdj, dockerContainer)
|
||||
return EnsureDockerInContainer(dockerVersion, qos.DockerOOMScoreAdj, dockerContainer)
|
||||
}
|
||||
systemContainers = append(systemContainers, cont)
|
||||
} else {
|
||||
cm.periodicTasks = append(cm.periodicTasks, func() {
|
||||
glog.V(10).Infof("Adding docker daemon periodic tasks")
|
||||
if err := ensureDockerInContainer(dockerVersion, qos.DockerOOMScoreAdj, nil); err != nil {
|
||||
if err := EnsureDockerInContainer(dockerVersion, qos.DockerOOMScoreAdj, nil); err != nil {
|
||||
glog.Error(err)
|
||||
return
|
||||
}
|
||||
|
@ -572,7 +592,10 @@ func getPidsForProcess(name, pidFile string) ([]int, error) {
|
|||
}
|
||||
|
||||
// Ensures that the Docker daemon is in the desired container.
|
||||
func ensureDockerInContainer(dockerVersion semver.Version, oomScoreAdj int, manager *fs.Manager) error {
|
||||
// Temporarily export the function to be used by dockershim.
|
||||
// TODO(yujuhong): Move this function to dockershim once kubelet migrates to
|
||||
// dockershim as the default.
|
||||
func EnsureDockerInContainer(dockerVersion semver.Version, oomScoreAdj int, manager *fs.Manager) error {
|
||||
type process struct{ name, file string }
|
||||
dockerProcs := []process{{dockerProcessName, dockerPidFile}}
|
||||
if dockerVersion.GTE(containerdVersion) {
|
||||
|
|
|
@ -31,6 +31,7 @@ go_library(
|
|||
"//pkg/kubelet/api:go_default_library",
|
||||
"//pkg/kubelet/api/v1alpha1/runtime:go_default_library",
|
||||
"//pkg/kubelet/container:go_default_library",
|
||||
"//pkg/kubelet/dockershim/cm:go_default_library",
|
||||
"//pkg/kubelet/dockertools:go_default_library",
|
||||
"//pkg/kubelet/leaky:go_default_library",
|
||||
"//pkg/kubelet/network:go_default_library",
|
||||
|
|
|
@ -0,0 +1,30 @@
|
|||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
licenses(["notice"])
|
||||
|
||||
load(
|
||||
"@io_bazel_rules_go//go:def.bzl",
|
||||
"go_binary",
|
||||
"go_library",
|
||||
"go_test",
|
||||
"cgo_library",
|
||||
)
|
||||
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = [
|
||||
"container_manager.go",
|
||||
"container_manager_linux.go",
|
||||
],
|
||||
tags = ["automanaged"],
|
||||
deps = [
|
||||
"//pkg/kubelet/cm:go_default_library",
|
||||
"//pkg/kubelet/dockertools:go_default_library",
|
||||
"//pkg/kubelet/qos:go_default_library",
|
||||
"//pkg/util/wait:go_default_library",
|
||||
"//vendor:github.com/blang/semver",
|
||||
"//vendor:github.com/golang/glog",
|
||||
"//vendor:github.com/opencontainers/runc/libcontainer/cgroups/fs",
|
||||
"//vendor:github.com/opencontainers/runc/libcontainer/configs",
|
||||
],
|
||||
)
|
|
@ -0,0 +1,21 @@
|
|||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cm
|
||||
|
||||
type ContainerManager interface {
|
||||
Start() error
|
||||
}
|
|
@ -0,0 +1,147 @@
|
|||
// +build linux
|
||||
|
||||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cm
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/blang/semver"
|
||||
"github.com/golang/glog"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
kubecm "k8s.io/kubernetes/pkg/kubelet/cm"
|
||||
"k8s.io/kubernetes/pkg/kubelet/dockertools"
|
||||
"k8s.io/kubernetes/pkg/kubelet/qos"
|
||||
"k8s.io/kubernetes/pkg/util/wait"
|
||||
)
|
||||
|
||||
const (
|
||||
// The percent of the machine memory capacity.
|
||||
dockerMemoryLimitThresholdPercent = kubecm.DockerMemoryLimitThresholdPercent
|
||||
|
||||
// The minimum memory limit allocated to docker container.
|
||||
minDockerMemoryLimit = kubecm.MinDockerMemoryLimit
|
||||
|
||||
// The Docker OOM score adjustment.
|
||||
dockerOOMScoreAdj = qos.DockerOOMScoreAdj
|
||||
)
|
||||
|
||||
var (
|
||||
memoryCapacityRegexp = regexp.MustCompile(`MemTotal:\s*([0-9]+) kB`)
|
||||
)
|
||||
|
||||
func NewContainerManager(cgroupsName string, client dockertools.DockerInterface) ContainerManager {
|
||||
return &containerManager{
|
||||
cgroupsName: cgroupsName,
|
||||
client: client,
|
||||
}
|
||||
}
|
||||
|
||||
type containerManager struct {
|
||||
// Docker client.
|
||||
client dockertools.DockerInterface
|
||||
// Name of the cgroups.
|
||||
cgroupsName string
|
||||
// Manager for the cgroups.
|
||||
cgroupsManager *fs.Manager
|
||||
}
|
||||
|
||||
func (m *containerManager) Start() error {
|
||||
// TODO: check if the required cgroups are mounted.
|
||||
if len(m.cgroupsName) != 0 {
|
||||
manager, err := createCgroupManager(m.cgroupsName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.cgroupsManager = manager
|
||||
}
|
||||
go wait.Until(m.doWork, 5*time.Minute, wait.NeverStop)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *containerManager) doWork() {
|
||||
v, err := m.client.Version()
|
||||
if err != nil {
|
||||
glog.Errorf("Unable to get docker version: %v", err)
|
||||
return
|
||||
}
|
||||
version, err := semver.Parse(v.Version)
|
||||
if err != nil {
|
||||
glog.Errorf("Unable to parse docker version %q: %v", v.Version, err)
|
||||
return
|
||||
}
|
||||
// EnsureDockerInConatiner does two things.
|
||||
// 1. Ensure processes run in the cgroups if m.cgroupsManager is not nil.
|
||||
// 2. Ensure processes have the OOM score applied.
|
||||
if err := kubecm.EnsureDockerInContainer(version, dockerOOMScoreAdj, m.cgroupsManager); err != nil {
|
||||
glog.Errorf("Unable to ensure the docker processes run in the desired containers")
|
||||
}
|
||||
}
|
||||
|
||||
func createCgroupManager(name string) (*fs.Manager, error) {
|
||||
var memoryLimit uint64
|
||||
memoryCapacity, err := getMemoryCapacity()
|
||||
if err != nil || memoryCapacity*dockerMemoryLimitThresholdPercent/100 < minDockerMemoryLimit {
|
||||
memoryLimit = minDockerMemoryLimit
|
||||
}
|
||||
glog.V(2).Infof("Configure resource-only container %q with memory limit: %d", name, memoryLimit)
|
||||
|
||||
allowAllDevices := true
|
||||
cm := &fs.Manager{
|
||||
Cgroups: &configs.Cgroup{
|
||||
Parent: "/",
|
||||
Name: name,
|
||||
Resources: &configs.Resources{
|
||||
Memory: int64(memoryLimit),
|
||||
MemorySwap: -1,
|
||||
AllowAllDevices: &allowAllDevices,
|
||||
},
|
||||
},
|
||||
}
|
||||
return cm, nil
|
||||
}
|
||||
|
||||
// getMemoryCapacity returns the memory capacity on the machine in bytes.
|
||||
func getMemoryCapacity() (uint64, error) {
|
||||
out, err := ioutil.ReadFile("/proc/meminfo")
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return parseCapacity(out, memoryCapacityRegexp)
|
||||
}
|
||||
|
||||
// parseCapacity matches a Regexp in a []byte, returning the resulting value in bytes.
|
||||
// Assumes that the value matched by the Regexp is in KB.
|
||||
func parseCapacity(b []byte, r *regexp.Regexp) (uint64, error) {
|
||||
matches := r.FindSubmatch(b)
|
||||
if len(matches) != 2 {
|
||||
return 0, fmt.Errorf("failed to match regexp in output: %q", string(b))
|
||||
}
|
||||
m, err := strconv.ParseUint(string(matches[1]), 10, 64)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// Convert to bytes.
|
||||
return m * 1024, err
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
// +build !linux
|
||||
|
||||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cm
|
||||
|
||||
import (
|
||||
"k8s.io/kubernetes/pkg/kubelet/dockertools"
|
||||
)
|
||||
|
||||
type unsupportedContainerManager struct {
|
||||
}
|
||||
|
||||
func NewContainerManager(_ string, _ dockertools.DockerInterface) ContainerManager {
|
||||
return &unsupportedContainerManager{}
|
||||
}
|
||||
|
||||
func (m *unsupportedContainerManager) Start() error {
|
||||
return fmt.Errorf("Container Manager is unsupported in this build")
|
||||
}
|
|
@ -26,6 +26,7 @@ import (
|
|||
internalApi "k8s.io/kubernetes/pkg/kubelet/api"
|
||||
runtimeApi "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
"k8s.io/kubernetes/pkg/kubelet/dockershim/cm"
|
||||
"k8s.io/kubernetes/pkg/kubelet/dockertools"
|
||||
"k8s.io/kubernetes/pkg/kubelet/network"
|
||||
"k8s.io/kubernetes/pkg/kubelet/network/cni"
|
||||
|
@ -92,10 +93,11 @@ type NetworkPluginSettings struct {
|
|||
var internalLabelKeys []string = []string{containerTypeLabelKey, containerLogPathLabelKey, sandboxIDLabelKey}
|
||||
|
||||
// NOTE: Anything passed to DockerService should be eventually handled in another way when we switch to running the shim as a different process.
|
||||
func NewDockerService(client dockertools.DockerInterface, seccompProfileRoot string, podSandboxImage string, streamingConfig *streaming.Config, pluginSettings *NetworkPluginSettings) (DockerService, error) {
|
||||
func NewDockerService(client dockertools.DockerInterface, seccompProfileRoot string, podSandboxImage string, streamingConfig *streaming.Config, pluginSettings *NetworkPluginSettings, cgroupsName string) (DockerService, error) {
|
||||
c := dockertools.NewInstrumentedDockerInterface(client)
|
||||
ds := &dockerService{
|
||||
seccompProfileRoot: seccompProfileRoot,
|
||||
client: dockertools.NewInstrumentedDockerInterface(client),
|
||||
client: c,
|
||||
os: kubecontainer.RealOS{},
|
||||
podSandboxImage: podSandboxImage,
|
||||
streamingRuntime: &streamingRuntime{
|
||||
|
@ -104,6 +106,7 @@ func NewDockerService(client dockertools.DockerInterface, seccompProfileRoot str
|
|||
// TODO(#35747) - Either deprecate nsenter exec handling, or add support for it here.
|
||||
execHandler: &dockertools.NativeExecHandler{},
|
||||
},
|
||||
containerManager: cm.NewContainerManager(cgroupsName, client),
|
||||
}
|
||||
if streamingConfig != nil {
|
||||
var err error
|
||||
|
@ -135,6 +138,7 @@ type DockerService interface {
|
|||
internalApi.RuntimeService
|
||||
internalApi.ImageManagerService
|
||||
DockerLegacyService
|
||||
Start() error
|
||||
}
|
||||
|
||||
// DockerLegacyService is an interface that embeds all legacy methods for
|
||||
|
@ -142,6 +146,7 @@ type DockerService interface {
|
|||
type DockerLegacyService interface {
|
||||
// Supporting legacy methods for docker.
|
||||
GetContainerLogs(pod *api.Pod, containerID kubecontainer.ContainerID, logOptions *api.PodLogOptions, stdout, stderr io.Writer) (err error)
|
||||
|
||||
LegacyExec(containerID kubecontainer.ContainerID, cmd []string, stdin io.Reader, stdout, stderr io.WriteCloser, tty bool, resize <-chan term.Size) error
|
||||
LegacyAttach(id kubecontainer.ContainerID, stdin io.Reader, stdout, stderr io.WriteCloser, tty bool, resize <-chan term.Size) error
|
||||
LegacyPortForward(sandboxID string, port uint16, stream io.ReadWriteCloser) error
|
||||
|
@ -155,6 +160,7 @@ type dockerService struct {
|
|||
streamingRuntime *streamingRuntime
|
||||
streamingServer streaming.Server
|
||||
networkPlugin network.NetworkPlugin
|
||||
containerManager cm.ContainerManager
|
||||
}
|
||||
|
||||
// Version returns the runtime name, runtime version and runtime API version
|
||||
|
@ -214,3 +220,8 @@ type dockerNetworkHost struct {
|
|||
network.LegacyHost
|
||||
*namespaceGetter
|
||||
}
|
||||
|
||||
// Start initializes and starts components in dockerService.
|
||||
func (ds *dockerService) Start() error {
|
||||
return ds.containerManager.Start()
|
||||
}
|
||||
|
|
|
@ -509,7 +509,10 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
|
|||
case "cri":
|
||||
// Use the new CRI shim for docker. This is needed for testing the
|
||||
// docker integration through CRI, and may be removed in the future.
|
||||
dockerService, err := dockershim.NewDockerService(klet.dockerClient, kubeCfg.SeccompProfileRoot, kubeCfg.PodInfraContainerImage, nil, &pluginSettings)
|
||||
dockerService, err := dockershim.NewDockerService(klet.dockerClient, kubeCfg.SeccompProfileRoot, kubeCfg.PodInfraContainerImage, nil, &pluginSettings, kubeCfg.RuntimeCgroups)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
runtimeService := dockerService.(internalApi.RuntimeService)
|
||||
imageService := dockerService.(internalApi.ImageManagerService)
|
||||
|
||||
|
@ -537,6 +540,10 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
|
|||
return nil, err
|
||||
}
|
||||
}
|
||||
// TODO: Find a better place to start the service.
|
||||
if err := dockerService.Start(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// kubelet defers to the runtime shim to setup networking. Setting
|
||||
// this to nil will prevent it from trying to invoke the plugin.
|
||||
|
|
Loading…
Reference in New Issue