/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package dockertools

import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"os/exec"
	"path"
	"sort"
	"strconv"
	"strings"
	"sync"
	"time"

	docker "github.com/fsouza/go-dockerclient"
	"github.com/golang/glog"
	"github.com/golang/groupcache/lru"
	cadvisorApi "github.com/google/cadvisor/info/v1"

	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/api/latest"
	"k8s.io/kubernetes/pkg/api/unversioned"
	"k8s.io/kubernetes/pkg/client/record"
	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
	"k8s.io/kubernetes/pkg/kubelet/metrics"
	"k8s.io/kubernetes/pkg/kubelet/network"
	"k8s.io/kubernetes/pkg/kubelet/network/hairpin"
	"k8s.io/kubernetes/pkg/kubelet/prober"
	"k8s.io/kubernetes/pkg/kubelet/qos"
	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
	"k8s.io/kubernetes/pkg/probe"
	"k8s.io/kubernetes/pkg/securitycontext"
	"k8s.io/kubernetes/pkg/types"
	"k8s.io/kubernetes/pkg/util"
	"k8s.io/kubernetes/pkg/util/oom"
	"k8s.io/kubernetes/pkg/util/procfs"
	"k8s.io/kubernetes/pkg/util/sets"
)

const (
	maxReasonCacheEntries = 200

	// ndots specifies the minimum number of dots that a domain name must
	// contain for the resolver to consider it fully qualified (FQDN). We want
	// to be able to consider SRV lookup names like
	// _dns._udp.kube-dns.default.svc relative; hence, ndots is set to 5.
	ndotsDNSOption = "options ndots:5\n"

	// In order to avoid unnecessary SIGKILLs, give every container a minimum grace
	// period after SIGTERM. Docker will guarantee the termination, but SIGKILL is
	// potentially dangerous.
	// TODO: evaluate whether there are scenarios in which SIGKILL is preferable to
	// SIGTERM for certain process types, which may justify setting this to 0.
	minimumGracePeriodInSeconds = 2

	kubernetesNameLabel                   = "io.kubernetes.pod.name"
	kubernetesPodLabel                    = "io.kubernetes.pod.data"
	kubernetesTerminationGracePeriodLabel = "io.kubernetes.pod.terminationGracePeriod"
	kubernetesContainerLabel              = "io.kubernetes.container.name"

	DockerNetnsFmt = "/proc/%v/ns/net"
)

// DockerManager implements the Runtime interface.
var _ kubecontainer.Runtime = &DockerManager{}

// TODO: make this a TTL based pull (if image older than X policy, pull)
var podInfraContainerImagePullPolicy = api.PullIfNotPresent
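
// DockerManager is the docker-backed implementation of kubecontainer.Runtime;
// it manages the user containers and pod infra containers for the kubelet.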
type DockerManager struct {
	client              DockerInterface
	recorder            record.EventRecorder
	containerRefManager *kubecontainer.RefManager
	os                  kubecontainer.OSInterface
	machineInfo         *cadvisorApi.MachineInfo

	// The image name of the pod infra container.
	podInfraContainerImage string

	// reasonCache stores the failure reason of the last container creation
	// and/or start in a string, keyed by <pod_UID>_<container_name>. The goal
	// is to propagate this reason to the container status. This endeavor is
	// "best-effort" for two reasons:
	//   1. The cache is not persisted.
	//   2. We use an LRU cache to avoid extra garbage collection work. This
	//      means that some entries may be recycled before a pod has been
	//      deleted.
	reasonCache reasonInfoCache

	// TODO(yifan): Record the pull failure so we can eliminate the image checking
	// in GetPodStatus()?
	// Lower level docker image puller.
	dockerPuller DockerPuller

	// Wrapped image puller.
	imagePuller kubecontainer.ImagePuller

	// Root of the Docker runtime.
	dockerRoot string

	// Directory of container logs.
	containerLogsDir string

	// Network plugin.
	networkPlugin network.NetworkPlugin

	// Health check prober.
	prober prober.Prober

	// Generator of runtime container options.
	generator kubecontainer.RunContainerOptionsGenerator

	// Runner of lifecycle events.
	runner kubecontainer.HandlerRunner

	// Handler used to execute commands in containers.
	execHandler ExecHandler

	// Used to set OOM scores of processes.
	oomAdjuster *oom.OOMAdjuster

	// Get information from /proc mount.
	procFs procfs.ProcFsInterface

	// If true, enforce container cpu limits with CFS quota support.
	cpuCFSQuota bool

	// Container GC manager.
	containerGC *containerGC
}
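
// NewDockerManager creates a DockerManager from the given dependencies,
// detecting the docker runtime root and wiring up the image puller,
// lifecycle handler runner, and container GC.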
func NewDockerManager(
	client DockerInterface,
	recorder record.EventRecorder,
	prober prober.Prober,
	containerRefManager *kubecontainer.RefManager,
	machineInfo *cadvisorApi.MachineInfo,
	podInfraContainerImage string,
	qps float32,
	burst int,
	containerLogsDir string,
	osInterface kubecontainer.OSInterface,
	networkPlugin network.NetworkPlugin,
	generator kubecontainer.RunContainerOptionsGenerator,
	httpClient kubetypes.HttpGetter,
	execHandler ExecHandler,
	oomAdjuster *oom.OOMAdjuster,
	procFs procfs.ProcFsInterface,
	cpuCFSQuota bool,
	imageBackOff *util.Backoff) *DockerManager {
	// Work out the location of the Docker runtime, defaulting to /var/lib/docker
	// if there are any problems.
	dockerRoot := "/var/lib/docker"
	dockerInfo, err := client.Info()
	if err != nil {
		glog.Errorf("Failed to execute Info() call to the Docker client: %v", err)
		glog.Warningf("Using fallback default of /var/lib/docker for location of Docker runtime")
	} else {
		driverStatus := dockerInfo.Get("DriverStatus")
		// The DriverStatus is a *string* which represents a list of string pairs, e.g.
		// DriverStatus=[["Root Dir","/var/lib/docker/aufs"],["Backing Filesystem","extfs"],["Dirs","279"]]
		// Strip out the square brackets and quotes.
		s := strings.Replace(driverStatus, "[", "", -1)
		s = strings.Replace(s, "]", "", -1)
		s = strings.Replace(s, `"`, "", -1)
		// Separate by commas.
		ss := strings.Split(s, ",")
		// Search for the "Root Dir" entry; its value is the element that follows it.
		for i, k := range ss {
			if k == "Root Dir" && i+1 < len(ss) {
				// Discard the /aufs suffix.
				dockerRoot, _ = path.Split(ss[i+1])
				// Trim the last slash.
				dockerRoot = strings.TrimSuffix(dockerRoot, "/")
				glog.Infof("Setting dockerRoot to %s", dockerRoot)
			}
		}
	}

	reasonCache := reasonInfoCache{cache: lru.New(maxReasonCacheEntries)}

	dm := &DockerManager{
		client:                 client,
		recorder:               recorder,
		containerRefManager:    containerRefManager,
		os:                     osInterface,
		machineInfo:            machineInfo,
		podInfraContainerImage: podInfraContainerImage,
		reasonCache:            reasonCache,
		dockerPuller:           newDockerPuller(client, qps, burst),
		dockerRoot:             dockerRoot,
		containerLogsDir:       containerLogsDir,
		networkPlugin:          networkPlugin,
		prober:                 prober,
		generator:              generator,
		execHandler:            execHandler,
		oomAdjuster:            oomAdjuster,
		procFs:                 procFs,
		cpuCFSQuota:            cpuCFSQuota,
	}
	dm.runner = lifecycle.NewHandlerRunner(httpClient, dm, dm)
	dm.imagePuller = kubecontainer.NewImagePuller(recorder, dm, imageBackOff)
	dm.containerGC = NewContainerGC(client, containerLogsDir)

	return dm
}

// reasonInfoCache stores reason/message pairs keyed by <pod_UID>_<container_name>.
type reasonInfoCache struct {
	lock  sync.RWMutex
	cache *lru.Cache
}

type reasonInfo struct {
	reason  string
	message string
}

func (sc *reasonInfoCache) composeKey(uid types.UID, name string) string {
	return fmt.Sprintf("%s_%s", uid, name)
}

func (sc *reasonInfoCache) Add(uid types.UID, name string, reason, message string) {
	sc.lock.Lock()
	defer sc.lock.Unlock()
	sc.cache.Add(sc.composeKey(uid, name), reasonInfo{reason, message})
}

func (sc *reasonInfoCache) Remove(uid types.UID, name string) {
	sc.lock.Lock()
	defer sc.lock.Unlock()
	sc.cache.Remove(sc.composeKey(uid, name))
}

func (sc *reasonInfoCache) Get(uid types.UID, name string) (reasonInfo, bool) {
	sc.lock.RLock()
	defer sc.lock.RUnlock()
	value, ok := sc.cache.Get(sc.composeKey(uid, name))
	if !ok {
		return reasonInfo{"", ""}, false
	}
	return value.(reasonInfo), true
}

// GetContainerLogs returns logs of a specific container. By default, it
// returns a snapshot of the container log. Set 'follow' to true to stream
// the log. Set 'follow' to false and specify the number of lines (e.g.
// "100" or "all") to tail the log.
// TODO: Make the 'RawTerminal' option flaggable.
func (dm *DockerManager) GetContainerLogs(pod *api.Pod, containerID kubecontainer.ContainerID, logOptions *api.PodLogOptions, stdout, stderr io.Writer) (err error) {
	var since int64
	if logOptions.SinceSeconds != nil {
		t := unversioned.Now().Add(-time.Duration(*logOptions.SinceSeconds) * time.Second)
		since = t.Unix()
	}
	if logOptions.SinceTime != nil {
		since = logOptions.SinceTime.Unix()
	}
	opts := docker.LogsOptions{
		Container:    containerID.ID,
		Stdout:       true,
		Stderr:       true,
		OutputStream: stdout,
		ErrorStream:  stderr,
		Timestamps:   logOptions.Timestamps,
		Since:        since,
		Follow:       logOptions.Follow,
		RawTerminal:  false,
	}

	if !logOptions.Follow && logOptions.TailLines != nil {
		opts.Tail = strconv.FormatInt(*logOptions.TailLines, 10)
	}

	err = dm.client.Logs(opts)
	return
}

var (
	// ErrNoContainersInPod is returned when there are no containers for a given pod.
	ErrNoContainersInPod = errors.New("NoContainersInPod")

	// ErrNoPodInfraContainerInPod is returned when there is no pod infra container for a given pod.
	ErrNoPodInfraContainerInPod = errors.New("NoPodInfraContainerInPod")

	// ErrContainerCannotRun is returned when a container is created, but cannot run properly.
	ErrContainerCannotRun = errors.New("ContainerCannotRun")
)

// containerStatusResult is the internal information kept for containers from inspection.
type containerStatusResult struct {
	status api.ContainerStatus
	ip     string
	err    error
}
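
// inspectContainer queries docker for the container with the given ID and
// converts the result into an api.ContainerStatus, resolving the pod IP for
// the infra container and reading the termination message from tPath when set.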
func (dm *DockerManager) inspectContainer(dockerID, containerName, tPath string, pod *api.Pod) *containerStatusResult {
	result := containerStatusResult{api.ContainerStatus{}, "", nil}

	inspectResult, err := dm.client.InspectContainer(dockerID)
	if err != nil {
		result.err = err
		return &result
	}
	if inspectResult == nil {
		// Why did we not get an error?
		return &result
	}

	glog.V(4).Infof("Container inspect result: %+v", *inspectResult)
	result.status = api.ContainerStatus{
		Name:        containerName,
		Image:       inspectResult.Config.Image,
		ImageID:     DockerPrefix + inspectResult.Image,
		ContainerID: DockerPrefix + dockerID,
	}

	if inspectResult.State.Running {
		result.status.State.Running = &api.ContainerStateRunning{
			StartedAt: unversioned.NewTime(inspectResult.State.StartedAt),
		}
		if containerName == PodInfraContainerName {
			if inspectResult.NetworkSettings != nil {
				result.ip = inspectResult.NetworkSettings.IPAddress
			}
			// Override the above if a network plugin exists.
			if dm.networkPlugin.Name() != network.DefaultPluginName {
				netStatus, err := dm.networkPlugin.Status(pod.Namespace, pod.Name, kubetypes.DockerID(dockerID))
				if err != nil {
					glog.Errorf("NetworkPlugin %s failed on the status hook for pod '%s' - %v", dm.networkPlugin.Name(), pod.Name, err)
				} else if netStatus != nil {
					result.ip = netStatus.IP.String()
				}
			}
		}
	} else if !inspectResult.State.FinishedAt.IsZero() || inspectResult.State.ExitCode != 0 {
		// When a container fails to start, State.ExitCode is non-zero and
		// FinishedAt and StartedAt are both zero.
		reason := ""
		message := inspectResult.State.Error
		finishedAt := unversioned.NewTime(inspectResult.State.FinishedAt)
		startedAt := unversioned.NewTime(inspectResult.State.StartedAt)

		// Note: An application might handle OOMKilled gracefully.
		// In that case, the container is oom killed, but the exit
		// code could be 0.
		if inspectResult.State.OOMKilled {
			reason = "OOMKilled"
		} else if inspectResult.State.ExitCode == 0 {
			reason = "Completed"
		} else if !inspectResult.State.FinishedAt.IsZero() {
			reason = "Error"
		} else {
			// FinishedAt is zero and ExitCode is non-zero: docker failed to
			// start the container.
			reason = ErrContainerCannotRun.Error()
			// Adjust the times to when docker attempted to run the container;
			// otherwise startedAt and finishedAt would be set to the epoch,
			// which is misleading.
			finishedAt = unversioned.NewTime(inspectResult.Created)
			startedAt = unversioned.NewTime(inspectResult.Created)
		}
		result.status.State.Terminated = &api.ContainerStateTerminated{
			ExitCode:    inspectResult.State.ExitCode,
			Message:     message,
			Reason:      reason,
			StartedAt:   startedAt,
			FinishedAt:  finishedAt,
			ContainerID: DockerPrefix + dockerID,
		}
		if tPath != "" {
			path, found := inspectResult.Volumes[tPath]
			if found {
				data, err := ioutil.ReadFile(path)
				if err != nil {
					result.status.State.Terminated.Message = fmt.Sprintf("Error on reading termination-log %s: %v", path, err)
				} else {
					result.status.State.Terminated.Message = string(data)
				}
			}
		}
	}
	return &result
}

// GetPodStatus returns docker related status for all containers in the pod
// as well as the infrastructure container.
func (dm *DockerManager) GetPodStatus(pod *api.Pod) (*api.PodStatus, error) {
	podFullName := kubecontainer.GetPodFullName(pod)
	uid := pod.UID
	manifest := pod.Spec
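
	// Restart counts used to be derived solely by examining dead docker
	// containers, which are subject to background garbage collection, so the
	// count was capped and could even decrement when GC ran. Instead, use the
	// container statuses recorded in the pod status as a reference point: if
	// a container terminated after the last observation, increment the restart
	// count on top of the last observed count. A container created and GC'd
	// entirely between two observations is missed, but since pod statuses are
	// reported frequently the chance of this is low, and the count still
	// increases monotonically (except on container inspect errors).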
	oldStatuses := make(map[string]api.ContainerStatus, len(pod.Spec.Containers))
	lastObservedTime := make(map[string]unversioned.Time, len(pod.Spec.Containers))
	// Record the last time we observed a container termination.
	for _, status := range pod.Status.ContainerStatuses {
		oldStatuses[status.Name] = status
		if status.LastTerminationState.Terminated != nil {
			timestamp, ok := lastObservedTime[status.Name]
			if !ok || timestamp.Before(status.LastTerminationState.Terminated.FinishedAt) {
				lastObservedTime[status.Name] = status.LastTerminationState.Terminated.FinishedAt
			}
		}
	}

	var podStatus api.PodStatus
	statuses := make(map[string]*api.ContainerStatus, len(pod.Spec.Containers))

	expectedContainers := make(map[string]api.Container)
	for _, container := range manifest.Containers {
		expectedContainers[container.Name] = container
	}
	expectedContainers[PodInfraContainerName] = api.Container{}

	containers, err := dm.client.ListContainers(docker.ListContainersOptions{All: true})
	if err != nil {
		return nil, err
	}

	containerDone := sets.NewString()
	// Loop through the list of running and exited docker containers to
	// construct the statuses. We assume docker returns a list of containers
	// sorted in reverse by time.
	for _, value := range containers {
		if len(value.Names) == 0 {
			continue
		}
		dockerName, _, err := ParseDockerName(value.Names[0])
		if err != nil {
			continue
		}
		if dockerName.PodFullName != podFullName {
			continue
		}
		if uid != "" && dockerName.PodUID != uid {
			continue
		}
		dockerContainerName := dockerName.ContainerName
		c, found := expectedContainers[dockerContainerName]
		if !found {
			continue
		}
		terminationMessagePath := c.TerminationMessagePath
		if containerDone.Has(dockerContainerName) {
			continue
		}

		var terminationState *api.ContainerState
		// Inspect the container.
		result := dm.inspectContainer(value.ID, dockerContainerName, terminationMessagePath, pod)
		if result.err != nil {
			return nil, result.err
		} else if result.status.State.Terminated != nil {
			terminationState = &result.status.State
		}

		if containerStatus, found := statuses[dockerContainerName]; found {
			if containerStatus.LastTerminationState.Terminated == nil && terminationState != nil {
				// Populate the last termination state.
				containerStatus.LastTerminationState = *terminationState
			}
			if terminationState == nil {
				// Not a dead container.
				continue
			}
			// Only count dead containers terminated after the last time we observed.
			lastObservedTime, ok := lastObservedTime[dockerContainerName]
			if !ok || terminationState.Terminated.FinishedAt.After(lastObservedTime.Time) {
				containerStatus.RestartCount += 1
			} else {
				// The container finished before the last observation. No
				// need to examine/count the older containers. Mark the
				// container name as done.
				containerDone.Insert(dockerContainerName)
			}
			continue
		}

		if dockerContainerName == PodInfraContainerName {
			// Found the network (pod infra) container.
			if result.status.State.Running != nil {
				podStatus.PodIP = result.ip
			}
		} else {
			// Add user container information.
			if oldStatus, found := oldStatuses[dockerContainerName]; found {
				// Use the last observed restart count if it's available.
				result.status.RestartCount = oldStatus.RestartCount
			}
			statuses[dockerContainerName] = &result.status
		}
	}

	// Handle the containers for which we cannot find any associated active or
	// dead docker containers, or which are in restart backoff.
	for _, container := range manifest.Containers {
		if containerStatus, found := statuses[container.Name]; found {
			reasonInfo, ok := dm.reasonCache.Get(uid, container.Name)
			if ok && reasonInfo.reason == kubecontainer.ErrCrashLoopBackOff.Error() {
				// We need to increment the restart count if we are going to
				// move the current state to last terminated state.
				if containerStatus.State.Terminated != nil {
					lastObservedTime, ok := lastObservedTime[container.Name]
					if !ok || containerStatus.State.Terminated.FinishedAt.After(lastObservedTime.Time) {
						containerStatus.RestartCount += 1
					}
				}
				containerStatus.LastTerminationState = containerStatus.State
				containerStatus.State = api.ContainerState{
					Waiting: &api.ContainerStateWaiting{
						Reason:  reasonInfo.reason,
						Message: reasonInfo.message,
					},
				}
			}
			continue
		}
		var containerStatus api.ContainerStatus
		containerStatus.Name = container.Name
		containerStatus.Image = container.Image
		if oldStatus, found := oldStatuses[container.Name]; found {
			// Some states may be lost due to GC; apply the last observed
			// values if possible.
			containerStatus.RestartCount = oldStatus.RestartCount
			containerStatus.LastTerminationState = oldStatus.LastTerminationState
		}
		// TODO(dchen1107): docker/docker/issues/8365 to figure out if the image exists
		reasonInfo, ok := dm.reasonCache.Get(uid, container.Name)
		if !ok {
			// This is the default position for a container: at this point
			// there are no active or dead containers and the reasonCache is
			// empty (no entry, or the entry has expired), so it's reasonable
			// to say the container is being created until a more accurate
			// reason is logged.
			containerStatus.State = api.ContainerState{
				Waiting: &api.ContainerStateWaiting{
					Reason:  "ContainerCreating",
					Message: fmt.Sprintf("Image: %s is ready, container is creating", container.Image),
				},
			}
		} else if reasonInfo.reason == kubecontainer.ErrImagePullBackOff.Error() ||
			reasonInfo.reason == kubecontainer.ErrImageInspect.Error() ||
			reasonInfo.reason == kubecontainer.ErrImagePull.Error() ||
			reasonInfo.reason == kubecontainer.ErrImageNeverPull.Error() {
			// Mark it as waiting; the reason will be filled in below.
			containerStatus.State = api.ContainerState{Waiting: &api.ContainerStateWaiting{}}
		} else if reasonInfo.reason == kubecontainer.ErrRunContainer.Error() {
			// Mark it as waiting; the reason will be filled in below.
			containerStatus.State = api.ContainerState{Waiting: &api.ContainerStateWaiting{}}
		}
		statuses[container.Name] = &containerStatus
	}

	podStatus.ContainerStatuses = make([]api.ContainerStatus, 0)
	for containerName, status := range statuses {
		if status.State.Waiting != nil {
			status.State.Running = nil
			// For containers in the waiting state, fill in a specific reason if it is recorded.
			if reasonInfo, ok := dm.reasonCache.Get(uid, containerName); ok {
				status.State.Waiting.Reason = reasonInfo.reason
				status.State.Waiting.Message = reasonInfo.message
			}
		}
		podStatus.ContainerStatuses = append(podStatus.ContainerStatuses, *status)
	}

	// Sort the container statuses, since clients of this interface expect the
	// list of containers in a pod to behave like the output of `docker list`,
	// which has a deterministic order.
	sort.Sort(kubetypes.SortedContainerStatuses(podStatus.ContainerStatuses))
	return &podStatus, nil
}

// makeEnvList converts EnvVar list to a list of strings, in the form of
// '<key>=<value>', which can be understood by docker.
func makeEnvList(envs []kubecontainer.EnvVar) (result []string) {
	for _, env := range envs {
		result = append(result, fmt.Sprintf("%s=%s", env.Name, env.Value))
	}
	return
}

// makeMountBindings converts the mount list to a list of strings that
// can be understood by docker.
// Each element in the string is in the form of:
// '<HostPath>:<ContainerPath>', or
// '<HostPath>:<ContainerPath>:ro', if the path is read only.
func makeMountBindings(mounts []kubecontainer.Mount) (result []string) {
	for _, m := range mounts {
		bind := fmt.Sprintf("%s:%s", m.HostPath, m.ContainerPath)
		if m.ReadOnly {
			bind += ":ro"
		}
		result = append(result, bind)
	}
	return
}
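
// makePortsAndBindings converts the kubelet's port mappings into the
// exposed-port set and host-port binding map expected by docker. Mappings
// that do not specify a HostPort are skipped, since no binding is needed.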
func makePortsAndBindings(portMappings []kubecontainer.PortMapping) (map[docker.Port]struct{}, map[docker.Port][]docker.PortBinding) {
	exposedPorts := map[docker.Port]struct{}{}
	portBindings := map[docker.Port][]docker.PortBinding{}
	for _, port := range portMappings {
		exteriorPort := port.HostPort
		if exteriorPort == 0 {
			// No need to do port binding when HostPort is not specified.
			continue
		}
		interiorPort := port.ContainerPort
		// Some of this port stuff is under-documented voodoo.
		// See http://stackoverflow.com/questions/20428302/binding-a-port-to-a-host-interface-using-the-rest-api
		var protocol string
		switch strings.ToUpper(string(port.Protocol)) {
		case "UDP":
			protocol = "/udp"
		case "TCP":
			protocol = "/tcp"
		default:
			glog.Warningf("Unknown protocol %q: defaulting to TCP", port.Protocol)
			protocol = "/tcp"
		}

		dockerPort := docker.Port(strconv.Itoa(interiorPort) + protocol)
		exposedPorts[dockerPort] = struct{}{}

		hostBinding := docker.PortBinding{
			HostPort: strconv.Itoa(exteriorPort),
			HostIP:   port.HostIP,
		}

		// Allow multiple host ports to bind to the same docker port.
		if existedBindings, ok := portBindings[dockerPort]; ok {
			// If a docker port already maps to a host port, just append the new host port.
			portBindings[dockerPort] = append(existedBindings, hostBinding)
		} else {
			// Otherwise, it's a fresh port binding.
			portBindings[dockerPort] = []docker.PortBinding{
				hostBinding,
			}
		}
	}
	return exposedPorts, portBindings
}
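
// runContainer creates the docker container for the given pod container
// (applying labels, resource limits, mounts, and security context) and then
// starts it, recording lifecycle events against ref along the way.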
func (dm *DockerManager) runContainer(
	pod *api.Pod,
	container *api.Container,
	opts *kubecontainer.RunContainerOptions,
	ref *api.ObjectReference,
	netMode string,
	ipcMode string,
	utsMode string,
	pidMode string) (kubecontainer.ContainerID, error) {

	dockerName := KubeletContainerName{
		PodFullName:   kubecontainer.GetPodFullName(pod),
		PodUID:        pod.UID,
		ContainerName: container.Name,
	}
	exposedPorts, portBindings := makePortsAndBindings(opts.PortMappings)

	// TODO(vmarmol): Handle better.
	// Cap the hostname at 63 chars (the specification is 64 bytes, which is
	// 63 chars plus the null terminating char).
	const hostnameMaxLen = 63
	containerHostname := pod.Name
	if len(containerHostname) > hostnameMaxLen {
		containerHostname = containerHostname[:hostnameMaxLen]
	}

	// Pod information is recorded on the container as labels to preserve it in the event the pod is deleted
	// while the Kubelet is down and there is no information available to recover the pod. This includes
	// termination information like the termination grace period and the pre stop hooks.
	// TODO: keep these labels up to date if the pod changes
	namespacedName := types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}
	labels := map[string]string{
		kubernetesNameLabel: namespacedName.String(),
	}
	if pod.Spec.TerminationGracePeriodSeconds != nil {
		labels[kubernetesTerminationGracePeriodLabel] = strconv.FormatInt(*pod.Spec.TerminationGracePeriodSeconds, 10)
	}
	if container.Lifecycle != nil && container.Lifecycle.PreStop != nil {
		// TODO: This is kind of hacky, we should really just encode the bits we need.
		data, err := latest.GroupOrDie("").Codec.Encode(pod)
		if err != nil {
			glog.Errorf("Failed to encode pod: %s for prestop hook", pod.Name)
		} else {
			labels[kubernetesPodLabel] = string(data)
			labels[kubernetesContainerLabel] = container.Name
		}
	}
	memoryLimit := container.Resources.Limits.Memory().Value()
	cpuRequest := container.Resources.Requests.Cpu()
	cpuLimit := container.Resources.Limits.Cpu()
	var cpuShares int64
	// If the request is not specified but the limit is, we want the request
	// to default to the limit. The API server does this for new containers,
	// but we repeat the logic in the Kubelet for containers running on
	// existing Kubernetes clusters.
	if cpuRequest.Amount == nil && cpuLimit.Amount != nil {
		cpuShares = milliCPUToShares(cpuLimit.MilliValue())
	} else {
		// If cpuRequest.Amount is nil, then milliCPUToShares will return the
		// minimal number of CPU shares.
		cpuShares = milliCPUToShares(cpuRequest.MilliValue())
	}
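	// For reference, docker CPU shares conventionally scale 1 CPU (1000m) to
	// 1024 shares, so a 250m request would correspond to roughly 256 shares;
	// see milliCPUToShares in this package for the exact conversion.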

	_, containerName := BuildDockerName(dockerName, container)
	dockerOpts := docker.CreateContainerOptions{
		Name: containerName,
		Config: &docker.Config{
			Env:          makeEnvList(opts.Envs),
			ExposedPorts: exposedPorts,
			Hostname:     containerHostname,
			Image:        container.Image,
			// Memory and CPU are set here for older versions of Docker (pre-1.6).
			Memory:     memoryLimit,
			MemorySwap: -1,
			CPUShares:  cpuShares,
			WorkingDir: container.WorkingDir,
			Labels:     labels,
			// Interactive containers:
			OpenStdin: container.Stdin,
			Tty:       container.TTY,
		},
	}
	setEntrypointAndCommand(container, opts, &dockerOpts)

	glog.V(3).Infof("Container %v/%v/%v: setting entrypoint \"%v\" and command \"%v\"", pod.Namespace, pod.Name, container.Name, dockerOpts.Config.Entrypoint, dockerOpts.Config.Cmd)

	securityContextProvider := securitycontext.NewSimpleSecurityContextProvider()
	securityContextProvider.ModifyContainerConfig(pod, container, dockerOpts.Config)
	dockerContainer, err := dm.client.CreateContainer(dockerOpts)
	if err != nil {
		if ref != nil {
			dm.recorder.Eventf(ref, "Failed", "Failed to create docker container with error: %v", err)
		}
		return kubecontainer.ContainerID{}, err
	}

	if ref != nil {
		dm.recorder.Eventf(ref, "Created", "Created with docker id %v", util.ShortenString(dockerContainer.ID, 12))
	}
	binds := makeMountBindings(opts.Mounts)

	// The reason we create and mount the log file in here (not in kubelet) is because
	// the file's location depends on the ID of the container, and we need to create and
	// mount the file before actually starting the container.
	// TODO(yifan): Consider pulling this logic out since we might need to reuse it in
	// other container runtimes.
	if opts.PodContainerDir != "" && len(container.TerminationMessagePath) != 0 {
		containerLogPath := path.Join(opts.PodContainerDir, dockerContainer.ID)
		fs, err := os.Create(containerLogPath)
		if err != nil {
			// TODO: Clean up the previously created dir? Return the error?
			glog.Errorf("Error on creating termination-log file %q: %v", containerLogPath, err)
		} else {
			fs.Close() // Close immediately; we're just doing a `touch` here
			b := fmt.Sprintf("%s:%s", containerLogPath, container.TerminationMessagePath)
			binds = append(binds, b)
		}
	}
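	// The appended bind follows Docker's "host-path:container-path" form; an
	// illustrative value (paths assumed, not taken from this file) would be:
	//
	//	/var/lib/kubelet/pods/<pod-uid>/containers/<ctr>/<docker-id>:/dev/termination-log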
	hc := &docker.HostConfig{
		PortBindings: portBindings,
		Binds:        binds,
		NetworkMode:  netMode,
		IpcMode:      ipcMode,
		UTSMode:      utsMode,
		PidMode:      pidMode,
		// Memory and CPU are set here for newer versions of Docker (1.6+).
		Memory:     memoryLimit,
		MemorySwap: -1,
		CPUShares:  cpuShares,
	}
	if dm.cpuCFSQuota {
		// If cpuLimit.Amount is nil, then the appropriate default value is returned
		// to allow full usage of the CPU resource.
		cpuQuota, cpuPeriod := milliCPUToQuota(cpuLimit.MilliValue())

		hc.CPUQuota = cpuQuota
		hc.CPUPeriod = cpuPeriod
	}
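	// Worked example, assuming milliCPUToQuota uses the conventional 100ms
	// (100000us) CFS period:
	//
	//	limit 500m -> quota 50000us per 100000us period (half a core)
	//	limit nil  -> a default that allows full CPU usage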
	if len(opts.DNS) > 0 {
		hc.DNS = opts.DNS
	}
	if len(opts.DNSSearch) > 0 {
		hc.DNSSearch = opts.DNSSearch
	}
	if len(opts.CgroupParent) > 0 {
		hc.CgroupParent = opts.CgroupParent
	}
	securityContextProvider.ModifyHostConfig(pod, container, hc)

	if err = dm.client.StartContainer(dockerContainer.ID, hc); err != nil {
		if ref != nil {
			dm.recorder.Eventf(ref, "Failed",
				"Failed to start with docker id %v with error: %v", util.ShortenString(dockerContainer.ID, 12), err)
		}
		return kubecontainer.ContainerID{}, err
	}
	if ref != nil {
		dm.recorder.Eventf(ref, "Started", "Started with docker id %v", util.ShortenString(dockerContainer.ID, 12))
	}
	return kubetypes.DockerID(dockerContainer.ID).ContainerID(), nil
}

func setEntrypointAndCommand(container *api.Container, opts *kubecontainer.RunContainerOptions, dockerOpts *docker.CreateContainerOptions) {
	command, args := kubecontainer.ExpandContainerCommandAndArgs(container, opts.Envs)

	dockerOpts.Config.Entrypoint = command
	dockerOpts.Config.Cmd = args
}
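
// Illustrative example of the expansion performed above: a container spec with
//
//	Command: []string{"/bin/sh", "-c"}
//	Args:    []string{"echo $(MESSAGE)"}
//
// and an environment in opts.Envs containing MESSAGE=hello yields
// Entrypoint ["/bin/sh", "-c"] and Cmd ["echo hello"]; unresolvable $(VAR)
// references are left as-is by kubecontainer.ExpandContainerCommandAndArgs.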

// A helper function to get the KubeletContainerName and hash from a docker
// container.
func getDockerContainerNameInfo(c *docker.APIContainers) (*KubeletContainerName, uint64, error) {
	if len(c.Names) == 0 {
		return nil, 0, fmt.Errorf("cannot parse empty docker container name: %#v", c.Names)
	}
	dockerName, hash, err := ParseDockerName(c.Names[0])
	if err != nil {
		return nil, 0, fmt.Errorf("parse docker container name %q error: %v", c.Names[0], err)
	}
	return dockerName, hash, nil
}
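
// For reference, kubelet-created container names follow roughly this shape
// (the authoritative encoding lives in BuildDockerName/ParseDockerName; the
// layout below is an approximation, not a guarantee):
//
//	k8s_<container-name>.<hash>_<pod-name>_<pod-namespace>_<pod-uid>_<random>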

// Get pod UID, name, and namespace by examining the container names.
func getPodInfoFromContainer(c *docker.APIContainers) (types.UID, string, string, error) {
	dockerName, _, err := getDockerContainerNameInfo(c)
	if err != nil {
		return types.UID(""), "", "", err
	}
	name, namespace, err := kubecontainer.ParsePodFullName(dockerName.PodFullName)
	if err != nil {
		return types.UID(""), "", "", fmt.Errorf("parse pod full name %q error: %v", dockerName.PodFullName, err)
	}
	return dockerName.PodUID, name, namespace, nil
}

// GetContainers returns a list of running containers if |all| is false;
// otherwise, it returns all containers.
func (dm *DockerManager) GetContainers(all bool) ([]*kubecontainer.Container, error) {
	containers, err := GetKubeletDockerContainers(dm.client, all)
	if err != nil {
		return nil, err
	}
	// Convert DockerContainers to []*kubecontainer.Container.
	result := make([]*kubecontainer.Container, 0, len(containers))
	for _, c := range containers {
		converted, err := toRuntimeContainer(c)
		if err != nil {
			glog.Errorf("Error examining the container: %v", err)
			continue
		}
		result = append(result, converted)
	}
	return result, nil
}

func (dm *DockerManager) GetPods(all bool) ([]*kubecontainer.Pod, error) {
	start := time.Now()
	defer func() {
		metrics.ContainerManagerLatency.WithLabelValues("GetPods").Observe(metrics.SinceInMicroseconds(start))
	}()
	pods := make(map[types.UID]*kubecontainer.Pod)
	var result []*kubecontainer.Pod

	containers, err := GetKubeletDockerContainers(dm.client, all)
	if err != nil {
		return nil, err
	}

	// Group containers by pod.
	for _, c := range containers {
		converted, err := toRuntimeContainer(c)
		if err != nil {
			glog.Errorf("Error examining the container: %v", err)
			continue
		}

		podUID, podName, podNamespace, err := getPodInfoFromContainer(c)
		if err != nil {
			glog.Errorf("Error examining the container: %v", err)
			continue
		}

		pod, found := pods[podUID]
		if !found {
			pod = &kubecontainer.Pod{
				ID:        podUID,
				Name:      podName,
				Namespace: podNamespace,
			}
			pods[podUID] = pod
		}
		pod.Containers = append(pod.Containers, converted)
	}

	// Convert map to list.
	for _, c := range pods {
		result = append(result, c)
	}
	return result, nil
}

// ListImages lists all images in the local storage.
func (dm *DockerManager) ListImages() ([]kubecontainer.Image, error) {
	var images []kubecontainer.Image

	dockerImages, err := dm.client.ListImages(docker.ListImagesOptions{})
	if err != nil {
		return images, err
	}

	for _, di := range dockerImages {
		image, err := toRuntimeImage(&di)
		if err != nil {
			continue
		}
		images = append(images, *image)
	}
	return images, nil
}

// TODO(vmarmol): Consider unexporting.
// PullImage pulls an image from network to local storage.
func (dm *DockerManager) PullImage(image kubecontainer.ImageSpec, secrets []api.Secret) error {
	return dm.dockerPuller.Pull(image.Image, secrets)
}

// IsImagePresent checks whether the container image is already in the local storage.
func (dm *DockerManager) IsImagePresent(image kubecontainer.ImageSpec) (bool, error) {
	return dm.dockerPuller.IsImagePresent(image.Image)
}

// RemoveImage removes the specified image.
func (dm *DockerManager) RemoveImage(image kubecontainer.ImageSpec) error {
	return dm.client.RemoveImage(image.Image)
}

// podInfraContainerChanged returns true if the pod infra container has changed.
func (dm *DockerManager) podInfraContainerChanged(pod *api.Pod, podInfraContainer *kubecontainer.Container) (bool, error) {
	networkMode := ""
	var ports []api.ContainerPort

	dockerPodInfraContainer, err := dm.client.InspectContainer(podInfraContainer.ID.ID)
	if err != nil {
		return false, err
	}

	// Check network mode.
	if dockerPodInfraContainer.HostConfig != nil {
		networkMode = dockerPodInfraContainer.HostConfig.NetworkMode
	}
	if pod.Spec.SecurityContext != nil && pod.Spec.SecurityContext.HostNetwork {
		if networkMode != "host" {
			glog.V(4).Infof("host: %v, %v", pod.Spec.SecurityContext.HostNetwork, networkMode)
			return true, nil
		}
	} else {
		// Docker only exports ports from the pod infra container. Let's
		// collect all of the relevant ports and export them.
		for _, container := range pod.Spec.Containers {
			ports = append(ports, container.Ports...)
		}
	}
	expectedPodInfraContainer := &api.Container{
		Name:            PodInfraContainerName,
		Image:           dm.podInfraContainerImage,
		Ports:           ports,
		ImagePullPolicy: podInfraContainerImagePullPolicy,
	}
	return podInfraContainer.Hash != kubecontainer.HashContainer(expectedPodInfraContainer), nil
}

type dockerVersion docker.APIVersion

func NewVersion(input string) (dockerVersion, error) {
	version, err := docker.NewAPIVersion(input)
	return dockerVersion(version), err
}

func (dv dockerVersion) String() string {
	return docker.APIVersion(dv).String()
}

func (dv dockerVersion) Compare(other string) (int, error) {
	a := docker.APIVersion(dv)
	b, err := docker.NewAPIVersion(other)
	if err != nil {
		return 0, err
	}
	if a.LessThan(b) {
		return -1, nil
	}
	if a.GreaterThan(b) {
		return 1, nil
	}
	return 0, nil
}
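
// Sketch of the comparison semantics: docker.APIVersion compares component by
// component numerically, not lexically, so for example:
//
//	v, _ := NewVersion("1.9")
//	r, _ := v.Compare("1.15") // r == -1, since 9 < 15 as numbers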

func (dm *DockerManager) Version() (kubecontainer.Version, error) {
	env, err := dm.client.Version()
	if err != nil {
		return nil, fmt.Errorf("docker: failed to get docker version: %v", err)
	}

	apiVersion := env.Get("ApiVersion")
	version, err := docker.NewAPIVersion(apiVersion)
	if err != nil {
		glog.Errorf("docker: failed to parse docker server version %q: %v", apiVersion, err)
		return nil, fmt.Errorf("docker: failed to parse docker server version %q: %v", apiVersion, err)
	}
	return dockerVersion(version), nil
}

// The first version of docker that supports exec natively is 1.3.0 == API 1.15.
var dockerAPIVersionWithExec = "1.15"

func (dm *DockerManager) nativeExecSupportExists() (bool, error) {
	version, err := dm.Version()
	if err != nil {
		return false, err
	}
	result, err := version.Compare(dockerAPIVersionWithExec)
	if result >= 0 {
		return true, err
	}
	return false, err
}

func (dm *DockerManager) getRunInContainerCommand(containerID kubecontainer.ContainerID, cmd []string) (*exec.Cmd, error) {
	args := append([]string{"exec"}, cmd...)
	command := exec.Command("/usr/sbin/nsinit", args...)
	command.Dir = fmt.Sprintf("/var/lib/docker/execdriver/native/%s", containerID.ID)
	return command, nil
}

func (dm *DockerManager) runInContainerUsingNsinit(containerID kubecontainer.ContainerID, cmd []string) ([]byte, error) {
	c, err := dm.getRunInContainerCommand(containerID, cmd)
	if err != nil {
		return nil, err
	}
	return c.CombinedOutput()
}

// RunInContainer runs the command inside the container identified by
// containerID, falling back to nsinit when the local docker daemon lacks
// native exec support.
func (dm *DockerManager) RunInContainer(containerID kubecontainer.ContainerID, cmd []string) ([]byte, error) {
	// If native exec support does not exist in the local docker daemon, use nsinit.
	useNativeExec, err := dm.nativeExecSupportExists()
	if err != nil {
		return nil, err
	}
	if !useNativeExec {
		glog.V(2).Infof("Using nsinit to run the command %+v inside container %s", cmd, containerID)
		return dm.runInContainerUsingNsinit(containerID, cmd)
	}
	glog.V(2).Infof("Using docker native exec to run cmd %+v inside container %s", cmd, containerID)
	createOpts := docker.CreateExecOptions{
		Container:    containerID.ID,
		Cmd:          cmd,
		AttachStdin:  false,
		AttachStdout: true,
		AttachStderr: true,
		Tty:          false,
	}
	execObj, err := dm.client.CreateExec(createOpts)
	if err != nil {
		return nil, fmt.Errorf("failed to run in container - Exec setup failed - %v", err)
	}
	var buf bytes.Buffer
	startOpts := docker.StartExecOptions{
		Detach:       false,
		Tty:          false,
		OutputStream: &buf,
		ErrorStream:  &buf,
		RawTerminal:  false,
	}
	err = dm.client.StartExec(execObj.ID, startOpts)
	if err != nil {
		glog.V(2).Infof("StartExec failed with error: %v", err)
		return nil, err
	}
	// Poll the exec session until it stops running.
	ticker := time.NewTicker(2 * time.Second)
	defer ticker.Stop()
	for {
		inspect, err2 := dm.client.InspectExec(execObj.ID)
		if err2 != nil {
			glog.V(2).Infof("InspectExec %s failed with error: %+v", execObj.ID, err2)
			return buf.Bytes(), err2
		}
		if !inspect.Running {
			if inspect.ExitCode != 0 {
				glog.V(2).Infof("InspectExec %s exited with result %+v", execObj.ID, inspect)
				err = &dockerExitError{inspect}
			}
			break
		}
		<-ticker.C
	}

	return buf.Bytes(), err
}

type dockerExitError struct {
	Inspect *docker.ExecInspect
}

func (d *dockerExitError) String() string {
	return d.Error()
}

func (d *dockerExitError) Error() string {
	return fmt.Sprintf("Error executing in Docker Container: %d", d.Inspect.ExitCode)
}

func (d *dockerExitError) Exited() bool {
	return !d.Inspect.Running
}

func (d *dockerExitError) ExitStatus() int {
	return d.Inspect.ExitCode
}

// ExecInContainer runs the command inside the container identified by containerID.
func (dm *DockerManager) ExecInContainer(containerID kubecontainer.ContainerID, cmd []string, stdin io.Reader, stdout, stderr io.WriteCloser, tty bool) error {
	if dm.execHandler == nil {
		return errors.New("unable to exec without an exec handler")
	}

	container, err := dm.client.InspectContainer(containerID.ID)
	if err != nil {
		return err
	}
	if !container.State.Running {
		return fmt.Errorf("container not running (%s)", container.ID)
	}

	return dm.execHandler.ExecInContainer(dm.client, container, cmd, stdin, stdout, stderr, tty)
}

func (dm *DockerManager) AttachContainer(containerID kubecontainer.ContainerID, stdin io.Reader, stdout, stderr io.WriteCloser, tty bool) error {
	opts := docker.AttachToContainerOptions{
		Container:    containerID.ID,
		InputStream:  stdin,
		OutputStream: stdout,
		ErrorStream:  stderr,
		Stream:       true,
		Logs:         true,
		Stdin:        stdin != nil,
		Stdout:       stdout != nil,
		Stderr:       stderr != nil,
		RawTerminal:  tty,
	}
	return dm.client.AttachToContainer(opts)
}

func noPodInfraContainerError(podName, podNamespace string) error {
	return fmt.Errorf("cannot find pod infra container in pod %q", kubecontainer.BuildPodFullName(podName, podNamespace))
}

// PortForward executes socat in the pod's network namespace and copies
// data between stream (representing the user's local connection on their
// computer) and the specified port in the container.
//
// TODO:
// - match cgroups of container
// - should we support nsenter + socat on the host? (current impl)
// - should we support nsenter + socat in a container, running with elevated privs and --pid=host?
func (dm *DockerManager) PortForward(pod *kubecontainer.Pod, port uint16, stream io.ReadWriteCloser) error {
	podInfraContainer := pod.FindContainerByName(PodInfraContainerName)
	if podInfraContainer == nil {
		return noPodInfraContainerError(pod.Name, pod.Namespace)
	}
	container, err := dm.client.InspectContainer(podInfraContainer.ID.ID)
	if err != nil {
		return err
	}

	if !container.State.Running {
		return fmt.Errorf("container not running (%s)", container.ID)
	}

	containerPid := container.State.Pid
	socatPath, lookupErr := exec.LookPath("socat")
	if lookupErr != nil {
		return fmt.Errorf("unable to do port forwarding: socat not found")
	}

	args := []string{"-t", fmt.Sprintf("%d", containerPid), "-n", socatPath, "-", fmt.Sprintf("TCP4:localhost:%d", port)}

	nsenterPath, lookupErr := exec.LookPath("nsenter")
	if lookupErr != nil {
		return fmt.Errorf("unable to do port forwarding: nsenter not found")
	}

	command := exec.Command(nsenterPath, args...)
	command.Stdout = stream

	// If we use Stdin, command.Run() won't return until the goroutine that's copying
	// from stream finishes. Unfortunately, if you have a client like telnet connected
	// via port forwarding, as long as the user's telnet client is connected to the user's
	// local listener that port forwarding sets up, the telnet session never exits. This
	// means that even if socat has finished running, command.Run() won't ever return
	// (because the client still has the connection and stream open).
	//
	// The workaround is to use StdinPipe(), as Wait() (called by Run()) closes the pipe
	// when the command (socat) exits.
	inPipe, err := command.StdinPipe()
	if err != nil {
		return fmt.Errorf("unable to do port forwarding: error creating stdin pipe: %v", err)
	}
	go func() {
		io.Copy(inPipe, stream)
		inPipe.Close()
	}()

	return command.Run()
}
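
// The command assembled above is equivalent to running, on the node:
//
//	nsenter -t <infra-container-pid> -n socat - TCP4:localhost:<port>
//
// i.e. socat, executed inside the pod's network namespace, bridges the user's
// stream to the target port.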

// GetContainerIP gets the IP address of a container's interface using nsenter.
func (dm *DockerManager) GetContainerIP(containerID, interfaceName string) (string, error) {
	_, lookupErr := exec.LookPath("nsenter")
	if lookupErr != nil {
		return "", fmt.Errorf("unable to obtain IP address of container: missing nsenter")
	}
	container, err := dm.client.InspectContainer(containerID)
	if err != nil {
		return "", err
	}

	if !container.State.Running {
		return "", fmt.Errorf("container not running (%s)", container.ID)
	}

	containerPid := container.State.Pid
	extractIPCmd := fmt.Sprintf("ip -4 addr show %s | grep inet | awk -F\" \" '{print $2}'", interfaceName)
	args := []string{"-t", fmt.Sprintf("%d", containerPid), "-n", "--", "bash", "-c", extractIPCmd}
	command := exec.Command("nsenter", args...)
	out, err := command.CombinedOutput()
	if err != nil {
		return "", err
	}
	return string(out), nil
}
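
// The invocation above amounts to running, on the node:
//
//	nsenter -t <container-pid> -n -- bash -c "ip -4 addr show eth0 | grep inet | awk -F' ' '{print $2}'"
//
// which prints the interface's CIDR address (e.g. "10.244.1.5/24", value
// illustrative) from inside the container's network namespace, trailing
// newline included.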

// KillPod kills all containers in the specified pod.
func (dm *DockerManager) KillPod(pod *api.Pod, runningPod kubecontainer.Pod) error {
	// Send the kills in parallel since they may take a long time. Len + 1 since there
	// can be Len errors + the networkPlugin teardown error.
	errs := make(chan error, len(runningPod.Containers)+1)
	wg := sync.WaitGroup{}
	var (
		networkContainer *kubecontainer.Container
		networkSpec      *api.Container
	)
	for _, container := range runningPod.Containers {
		wg.Add(1)
		go func(container *kubecontainer.Container) {
			defer util.HandleCrash()
			defer wg.Done()

			var containerSpec *api.Container
			if pod != nil {
				for i, c := range pod.Spec.Containers {
					if c.Name == container.Name {
						containerSpec = &pod.Spec.Containers[i]
						break
					}
				}
			}

			// TODO: Handle this without signaling the pod infra container to
			// adapt to the generic container runtime.
			if container.Name == PodInfraContainerName {
				// Store the container runtime for later deletion.
				// We do this so that PreStop handlers can run in the network namespace.
				networkContainer = container
				networkSpec = containerSpec
				return
			}

			err := dm.KillContainerInPod(container.ID, containerSpec, pod)
			if err != nil {
				glog.Errorf("Failed to delete container: %v; Skipping pod %q", err, runningPod.ID)
				errs <- err
			}
		}(container)
	}
	wg.Wait()
	if networkContainer != nil {
		if err := dm.networkPlugin.TearDownPod(runningPod.Namespace, runningPod.Name, kubetypes.DockerID(networkContainer.ID.ID)); err != nil {
			glog.Errorf("Failed tearing down the infra container: %v", err)
			errs <- err
		}
		if err := dm.KillContainerInPod(networkContainer.ID, networkSpec, pod); err != nil {
			glog.Errorf("Failed to delete container: %v; Skipping pod %q", err, runningPod.ID)
			errs <- err
		}
	}
	close(errs)
	if len(errs) > 0 {
		errList := []error{}
		for err := range errs {
			errList = append(errList, err)
		}
		return fmt.Errorf("failed to delete containers (%v)", errList)
	}
	return nil
}

// KillContainerInPod kills a container in the pod. It must be passed either a container ID or a container and pod,
// and will attempt to lookup the other information if missing.
func (dm *DockerManager) KillContainerInPod(containerID kubecontainer.ContainerID, container *api.Container, pod *api.Pod) error {
	switch {
	case containerID.IsEmpty():
		// Locate the container.
		pods, err := dm.GetPods(false)
		if err != nil {
			return err
		}
		targetPod := kubecontainer.Pods(pods).FindPod(kubecontainer.GetPodFullName(pod), pod.UID)
		targetContainer := targetPod.FindContainerByName(container.Name)
		if targetContainer == nil {
			return fmt.Errorf("unable to find container %q in pod %q", container.Name, targetPod.Name)
		}
		containerID = targetContainer.ID

	case container == nil || pod == nil:
		// Read information about the container from labels.
		inspect, err := dm.client.InspectContainer(containerID.ID)
		if err != nil {
			return err
		}
		storedPod, storedContainer, cerr := containerAndPodFromLabels(inspect)
		if cerr != nil {
			glog.Errorf("unable to access pod data from container: %v", cerr)
		}
		if container == nil {
			container = storedContainer
		}
		if pod == nil {
			pod = storedPod
		}
	}
	return dm.killContainer(containerID, container, pod)
}

// killContainer accepts a containerID and an optional container or pod containing shutdown policies. Invoke
// KillContainerInPod if information must be retrieved first.
func (dm *DockerManager) killContainer(containerID kubecontainer.ContainerID, container *api.Container, pod *api.Pod) error {
	ID := containerID.ID
	name := ID
	if container != nil {
		name = fmt.Sprintf("%s %s", name, container.Name)
	}
	if pod != nil {
		name = fmt.Sprintf("%s %s/%s", name, pod.Namespace, pod.Name)
	}

	gracePeriod := int64(minimumGracePeriodInSeconds)
	if pod != nil {
		switch {
		case pod.DeletionGracePeriodSeconds != nil:
			gracePeriod = *pod.DeletionGracePeriodSeconds
		case pod.Spec.TerminationGracePeriodSeconds != nil:
			gracePeriod = *pod.Spec.TerminationGracePeriodSeconds
		}
	}
	glog.V(2).Infof("Killing container %q with %d second grace period", name, gracePeriod)
	start := unversioned.Now()

	if pod != nil && container != nil && container.Lifecycle != nil && container.Lifecycle.PreStop != nil {
		glog.V(4).Infof("Running preStop hook for container %q", name)
		done := make(chan struct{})
		go func() {
			defer close(done)
			defer util.HandleCrash()
			if err := dm.runner.Run(containerID, pod, container, container.Lifecycle.PreStop); err != nil {
				glog.Errorf("preStop hook for container %q failed: %v", name, err)
			}
		}()
		select {
		case <-time.After(time.Duration(gracePeriod) * time.Second):
			glog.V(2).Infof("preStop hook for container %q did not complete in %d seconds", name, gracePeriod)
		case <-done:
			glog.V(4).Infof("preStop hook for container %q completed", name)
		}
		// Deduct the time consumed by the preStop hook from the remaining grace period.
		gracePeriod -= int64(unversioned.Now().Sub(start.Time).Seconds())
	}

	// Always give containers a minimal shutdown window to avoid unnecessary SIGKILLs.
	if gracePeriod < minimumGracePeriodInSeconds {
		gracePeriod = minimumGracePeriodInSeconds
	}
	err := dm.client.StopContainer(ID, uint(gracePeriod))
	if _, ok := err.(*docker.ContainerNotRunning); ok && err != nil {
		glog.V(4).Infof("Container %q has already exited", name)
		return nil
	}
	if err == nil {
		glog.V(2).Infof("Container %q exited after %s", name, unversioned.Now().Sub(start.Time))
	} else {
		glog.V(2).Infof("Container %q termination failed after %s: %v", name, unversioned.Now().Sub(start.Time), err)
	}
	ref, ok := dm.containerRefManager.GetRef(containerID)
	if !ok {
		glog.Warningf("No ref for pod %q", name)
	} else {
		// TODO: pass reason down here, and state, or move this call up the stack.
		dm.recorder.Eventf(ref, "Killing", "Killing with docker id %v", util.ShortenString(ID, 12))
		dm.containerRefManager.ClearRef(containerID)
	}
	return err
}
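
// Worked example of the grace period accounting above: with a 30s grace
// period and a preStop hook that takes 12s, StopContainer is invoked with the
// remaining 18s before Docker escalates to SIGKILL; had the hook taken 29s,
// the 1s remainder would be raised back up to minimumGracePeriodInSeconds.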

var errNoPodOnContainer = fmt.Errorf("no pod information labels on Docker container")

// containerAndPodFromLabels tries to load the appropriate container info off of a Docker container's labels.
func containerAndPodFromLabels(inspect *docker.Container) (pod *api.Pod, container *api.Container, err error) {
	if inspect == nil || inspect.Config == nil || inspect.Config.Labels == nil {
		return nil, nil, errNoPodOnContainer
	}
	labels := inspect.Config.Labels

	// The pod data may not be set.
	if body, found := labels[kubernetesPodLabel]; found {
		pod = &api.Pod{}
		if err = latest.GroupOrDie("").Codec.DecodeInto([]byte(body), pod); err == nil {
			name := labels[kubernetesContainerLabel]
			for ix := range pod.Spec.Containers {
				if pod.Spec.Containers[ix].Name == name {
					container = &pod.Spec.Containers[ix]
					break
				}
			}
			if container == nil {
				err = fmt.Errorf("unable to find container %s in pod %v", name, pod)
			}
		} else {
			pod = nil
		}
	}

	// Attempt to find the default grace period if we didn't decode a full pod, but set the generic metadata
	// field (the one used by kill).
	if pod == nil {
		if period, ok := labels[kubernetesTerminationGracePeriodLabel]; ok {
			if seconds, err := strconv.ParseInt(period, 10, 64); err == nil {
				pod = &api.Pod{}
				pod.DeletionGracePeriodSeconds = &seconds
			}
		}
	}

	return
}

// runContainerInPod runs a single container from a pod and returns the docker container ID.
func (dm *DockerManager) runContainerInPod(pod *api.Pod, container *api.Container, netMode, ipcMode, pidMode string) (kubecontainer.ContainerID, error) {
	start := time.Now()
	defer func() {
		metrics.ContainerManagerLatency.WithLabelValues("runContainerInPod").Observe(metrics.SinceInMicroseconds(start))
	}()

	ref, err := kubecontainer.GenerateContainerRef(pod, container)
	if err != nil {
		glog.Errorf("Couldn't make a ref to pod %v, container %v: '%v'", pod.Name, container.Name, err)
	}

	opts, err := dm.generator.GenerateRunContainerOptions(pod, container)
	if err != nil {
		return kubecontainer.ContainerID{}, err
	}

	utsMode := ""
	if pod.Spec.SecurityContext != nil && pod.Spec.SecurityContext.HostNetwork {
		utsMode = "host"
	}
	id, err := dm.runContainer(pod, container, opts, ref, netMode, ipcMode, utsMode, pidMode)
	if err != nil {
		return kubecontainer.ContainerID{}, err
	}

	// Remember this reference so we can report events about this container.
	if ref != nil {
		dm.containerRefManager.SetRef(id, ref)
	}

	if container.Lifecycle != nil && container.Lifecycle.PostStart != nil {
		handlerErr := dm.runner.Run(id, pod, container, container.Lifecycle.PostStart)
		if handlerErr != nil {
			dm.KillContainerInPod(id, container, pod)
			return kubecontainer.ContainerID{}, fmt.Errorf("failed to call event handler: %v", handlerErr)
		}
	}

	// Create a symbolic link to the Docker container log file using a name which captures the
	// full pod name, the container name and the Docker container ID. Cluster level logging will
	// capture these symbolic filenames which can be used for search terms in Elasticsearch or for
	// labels for Cloud Logging.
	podFullName := kubecontainer.GetPodFullName(pod)
	containerLogFile := path.Join(dm.dockerRoot, "containers", id.ID, fmt.Sprintf("%s-json.log", id.ID))
	symlinkFile := LogSymlink(dm.containerLogsDir, podFullName, container.Name, id.ID)
	if err = dm.os.Symlink(containerLogFile, symlinkFile); err != nil {
		glog.Errorf("Failed to create symbolic link to the log file of pod %q container %q: %v", podFullName, container.Name, err)
	}

	// Container information is used in adjusting OOM scores and adding ndots.
	containerInfo, err := dm.client.InspectContainer(id.ID)
	if err != nil {
		return kubecontainer.ContainerID{}, err
	}
	// Ensure the PID actually exists, else we'll move ourselves.
	if containerInfo.State.Pid == 0 {
		return kubecontainer.ContainerID{}, fmt.Errorf("failed to get init PID for Docker container %q", id)
	}

	// Set the OOM score of the container based on the priority of the container.
	// Processes in lower-priority pods should be killed first if the system runs out of memory.
	// The main pod infrastructure container is considered high priority, since if it is killed the
	// whole pod will die.
	var oomScoreAdj int
	if container.Name == PodInfraContainerName {
		oomScoreAdj = qos.PodInfraOOMAdj
	} else {
		oomScoreAdj = qos.GetContainerOOMScoreAdjust(container, dm.machineInfo.MemoryCapacity)
	}
	cgroupName, err := dm.procFs.GetFullContainerName(containerInfo.State.Pid)
	if err != nil {
		return kubecontainer.ContainerID{}, err
	}
	if err = dm.oomAdjuster.ApplyOOMScoreAdjContainer(cgroupName, oomScoreAdj, 5); err != nil {
		return kubecontainer.ContainerID{}, err
	}

	// Currently, Docker does not have a flag by which the ndots option can be passed.
	// (A separate issue has been filed with Docker to add an ndots flag.)
	// The addNDotsOption call appends the ndots option to the resolv.conf file generated by docker.
	// This resolv.conf file is shared by all containers of the same pod, and needs to be modified only once per pod.
	// We modify it when the pause container is created, since it is the first container created in the pod
	// and it holds the networking namespace.
	if container.Name == PodInfraContainerName && utsMode != "host" {
		err = addNDotsOption(containerInfo.ResolvConfPath)
	}

	return id, err
}

func addNDotsOption(resolvFilePath string) error {
	if len(resolvFilePath) == 0 {
		glog.Errorf("DNS ResolvConfPath is empty.")
		return nil
	}

	if _, err := os.Stat(resolvFilePath); os.IsNotExist(err) {
		return fmt.Errorf("DNS ResolvConfPath specified but does not exist. It could not be updated: %s", resolvFilePath)
	}

	glog.V(4).Infof("DNS ResolvConfPath exists: %s. Will attempt to add ndots option: %s", resolvFilePath, ndotsDNSOption)

	if err := appendToFile(resolvFilePath, ndotsDNSOption); err != nil {
		glog.Errorf("resolv.conf could not be updated: %v", err)
		return err
	}
	return nil
}

func appendToFile(filePath, stringToAppend string) error {
	f, err := os.OpenFile(filePath, os.O_APPEND|os.O_WRONLY, 0644)
	if err != nil {
		return err
	}
	defer f.Close()

	_, err = f.WriteString(stringToAppend)
	return err
}
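
// After addNDotsOption runs, the docker-generated resolv.conf ends with the
// ndotsDNSOption entry; an illustrative result (nameserver/search values
// assumed, not taken from this file) is:
//
//	nameserver 10.0.0.10
//	search default.svc.cluster.local svc.cluster.local
//	options ndots:5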

// createPodInfraContainer starts the pod infra container for a pod. Returns the docker container ID of the newly created container.
func (dm *DockerManager) createPodInfraContainer(pod *api.Pod) (kubetypes.DockerID, error) {
	start := time.Now()
	defer func() {
		metrics.ContainerManagerLatency.WithLabelValues("createPodInfraContainer").Observe(metrics.SinceInMicroseconds(start))
	}()
	// Use host networking if specified.
	netNamespace := ""
	var ports []api.ContainerPort

	if dm.networkPlugin.Name() == "cni" {
		netNamespace = "none"
	}

	if pod.Spec.SecurityContext != nil && pod.Spec.SecurityContext.HostNetwork {
		netNamespace = "host"
	} else {
		// Docker only exports ports from the pod infra container. Let's
		// collect all of the relevant ports and export them.
		for _, container := range pod.Spec.Containers {
			ports = append(ports, container.Ports...)
		}
	}

	container := &api.Container{
		Name:            PodInfraContainerName,
		Image:           dm.podInfraContainerImage,
		Ports:           ports,
		ImagePullPolicy: podInfraContainerImagePullPolicy,
	}

	// No pod secrets for the infra container.
	// The pull failure message isn't needed for the infra container.
	if err, _ := dm.imagePuller.PullImage(pod, container, nil); err != nil {
		return "", err
	}

	id, err := dm.runContainerInPod(pod, container, netNamespace, getIPCMode(pod), getPidMode(pod))
	if err != nil {
		return "", err
	}

	return kubetypes.DockerID(id.ID), nil
}

// TODO(vmarmol): This will soon be made non-public when its only use is internal.
// Structure keeping information on changes that need to happen for a pod. The semantics are as follows:
// - startInfraContainer is true if a new Infra Container has to be started and the old one (if running) killed.
//   Additionally, if it is true then containersToKeep has to be empty.
// - infraContainerId has to be set if and only if startInfraContainer is false. It stores the dockerID of the running Infra Container.
// - containersToStart keeps indices of Specs of containers that have to be started.
// - containersToKeep stores a mapping from dockerIDs of running containers to indices of their Specs for containers that
//   should be kept running. If startInfraContainer is false then it contains an entry for infraContainerId (mapped to -1).
//   It shouldn't be the case where containersToStart is empty and containersToKeep contains only infraContainerId. In such a case
//   the Infra Container should be killed, hence it's removed from this map.
// - all running containers which are NOT contained in containersToKeep should be killed.
type empty struct{}
type PodContainerChangesSpec struct {
	StartInfraContainer bool
	InfraContainerId    kubetypes.DockerID
	ContainersToStart   map[int]empty
	ContainersToKeep    map[kubetypes.DockerID]int
}
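
// Illustrative example (IDs invented): for a pod whose infra container is
// healthy and whose second container (index 1) needs a restart, the computed
// spec would look like:
//
//	PodContainerChangesSpec{
//		StartInfraContainer: false,
//		InfraContainerId:    "8dbd9e45",
//		ContainersToStart:   map[int]empty{1: {}},
//		ContainersToKeep:    map[kubetypes.DockerID]int{"8dbd9e45": -1, "a1b2c3d4": 0},
//	}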

func (dm *DockerManager) computePodContainerChanges(pod *api.Pod, runningPod kubecontainer.Pod, podStatus api.PodStatus) (PodContainerChangesSpec, error) {
	start := time.Now()
	defer func() {
		metrics.ContainerManagerLatency.WithLabelValues("computePodContainerChanges").Observe(metrics.SinceInMicroseconds(start))
	}()

	podFullName := kubecontainer.GetPodFullName(pod)
	uid := pod.UID
	glog.V(4).Infof("Syncing Pod %+v, podFullName: %q, uid: %q", pod, podFullName, uid)

	containersToStart := make(map[int]empty)
	containersToKeep := make(map[kubetypes.DockerID]int)

	var err error
	var podInfraContainerID kubetypes.DockerID
	var changed bool
	podInfraContainer := runningPod.FindContainerByName(PodInfraContainerName)
	if podInfraContainer != nil {
		glog.V(4).Infof("Found pod infra container for %q", podFullName)
		changed, err = dm.podInfraContainerChanged(pod, podInfraContainer)
		if err != nil {
			return PodContainerChangesSpec{}, err
		}
	}

	createPodInfraContainer := true
	if podInfraContainer == nil {
		glog.V(2).Infof("Need to restart pod infra container for %q because it is not found", podFullName)
	} else if changed {
		glog.V(2).Infof("Need to restart pod infra container for %q because it is changed", podFullName)
	} else {
		glog.V(4).Infof("Pod infra container looks good, keep it %q", podFullName)
		createPodInfraContainer = false
		podInfraContainerID = kubetypes.DockerID(podInfraContainer.ID.ID)
		containersToKeep[podInfraContainerID] = -1
	}

	for index, container := range pod.Spec.Containers {
		expectedHash := kubecontainer.HashContainer(&container)

		c := runningPod.FindContainerByName(container.Name)
		if c == nil {
			if kubecontainer.ShouldContainerBeRestarted(&container, pod, &podStatus) {
				// If we are here it means that the container is dead and should be restarted, or never existed and should
				// be created. We may be inserting this ID again if the container has changed and it has
				// RestartPolicy::Always, but it's not a big deal.
				glog.V(3).Infof("Container %+v is dead, but RestartPolicy says that we should restart it.", container)
				containersToStart[index] = empty{}
			}
			continue
		}

		containerID := kubetypes.DockerID(c.ID.ID)
		hash := c.Hash
		glog.V(3).Infof("pod %q container %q exists as %v", podFullName, container.Name, containerID)

		if createPodInfraContainer {
			// createPodInfraContainer == true and the container exists.
			// If we're creating the infra container, everything will be killed anyway.
			// If RestartPolicy is Always or OnFailure we restart containers that were running before we
			// killed them when restarting the infra container.
			if pod.Spec.RestartPolicy != api.RestartPolicyNever {
				glog.V(1).Infof("Infra Container is being recreated. %q will be restarted.", container.Name)
				containersToStart[index] = empty{}
			}
			continue
		}

		// At this point, the container is running and the pod infra container is good.
		// We will look for changes and check healthiness for the container.
		containerChanged := hash != 0 && hash != expectedHash
		if containerChanged {
			glog.Infof("pod %q container %q hash changed (%d vs %d), it will be killed and re-created.", podFullName, container.Name, hash, expectedHash)
			containersToStart[index] = empty{}
			continue
		}

		result, err := dm.prober.ProbeLiveness(pod, podStatus, container, c.ID, c.Created)
		if err != nil {
			// TODO(vmarmol): examine this logic.
			glog.V(2).Infof("probe errored, keeping container %q: %v", container.Name, err)
			containersToKeep[containerID] = index
			continue
		}
		if result == probe.Success {
			glog.V(4).Infof("probe success: %q", container.Name)
			containersToKeep[containerID] = index
			continue
		}
		if pod.Spec.RestartPolicy != api.RestartPolicyNever {
			glog.Infof("pod %q container %q is unhealthy (probe result: %v), it will be killed and re-created.", podFullName, container.Name, result)
			containersToStart[index] = empty{}
		}
	}

	// After the loop one of the following should be true:
	// - createPodInfraContainer is true and containersToKeep is empty.
	//   (In fact, when createPodInfraContainer is true, containersToKeep will not be touched.)
	// - createPodInfraContainer is false and containersToKeep contains at least the ID of the Infra Container.

	// If the Infra container is the last running one, we don't want to keep it.
	if !createPodInfraContainer && len(containersToStart) == 0 && len(containersToKeep) == 1 {
		containersToKeep = make(map[kubetypes.DockerID]int)
	}

	return PodContainerChangesSpec{
		StartInfraContainer: createPodInfraContainer,
		InfraContainerId:    podInfraContainerID,
		ContainersToStart:   containersToStart,
		ContainersToKeep:    containersToKeep,
	}, nil
}

// updateReasonCache updates the failure reason based on the latest error.
func (dm *DockerManager) updateReasonCache(pod *api.Pod, container *api.Container, briefError string, err error) {
	if briefError == "" || err == nil {
		return
	}
	errString := err.Error()
	dm.reasonCache.Add(pod.UID, container.Name, briefError, errString)
}

// clearReasonCache removes the entry in the reason cache.
func (dm *DockerManager) clearReasonCache(pod *api.Pod, container *api.Container) {
	dm.reasonCache.Remove(pod.UID, container.Name)
}
|
|
|
|
|
2015-05-01 01:37:15 +00:00
|
|
|
// Sync the running pod to match the specified desired pod.
|
2015-08-13 12:59:15 +00:00
|
|
|
func (dm *DockerManager) SyncPod(pod *api.Pod, runningPod kubecontainer.Pod, podStatus api.PodStatus, pullSecrets []api.Secret, backOff *util.Backoff) error {
|
2015-06-09 21:01:23 +00:00
|
|
|
start := time.Now()
|
|
|
|
defer func() {
|
|
|
|
metrics.ContainerManagerLatency.WithLabelValues("SyncPod").Observe(metrics.SinceInMicroseconds(start))
|
|
|
|
}()
|
|
|
|
|
2015-05-01 01:37:15 +00:00
|
|
|
podFullName := kubecontainer.GetPodFullName(pod)
|
|
|
|
containerChanges, err := dm.computePodContainerChanges(pod, runningPod, podStatus)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2015-08-20 01:57:58 +00:00
|
|
|
glog.V(3).Infof("Got container changes for pod %q: %+v", podFullName, containerChanges)
|
2015-05-01 01:37:15 +00:00
|
|
|
|
|
|
|
if containerChanges.StartInfraContainer || (len(containerChanges.ContainersToKeep) == 0 && len(containerChanges.ContainersToStart) == 0) {
|
|
|
|
if len(containerChanges.ContainersToKeep) == 0 && len(containerChanges.ContainersToStart) == 0 {
|
|
|
|
glog.V(4).Infof("Killing Infra Container for %q because all other containers are dead.", podFullName)
|
|
|
|
} else {
|
|
|
|
glog.V(4).Infof("Killing Infra Container for %q, will start new one", podFullName)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Killing phase: if we want to start new infra container, or nothing is running kill everything (including infra container)
|
2015-08-20 01:57:58 +00:00
|
|
|
err = dm.KillPod(pod, runningPod)
|
2015-05-01 01:37:15 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// Otherwise kill any containers in this pod which are not specified as ones to keep.
|
|
|
|
for _, container := range runningPod.Containers {
|
2015-10-09 17:24:31 +00:00
|
|
|
_, keep := containerChanges.ContainersToKeep[kubetypes.DockerID(container.ID.ID)]
|
2015-05-01 01:37:15 +00:00
|
|
|
if !keep {
|
|
|
|
glog.V(3).Infof("Killing unwanted container %+v", container)
|
2015-08-20 01:57:58 +00:00
|
|
|
// attempt to find the appropriate container policy
|
|
|
|
var podContainer *api.Container
|
|
|
|
for i, c := range pod.Spec.Containers {
|
|
|
|
if c.Name == container.Name {
|
|
|
|
podContainer = &pod.Spec.Containers[i]
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
err = dm.KillContainerInPod(container.ID, podContainer, pod)
|
2015-05-01 01:37:15 +00:00
|
|
|
if err != nil {
|
|
|
|
glog.Errorf("Error killing container: %v", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// If we should create infra container then we do it first.
|
|
|
|
podInfraContainerID := containerChanges.InfraContainerId
|
|
|
|
if containerChanges.StartInfraContainer && (len(containerChanges.ContainersToStart) > 0) {
|
|
|
|
glog.V(4).Infof("Creating pod infra container for %q", podFullName)
|
2015-09-08 07:34:10 +00:00
|
|
|
podInfraContainerID, err = dm.createPodInfraContainer(pod)
|
2015-10-06 13:56:00 +00:00
|
|
|
if err != nil {
|
|
|
|
glog.Errorf("Failed to create pod infra container: %v; Skipping pod %q", err, podFullName)
|
|
|
|
return err
|
|
|
|
}
|
2015-09-08 07:34:10 +00:00
|
|
|
|
|
|
|
// Call the networking plugin
|
2015-10-06 13:56:00 +00:00
|
|
|
err = dm.networkPlugin.SetUpPod(pod.Namespace, pod.Name, podInfraContainerID)
|
2015-05-01 01:37:15 +00:00
|
|
|
if err != nil {
|
|
|
|
glog.Errorf("Failed to create pod infra container: %v; Skipping pod %q", err, podFullName)
|
2015-10-06 13:56:00 +00:00
|
|
|
// Delete infra container
|
|
|
|
if delErr := dm.KillContainerInPod(kubecontainer.ContainerID{
|
|
|
|
ID: string(podInfraContainerID),
|
|
|
|
Type: "docker"}, nil, pod); delErr != nil {
|
|
|
|
glog.Warningf("Clear infra container failed for pod %q: %v", podFullName, delErr)
|
|
|
|
}
|
2015-05-01 01:37:15 +00:00
|
|
|
return err
|
|
|
|
}
|
2015-09-06 11:53:20 +00:00
|
|
|
|
|
|
|
// Setup the host interface (FIXME: move to networkPlugin when ready)
|
|
|
|
podInfraContainer, err := dm.client.InspectContainer(string(podInfraContainerID))
|
|
|
|
if err != nil {
|
|
|
|
glog.Errorf("Failed to inspect pod infra container: %v; Skipping pod %q", err, podFullName)
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err = hairpin.SetUpContainer(podInfraContainer.State.Pid, "eth0"); err != nil {
|
|
|
|
glog.Warningf("Hairpin setup failed for pod %q: %v", podFullName, err)
|
|
|
|
}
|
2015-05-01 01:37:15 +00:00
|
|
|
}

	// Start everything
	for idx := range containerChanges.ContainersToStart {
		container := &pod.Spec.Containers[idx]

		// containerChanges.StartInfraContainer causes all containers to be restarted
		// for config reasons, so ignore backoff in that case.
		if !containerChanges.StartInfraContainer && dm.doBackOff(pod, container, podStatus, backOff) {
			glog.V(4).Infof("Backing Off restarting container %+v in pod %v", container, podFullName)
			continue
		}
		glog.V(4).Infof("Creating container %+v in pod %v", container, podFullName)
		err, msg := dm.imagePuller.PullImage(pod, container, pullSecrets)
		if err != nil {
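			// Record the pull failure in the reason cache so it surfaces as the
			// waiting reason in the pod's container status.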
			dm.updateReasonCache(pod, container, err.Error(), errors.New(msg))
			continue
		}

		if container.SecurityContext != nil && container.SecurityContext.RunAsNonRoot {
			err := dm.verifyNonRoot(container)
			dm.updateReasonCache(pod, container, "VerifyNonRootError", err)
			if err != nil {
				glog.Errorf("Error running pod %q container %q: %v", kubecontainer.GetPodFullName(pod), container.Name, err)
				continue
			}
		}

		// TODO(dawnchen): Check RestartPolicy.DelaySeconds before restarting a container
		// Note: when configuring the pod's containers anything that can be configured by pointing
		// to the namespace of the infra container should use namespaceMode. This includes things like
		// the net namespace and IPC namespace. PID mode cannot point to another container right now.
		// See createPodInfraContainer for infra container setup.
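		// "container:<id>" is Docker's syntax (as in `docker run --net=container:<id>`)
		// for joining the namespaces of an existing container.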
		namespaceMode := fmt.Sprintf("container:%v", podInfraContainerID)
		_, err = dm.runContainerInPod(pod, container, namespaceMode, namespaceMode, getPidMode(pod))
		dm.updateReasonCache(pod, container, kubecontainer.ErrRunContainer.Error(), err)
		if err != nil {
			// TODO(bburns) : Perhaps blacklist a container after N failures?
			glog.Errorf("Error running pod %q container %q: %v", kubecontainer.GetPodFullName(pod), container.Name, err)
			continue
		}
		// Successfully started the container; clear the entry in the failure
		// reason cache.
		dm.clearReasonCache(pod, container)
	}

	return nil
}

// verifyNonRoot returns an error if the container or image will run as the root user.
func (dm *DockerManager) verifyNonRoot(container *api.Container) error {
	if securitycontext.HasRunAsUser(container) {
		if securitycontext.HasRootRunAsUser(container) {
			return fmt.Errorf("container's runAsUser breaks non-root policy")
		}
		return nil
	}
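	// No runAsUser was set on the container, so fall back to checking the
	// image's USER directive.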

	imgRoot, err := dm.isImageRoot(container.Image)
	if err != nil {
		return err
	}
	if imgRoot {
		return fmt.Errorf("container has no runAsUser and image will run as root")
	}

	return nil
}

// isImageRoot returns true if the user directive is not set on the image, the user is set to 0,
// or the user is set to root. If there is an error inspecting the image, this method returns
// false along with the error.
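// For example, an image built with "USER 1001" is treated as non-root, "USER 0" as
// root, and a symbolic name such as "USER nobody" is rejected as non-numeric below.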
func (dm *DockerManager) isImageRoot(image string) (bool, error) {
	img, err := dm.client.InspectImage(image)
	if err != nil {
		return false, err
	}
	if img == nil || img.Config == nil {
		return false, fmt.Errorf("unable to inspect image %s, nil Config", image)
	}

	user := getUidFromUser(img.Config.User)
	// if no user is defined, the container will run as root
	if user == "" {
		return true, nil
	}
	// do not allow non-numeric user directives
	uid, err := strconv.Atoi(user)
	if err != nil {
		return false, fmt.Errorf("unable to validate image is non-root, non-numeric user (%s) is not allowed", user)
	}
	// user is numeric, check for 0
	return uid == 0, nil
}

// getUidFromUser splits the uid out of a uid:gid string.
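// For example, "1001:1001" yields "1001", while a bare "1001" is returned unchanged.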
func getUidFromUser(id string) string {
	if id == "" {
		return id
	}
	// split instances where the id may contain uid:gid
	if strings.Contains(id, ":") {
		return strings.Split(id, ":")[0]
	}
	// no gid, just return the id
	return id
}

func (dm *DockerManager) doBackOff(pod *api.Pod, container *api.Container, podStatus api.PodStatus, backOff *util.Backoff) bool {
	var ts unversioned.Time
	for _, containerStatus := range podStatus.ContainerStatuses {
		if containerStatus.Name != container.Name {
			continue
		}
		// first failure
		if containerStatus.State.Terminated != nil && !containerStatus.State.Terminated.FinishedAt.IsZero() {
			ts = containerStatus.State.Terminated.FinishedAt
			break
		}
		// state is waiting and the failure timestamp is in LastTerminationState
		if (containerStatus.State.Waiting != nil) && (containerStatus.LastTerminationState.Terminated != nil) {
			ts = containerStatus.LastTerminationState.Terminated.FinishedAt
			break
		}
	}

	// found a container that requires backoff
	if !ts.IsZero() {
		dockerName := KubeletContainerName{
			PodFullName:   kubecontainer.GetPodFullName(pod),
			PodUID:        pod.UID,
			ContainerName: container.Name,
		}
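		// The stable name is identical across restarts of the same container spec,
		// so successive failures accumulate in a single backoff entry.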
		stableName, _ := BuildDockerName(dockerName, container)
		if backOff.IsInBackOffSince(stableName, ts.Time) {
			if ref, err := kubecontainer.GenerateContainerRef(pod, container); err == nil {
				dm.recorder.Eventf(ref, "Backoff", "Back-off restarting failed docker container")
			}
			err := fmt.Errorf("Back-off %s restarting failed container=%s pod=%s", backOff.Get(stableName), container.Name, kubecontainer.GetPodFullName(pod))
			dm.updateReasonCache(pod, container, kubecontainer.ErrCrashLoopBackOff.Error(), err)
			glog.Infof("%s", err.Error())
			return true
		}
		backOff.Next(stableName, ts.Time)
	}
	dm.clearReasonCache(pod, container)
	return false
}

// getPidMode returns the pid mode to use on the docker container based on pod.Spec.HostPID.
func getPidMode(pod *api.Pod) string {
	pidMode := ""
	if pod.Spec.SecurityContext != nil && pod.Spec.SecurityContext.HostPID {
		pidMode = "host"
	}
	return pidMode
}

// getIPCMode returns the ipc mode to use on the docker container based on pod.Spec.HostIPC.
func getIPCMode(pod *api.Pod) string {
	ipcMode := ""
	if pod.Spec.SecurityContext != nil && pod.Spec.SecurityContext.HostIPC {
		ipcMode = "host"
	}
	return ipcMode
}

// GetNetNs returns the network namespace path for the given container
func (dm *DockerManager) GetNetNs(containerID kubecontainer.ContainerID) (string, error) {
	inspectResult, err := dm.client.InspectContainer(containerID.ID)
	if err != nil {
		glog.Errorf("Error inspecting container: %v", err)
		return "", err
	}
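	// DockerNetnsFmt expands the container's PID into its network namespace path
	// (of the form /proc/<pid>/ns/net), i.e. the netns of the container's init process.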
	netnsPath := fmt.Sprintf(DockerNetnsFmt, inspectResult.State.Pid)
	return netnsPath, nil
}

// GarbageCollect removes dead containers according to the given container GC policy.
func (dm *DockerManager) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy) error {
	return dm.containerGC.GarbageCollect(gcPolicy)
}