2015-04-02 20:14:52 +00:00
/ *
2015-05-01 16:19:44 +00:00
Copyright 2015 The Kubernetes Authors All rights reserved .
2015-04-02 20:14:52 +00:00
Licensed under the Apache License , Version 2.0 ( the "License" ) ;
you may not use this file except in compliance with the License .
You may obtain a copy of the License at
http : //www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing , software
distributed under the License is distributed on an "AS IS" BASIS ,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND , either express or implied .
See the License for the specific language governing permissions and
limitations under the License .
* /
package dockertools
import (
2015-04-22 17:20:05 +00:00
"bytes"
2015-04-02 20:14:52 +00:00
"errors"
"fmt"
"io"
"io/ioutil"
"os"
2015-04-22 17:20:05 +00:00
"os/exec"
2015-04-02 20:14:52 +00:00
"path"
2016-02-08 17:40:19 +00:00
"regexp"
2015-04-02 20:14:52 +00:00
"strconv"
"strings"
2015-04-09 18:57:53 +00:00
"sync"
2015-05-08 16:48:31 +00:00
"time"
2015-04-02 20:14:52 +00:00
2016-01-22 20:54:23 +00:00
"github.com/coreos/go-semver/semver"
2016-04-04 08:56:49 +00:00
dockertypes "github.com/docker/engine-api/types"
2016-04-14 17:36:13 +00:00
dockercontainer "github.com/docker/engine-api/types/container"
dockerstrslice "github.com/docker/engine-api/types/strslice"
dockernat "github.com/docker/go-connections/nat"
2015-08-05 22:05:17 +00:00
docker "github.com/fsouza/go-dockerclient"
"github.com/golang/glog"
2015-10-16 03:00:28 +00:00
cadvisorapi "github.com/google/cadvisor/info/v1"
2015-08-05 22:03:47 +00:00
"k8s.io/kubernetes/pkg/api"
2015-09-17 22:21:55 +00:00
"k8s.io/kubernetes/pkg/api/unversioned"
2015-09-03 21:40:58 +00:00
"k8s.io/kubernetes/pkg/client/record"
2015-08-05 22:03:47 +00:00
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/kubelet/metrics"
"k8s.io/kubernetes/pkg/kubelet/network"
2015-09-06 11:53:20 +00:00
"k8s.io/kubernetes/pkg/kubelet/network/hairpin"
2015-10-19 22:15:59 +00:00
proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results"
2015-08-04 00:28:33 +00:00
"k8s.io/kubernetes/pkg/kubelet/qos"
2015-10-09 17:24:31 +00:00
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
2016-03-14 08:35:49 +00:00
"k8s.io/kubernetes/pkg/kubelet/util/cache"
2015-12-07 21:31:02 +00:00
"k8s.io/kubernetes/pkg/kubelet/util/format"
2016-01-22 05:11:30 +00:00
"k8s.io/kubernetes/pkg/runtime"
2015-08-05 22:03:47 +00:00
"k8s.io/kubernetes/pkg/securitycontext"
"k8s.io/kubernetes/pkg/types"
2016-03-09 02:58:24 +00:00
"k8s.io/kubernetes/pkg/util/flowcontrol"
2015-08-04 00:28:33 +00:00
"k8s.io/kubernetes/pkg/util/oom"
"k8s.io/kubernetes/pkg/util/procfs"
2016-01-15 07:32:10 +00:00
utilruntime "k8s.io/kubernetes/pkg/util/runtime"
2016-01-11 07:55:51 +00:00
utilstrings "k8s.io/kubernetes/pkg/util/strings"
2015-04-09 18:57:53 +00:00
)
const (
	// DockerType is the identifier string for the docker container runtime.
	DockerType = "docker"

	// minimumDockerAPIVersion is a docker remote API version string.
	// NOTE(review): presumably the oldest API version the kubelet accepts;
	// confirm against the version check that consumes it.
	minimumDockerAPIVersion = "1.20"

	// Remote API version for docker daemon version v1.10
	// https://docs.docker.com/engine/reference/api/docker_remote_api/
	dockerV110APIVersion = "1.22"

	// dockerv110APIVersion is the older spelling of dockerV110APIVersion and
	// is kept so existing references keep compiling. It previously held
	// "1.21", which contradicts the documented docker v1.10 API version
	// above, so features gated on docker v1.10 were enabled one API version
	// too early. It now aliases the correct value.
	dockerv110APIVersion = dockerV110APIVersion

	// ndots specifies the minimum number of dots that a domain name must
	// contain for the resolver to consider it as FQDN (fully-qualified).
	// We want SRV lookup names like _dns._udp.kube-dns.default.svc to be
	// considered relative, hence ndots is set to 5.
	ndotsDNSOption = "options ndots:5\n"

	// In order to avoid unnecessary SIGKILLs, give every container a minimum
	// grace period after SIGTERM. Docker will guarantee the termination, but
	// SIGTERM is potentially dangerous.
	// TODO: evaluate whether there are scenarios in which SIGKILL is
	// preferable to SIGTERM for certain process types, which may justify
	// setting this to 0.
	minimumGracePeriodInSeconds = 2

	// DockerNetnsFmt is a format string producing the path of a network
	// namespace file under /proc; the single %v verb is filled in by callers
	// (presumably with a process ID — confirm at the call sites).
	DockerNetnsFmt = "/proc/%v/ns/net"

	// String used to detect docker host mode for various namespaces (e.g.
	// networking). Must match the value returned by docker inspect -f
	// '{{.HostConfig.NetworkMode}}'.
	namespaceModeHost = "host"
)
2016-02-23 21:27:28 +00:00
var (
// DockerManager implements the Runtime interface.
_ kubecontainer . Runtime = & DockerManager { }
2015-04-30 17:12:23 +00:00
2016-02-23 21:27:28 +00:00
// TODO: make this a TTL based pull (if image older than X policy, pull)
podInfraContainerImagePullPolicy = api . PullIfNotPresent
// Default set of security options. Seccomp is disabled by default until
// github issue #20870 is resolved.
defaultSecurityOpt = [ ] string { "seccomp:unconfined" }
)
2015-06-09 00:53:24 +00:00
2015-04-02 20:14:52 +00:00
type DockerManager struct {
2015-04-23 21:16:59 +00:00
client DockerInterface
recorder record . EventRecorder
containerRefManager * kubecontainer . RefManager
2015-04-27 20:03:55 +00:00
os kubecontainer . OSInterface
2015-10-16 03:00:28 +00:00
machineInfo * cadvisorapi . MachineInfo
2015-04-23 21:16:59 +00:00
2015-06-04 21:36:59 +00:00
// The image name of the pod infra container.
podInfraContainerImage string
2016-02-05 15:47:06 +00:00
// (Optional) Additional environment variables to be set for the pod infra container.
podInfraContainerEnv [ ] api . EnvVar
2016-01-27 08:08:49 +00:00
2016-01-27 01:46:15 +00:00
// TODO(yifan): Record the pull failure so we can eliminate the image checking?
2015-08-10 17:28:39 +00:00
// Lower level docker image puller.
dockerPuller DockerPuller
// wrapped image puller.
imagePuller kubecontainer . ImagePuller
2015-04-27 20:03:55 +00:00
// Root of the Docker runtime.
dockerRoot string
// Directory of container logs.
containerLogsDir string
2015-04-28 18:02:29 +00:00
// Network plugin.
networkPlugin network . NetworkPlugin
2015-04-29 00:51:21 +00:00
2015-10-19 22:15:59 +00:00
// Health check results.
livenessManager proberesults . Manager
2015-05-01 01:37:15 +00:00
2016-01-28 23:57:38 +00:00
// RuntimeHelper that wraps kubelet to generate runtime container options.
runtimeHelper kubecontainer . RuntimeHelper
2015-05-01 01:37:15 +00:00
// Runner of lifecycle events.
runner kubecontainer . HandlerRunner
2015-05-27 12:51:01 +00:00
// Handler used to execute commands in containers.
execHandler ExecHandler
2015-08-04 00:28:33 +00:00
// Used to set OOM scores of processes.
2015-09-28 08:00:43 +00:00
oomAdjuster * oom . OOMAdjuster
2015-08-04 00:28:33 +00:00
// Get information from /proc mount.
2015-11-13 23:47:25 +00:00
procFs procfs . ProcFSInterface
2015-09-01 13:27:01 +00:00
// If true, enforce container cpu limits with CFS quota support
cpuCFSQuota bool
2015-10-03 15:39:15 +00:00
// Container GC manager
containerGC * containerGC
2016-01-21 16:44:28 +00:00
// Support for gathering custom metrics.
enableCustomMetrics bool
2016-02-08 18:52:30 +00:00
// If true, the "hairpin mode" flag is set on container interfaces.
// A false value means the kubelet just backs off from setting it,
// it might already be true.
configureHairpinMode bool
2016-02-26 09:06:26 +00:00
// The api version cache of docker daemon.
2016-03-14 08:35:49 +00:00
versionCache * cache . VersionCache
2015-04-02 20:14:52 +00:00
}
2016-03-02 21:29:53 +00:00
// podGetter is the part of the pod.Manager interface used here, split out so
// that tests can provide their own implementation.
type podGetter interface {
	GetPodByUID(uid types.UID) (*api.Pod, bool)
}
2016-02-05 15:47:06 +00:00
// PodInfraContainerEnv returns a runtime option that appends one api.EnvVar
// per entry of env to the DockerManager's pod infra container environment.
// The option type-asserts the runtime to *DockerManager, so applying it to
// any other runtime implementation panics. Map iteration order is random, so
// the order of the appended variables is not deterministic.
func PodInfraContainerEnv(env map[string]string) kubecontainer.Option {
	return func(rt kubecontainer.Runtime) {
		manager := rt.(*DockerManager)
		for name, value := range env {
			envVar := api.EnvVar{Name: name, Value: value}
			manager.podInfraContainerEnv = append(manager.podInfraContainerEnv, envVar)
		}
	}
}
2015-04-23 21:16:59 +00:00
func NewDockerManager (
client DockerInterface ,
recorder record . EventRecorder ,
2015-10-19 22:15:59 +00:00
livenessManager proberesults . Manager ,
2015-04-23 21:16:59 +00:00
containerRefManager * kubecontainer . RefManager ,
2016-03-02 21:29:53 +00:00
podGetter podGetter ,
2015-10-16 03:00:28 +00:00
machineInfo * cadvisorapi . MachineInfo ,
2015-04-23 21:16:59 +00:00
podInfraContainerImage string ,
qps float32 ,
2015-04-27 20:03:55 +00:00
burst int ,
containerLogsDir string ,
2015-04-28 18:02:29 +00:00
osInterface kubecontainer . OSInterface ,
2015-04-29 00:51:21 +00:00
networkPlugin network . NetworkPlugin ,
2016-01-28 23:57:38 +00:00
runtimeHelper kubecontainer . RuntimeHelper ,
2015-10-09 17:24:31 +00:00
httpClient kubetypes . HttpGetter ,
2015-08-04 00:28:33 +00:00
execHandler ExecHandler ,
2015-09-28 08:00:43 +00:00
oomAdjuster * oom . OOMAdjuster ,
2015-11-13 23:47:25 +00:00
procFs procfs . ProcFSInterface ,
2015-10-02 13:45:46 +00:00
cpuCFSQuota bool ,
2016-03-09 02:58:24 +00:00
imageBackOff * flowcontrol . Backoff ,
2016-01-21 16:44:28 +00:00
serializeImagePulls bool ,
2016-02-05 15:47:06 +00:00
enableCustomMetrics bool ,
2016-02-08 18:52:30 +00:00
hairpinMode bool ,
2016-02-05 15:47:06 +00:00
options ... kubecontainer . Option ) * DockerManager {
2016-03-25 18:57:14 +00:00
// Wrap the docker client with instrumentedDockerInterface
client = newInstrumentedDockerInterface ( client )
2015-10-02 13:45:46 +00:00
2015-04-27 20:03:55 +00:00
// Work out the location of the Docker runtime, defaulting to /var/lib/docker
// if there are any problems.
dockerRoot := "/var/lib/docker"
dockerInfo , err := client . Info ( )
if err != nil {
glog . Errorf ( "Failed to execute Info() call to the Docker client: %v" , err )
glog . Warningf ( "Using fallback default of /var/lib/docker for location of Docker runtime" )
} else {
2015-12-17 18:24:30 +00:00
dockerRoot = dockerInfo . Get ( "DockerRootDir" )
glog . Infof ( "Setting dockerRoot to %s" , dockerRoot )
2015-04-27 20:03:55 +00:00
}
2015-05-01 01:37:15 +00:00
dm := & DockerManager {
2015-08-04 00:28:33 +00:00
client : client ,
recorder : recorder ,
containerRefManager : containerRefManager ,
os : osInterface ,
machineInfo : machineInfo ,
2015-06-04 21:36:59 +00:00
podInfraContainerImage : podInfraContainerImage ,
2015-08-10 17:28:39 +00:00
dockerPuller : newDockerPuller ( client , qps , burst ) ,
2015-04-27 20:03:55 +00:00
dockerRoot : dockerRoot ,
containerLogsDir : containerLogsDir ,
2015-04-28 18:02:29 +00:00
networkPlugin : networkPlugin ,
2015-10-19 22:15:59 +00:00
livenessManager : livenessManager ,
2016-01-28 23:57:38 +00:00
runtimeHelper : runtimeHelper ,
2015-05-27 12:51:01 +00:00
execHandler : execHandler ,
2015-08-04 00:28:33 +00:00
oomAdjuster : oomAdjuster ,
procFs : procFs ,
2015-09-01 13:27:01 +00:00
cpuCFSQuota : cpuCFSQuota ,
2016-01-21 16:44:28 +00:00
enableCustomMetrics : enableCustomMetrics ,
2016-02-08 18:52:30 +00:00
configureHairpinMode : hairpinMode ,
2015-04-13 19:24:01 +00:00
}
2015-05-01 01:37:15 +00:00
dm . runner = lifecycle . NewHandlerRunner ( httpClient , dm , dm )
2015-10-20 21:49:44 +00:00
if serializeImagePulls {
2015-10-26 07:18:45 +00:00
dm . imagePuller = kubecontainer . NewSerializedImagePuller ( kubecontainer . FilterEventRecorder ( recorder ) , dm , imageBackOff )
2015-10-20 21:49:44 +00:00
} else {
2015-10-26 07:18:45 +00:00
dm . imagePuller = kubecontainer . NewImagePuller ( kubecontainer . FilterEventRecorder ( recorder ) , dm , imageBackOff )
2015-10-20 21:49:44 +00:00
}
2016-03-02 21:29:53 +00:00
dm . containerGC = NewContainerGC ( client , podGetter , containerLogsDir )
2015-05-01 22:25:11 +00:00
2016-02-05 15:47:06 +00:00
// apply optional settings..
for _ , optf := range options {
optf ( dm )
}
2015-05-01 01:37:15 +00:00
return dm
2015-04-09 18:57:53 +00:00
}
2015-04-29 03:25:25 +00:00
// GetContainerLogs returns logs of a specific container. By
// default, it returns a snapshot of the container log. Set 'follow' to true to
// stream the log. Set 'follow' to false and specify the number of lines (e.g.
2015-04-02 20:14:52 +00:00
// "100" or "all") to tail the log.
// TODO: Make 'RawTerminal' option flagable.
2015-10-07 17:58:05 +00:00
func ( dm * DockerManager ) GetContainerLogs ( pod * api . Pod , containerID kubecontainer . ContainerID , logOptions * api . PodLogOptions , stdout , stderr io . Writer ) ( err error ) {
2015-09-10 03:46:11 +00:00
var since int64
if logOptions . SinceSeconds != nil {
t := unversioned . Now ( ) . Add ( - time . Duration ( * logOptions . SinceSeconds ) * time . Second )
since = t . Unix ( )
}
if logOptions . SinceTime != nil {
since = logOptions . SinceTime . Unix ( )
}
2015-04-02 20:14:52 +00:00
opts := docker . LogsOptions {
2015-10-07 17:58:05 +00:00
Container : containerID . ID ,
2015-04-02 20:14:52 +00:00
Stdout : true ,
Stderr : true ,
OutputStream : stdout ,
ErrorStream : stderr ,
2015-09-10 03:46:11 +00:00
Timestamps : logOptions . Timestamps ,
Since : since ,
Follow : logOptions . Follow ,
2015-04-02 20:14:52 +00:00
RawTerminal : false ,
}
2015-10-29 21:12:16 +00:00
if logOptions . TailLines != nil {
2015-09-10 03:46:11 +00:00
opts . Tail = strconv . FormatInt ( * logOptions . TailLines , 10 )
2015-04-02 20:14:52 +00:00
}
2015-04-20 03:26:07 +00:00
err = dm . client . Logs ( opts )
2015-04-02 20:14:52 +00:00
return
}
// Sentinel errors reported by the docker manager.
var (
	// ErrNoContainersInPod signals that a pod has no containers at all.
	ErrNoContainersInPod = errors.New("NoContainersInPod")

	// ErrNoPodInfraContainerInPod signals that a pod lacks its pod infra
	// container.
	ErrNoPodInfraContainerInPod = errors.New("NoPodInfraContainerInPod")

	// ErrContainerCannotRun signals that a container was created but is not
	// able to run properly.
	ErrContainerCannotRun = errors.New("ContainerCannotRun")
)
2015-10-13 19:51:37 +00:00
// determineContainerIP determines the IP address of the given container. It is expected
// that the container passed is the infrastructure container of a pod and the responsibility
// of the caller to ensure that the correct container is passed.
2016-04-04 22:27:20 +00:00
func ( dm * DockerManager ) determineContainerIP ( podNamespace , podName string , container * dockertypes . ContainerJSON ) string {
2015-10-13 19:51:37 +00:00
result := ""
if container . NetworkSettings != nil {
result = container . NetworkSettings . IPAddress
}
if dm . networkPlugin . Name ( ) != network . DefaultPluginName {
2016-04-06 20:45:16 +00:00
netStatus , err := dm . networkPlugin . Status ( podNamespace , podName , kubecontainer . DockerID ( container . ID ) . ContainerID ( ) )
2015-10-13 19:51:37 +00:00
if err != nil {
glog . Errorf ( "NetworkPlugin %s failed on the status hook for pod '%s' - %v" , dm . networkPlugin . Name ( ) , podName , err )
} else if netStatus != nil {
result = netStatus . IP . String ( )
}
}
return result
}
2015-12-05 00:06:25 +00:00
func ( dm * DockerManager ) inspectContainer ( id string , podName , podNamespace string ) ( * kubecontainer . ContainerStatus , string , error ) {
var ip string
iResult , err := dm . client . InspectContainer ( id )
2015-04-02 20:14:52 +00:00
if err != nil {
2015-12-05 00:06:25 +00:00
return nil , ip , err
2015-04-02 20:14:52 +00:00
}
2015-12-05 00:06:25 +00:00
glog . V ( 4 ) . Infof ( "Container inspect result: %+v" , * iResult )
2015-04-02 20:14:52 +00:00
2015-12-05 00:06:25 +00:00
// TODO: Get k8s container name by parsing the docker name. This will be
// replaced by checking docker labels eventually.
dockerName , hash , err := ParseDockerName ( iResult . Name )
if err != nil {
return nil , ip , fmt . Errorf ( "Unable to parse docker name %q" , iResult . Name )
}
containerName := dockerName . ContainerName
2015-10-23 20:02:32 +00:00
2015-11-13 19:37:33 +00:00
var containerInfo * labelledContainerInfo
2015-12-30 07:46:52 +00:00
containerInfo = getContainerInfoFromLabel ( iResult . Config . Labels )
2015-10-23 20:02:32 +00:00
2016-04-04 22:27:20 +00:00
parseTimestampError := func ( label , s string ) {
glog . Errorf ( "Failed to parse %q timestamp %q for container %q of pod %q" , label , s , id , kubecontainer . BuildPodFullName ( podName , podNamespace ) )
}
var createdAt , startedAt , finishedAt time . Time
if createdAt , err = parseDockerTimestamp ( iResult . Created ) ; err != nil {
parseTimestampError ( "Created" , iResult . Created )
}
if startedAt , err = parseDockerTimestamp ( iResult . State . StartedAt ) ; err != nil {
parseTimestampError ( "StartedAt" , iResult . State . StartedAt )
}
if finishedAt , err = parseDockerTimestamp ( iResult . State . FinishedAt ) ; err != nil {
parseTimestampError ( "FinishedAt" , iResult . State . FinishedAt )
}
2015-12-05 00:06:25 +00:00
status := kubecontainer . ContainerStatus {
2015-10-23 20:02:32 +00:00
Name : containerName ,
2015-11-13 19:37:33 +00:00
RestartCount : containerInfo . RestartCount ,
2015-12-05 00:06:25 +00:00
Image : iResult . Config . Image ,
ImageID : DockerPrefix + iResult . Image ,
2015-12-24 23:46:56 +00:00
ID : kubecontainer . DockerID ( id ) . ContainerID ( ) ,
2015-12-05 00:06:25 +00:00
ExitCode : iResult . State . ExitCode ,
2016-04-04 22:27:20 +00:00
CreatedAt : createdAt ,
2015-12-05 00:06:25 +00:00
Hash : hash ,
}
if iResult . State . Running {
2016-03-05 10:40:25 +00:00
// Container that are running, restarting and paused
2015-12-05 00:06:25 +00:00
status . State = kubecontainer . ContainerStateRunning
2016-04-04 22:27:20 +00:00
status . StartedAt = startedAt
2015-06-09 00:51:57 +00:00
if containerName == PodInfraContainerName {
2015-12-05 00:06:25 +00:00
ip = dm . determineContainerIP ( podNamespace , podName , iResult )
2015-04-02 20:14:52 +00:00
}
2015-12-05 00:06:25 +00:00
return & status , ip , nil
}
// Find containers that have exited or failed to start.
2016-04-04 22:27:20 +00:00
if ! finishedAt . IsZero ( ) || iResult . State . ExitCode != 0 {
2016-03-05 10:40:25 +00:00
// Containers that are exited, dead or created (docker failed to start container)
2015-10-06 04:51:07 +00:00
// When a container fails to start State.ExitCode is non-zero, FinishedAt and StartedAt are both zero
2015-04-02 20:14:52 +00:00
reason := ""
2015-12-05 00:06:25 +00:00
message := iResult . State . Error
2015-10-06 04:51:07 +00:00
2015-04-02 20:14:52 +00:00
// Note: An application might handle OOMKilled gracefully.
// In that case, the container is oom killed, but the exit
// code could be 0.
2015-12-05 00:06:25 +00:00
if iResult . State . OOMKilled {
2015-09-08 16:50:19 +00:00
reason = "OOMKilled"
2015-12-05 00:06:25 +00:00
} else if iResult . State . ExitCode == 0 {
2015-10-06 04:51:07 +00:00
reason = "Completed"
2016-04-04 22:27:20 +00:00
} else if ! finishedAt . IsZero ( ) {
2015-09-08 16:50:19 +00:00
reason = "Error"
2015-10-06 04:51:07 +00:00
} else {
// finishedAt is zero and ExitCode is nonZero occurs when docker fails to start the container
reason = ErrContainerCannotRun . Error ( )
// Adjust time to the time docker attempted to run the container, otherwise startedAt and finishedAt will be set to epoch, which is misleading
2016-04-04 22:27:20 +00:00
finishedAt = createdAt
startedAt = createdAt
2015-04-02 20:14:52 +00:00
}
2015-12-05 00:06:25 +00:00
2015-11-13 19:37:33 +00:00
terminationMessagePath := containerInfo . TerminationMessagePath
2015-11-02 22:32:58 +00:00
if terminationMessagePath != "" {
2016-03-30 23:03:49 +00:00
for _ , mount := range iResult . Mounts {
if mount . Destination == terminationMessagePath {
path := mount . Source
if data , err := ioutil . ReadFile ( path ) ; err != nil {
message = fmt . Sprintf ( "Error on reading termination-log %s: %v" , path , err )
} else {
message = string ( data )
}
2015-04-02 20:14:52 +00:00
}
}
}
2015-12-05 00:06:25 +00:00
status . State = kubecontainer . ContainerStateExited
status . Message = message
status . Reason = reason
status . StartedAt = startedAt
status . FinishedAt = finishedAt
} else {
2016-03-05 10:40:25 +00:00
// Non-running containers that are created (not yet started or kubelet failed before calling
// start container function etc.) Kubelet doesn't handle these scenarios yet.
2015-12-05 00:06:25 +00:00
status . State = kubecontainer . ContainerStateUnknown
2015-04-02 20:14:52 +00:00
}
2015-12-05 00:06:25 +00:00
return & status , "" , nil
2015-04-02 20:14:52 +00:00
}
2015-05-12 21:49:35 +00:00
// makeEnvList renders each environment variable as a '<key>=<value>' string,
// the form docker understands. The result is nil when envs is empty.
func makeEnvList(envs []kubecontainer.EnvVar) (result []string) {
	for i := range envs {
		entry := fmt.Sprintf("%s=%s", envs[i].Name, envs[i].Value)
		result = append(result, entry)
	}
	return result
}
// makeMountBindings converts the mount list to a list of strings that
// can be understood by docker.
// Each element in the string is in the form of:
// '<HostPath>:<ContainerPath>', or
2015-10-07 19:19:06 +00:00
// '<HostPath>:<ContainerPath>:ro', if the path is read only, or
// '<HostPath>:<ContainerPath>:Z', if the volume requires SELinux
// relabeling and the pod provides an SELinux label
func makeMountBindings ( mounts [ ] kubecontainer . Mount , podHasSELinuxLabel bool ) ( result [ ] string ) {
2015-05-12 21:49:35 +00:00
for _ , m := range mounts {
bind := fmt . Sprintf ( "%s:%s" , m . HostPath , m . ContainerPath )
if m . ReadOnly {
bind += ":ro"
}
2015-10-07 19:19:06 +00:00
// Only request relabeling if the pod provides an
// SELinux context. If the pod does not provide an
// SELinux context relabeling will label the volume
// with the container's randomly allocated MCS label.
// This would restrict access to the volume to the
// container which mounts it first.
if m . SELinuxRelabel && podHasSELinuxLabel {
if m . ReadOnly {
bind += ",Z"
} else {
bind += ":Z"
}
}
2015-05-12 21:49:35 +00:00
result = append ( result , bind )
}
return
}
2016-04-14 17:36:13 +00:00
func makePortsAndBindings ( portMappings [ ] kubecontainer . PortMapping ) ( map [ dockernat . Port ] struct { } , map [ dockernat . Port ] [ ] dockernat . PortBinding ) {
exposedPorts := map [ dockernat . Port ] struct { } { }
portBindings := map [ dockernat . Port ] [ ] dockernat . PortBinding { }
2015-05-12 21:49:35 +00:00
for _ , port := range portMappings {
exteriorPort := port . HostPort
if exteriorPort == 0 {
// No need to do port binding when HostPort is not specified
continue
}
interiorPort := port . ContainerPort
// Some of this port stuff is under-documented voodoo.
// See http://stackoverflow.com/questions/20428302/binding-a-port-to-a-host-interface-using-the-rest-api
var protocol string
switch strings . ToUpper ( string ( port . Protocol ) ) {
case "UDP" :
protocol = "/udp"
case "TCP" :
protocol = "/tcp"
default :
glog . Warningf ( "Unknown protocol %q: defaulting to TCP" , port . Protocol )
protocol = "/tcp"
}
2015-08-31 12:53:02 +00:00
2016-04-14 17:36:13 +00:00
dockerPort := dockernat . Port ( strconv . Itoa ( interiorPort ) + protocol )
2015-05-12 21:49:35 +00:00
exposedPorts [ dockerPort ] = struct { } { }
2015-08-31 12:53:02 +00:00
2016-04-14 17:36:13 +00:00
hostBinding := dockernat . PortBinding {
2015-08-30 16:13:33 +00:00
HostPort : strconv . Itoa ( exteriorPort ) ,
HostIP : port . HostIP ,
}
2015-08-31 12:53:02 +00:00
// Allow multiple host ports bind to same docker port
if existedBindings , ok := portBindings [ dockerPort ] ; ok {
// If a docker port already map to a host port, just append the host ports
2015-08-30 16:13:33 +00:00
portBindings [ dockerPort ] = append ( existedBindings , hostBinding )
} else {
// Otherwise, it's fresh new port binding
2016-04-14 17:36:13 +00:00
portBindings [ dockerPort ] = [ ] dockernat . PortBinding {
2015-08-30 16:13:33 +00:00
hostBinding ,
}
2015-05-12 21:49:35 +00:00
}
}
return exposedPorts , portBindings
}
2015-05-12 21:18:00 +00:00
func ( dm * DockerManager ) runContainer (
pod * api . Pod ,
container * api . Container ,
opts * kubecontainer . RunContainerOptions ,
ref * api . ObjectReference ,
netMode string ,
2015-08-19 05:02:10 +00:00
ipcMode string ,
2015-09-15 16:43:59 +00:00
utsMode string ,
2015-10-23 20:02:32 +00:00
pidMode string ,
2016-01-26 15:03:37 +00:00
restartCount int ,
oomScoreAdj int ) ( kubecontainer . ContainerID , error ) {
2015-05-12 21:18:00 +00:00
2015-04-02 20:14:52 +00:00
dockerName := KubeletContainerName {
PodFullName : kubecontainer . GetPodFullName ( pod ) ,
PodUID : pod . UID ,
ContainerName : container . Name ,
}
2015-08-20 01:57:58 +00:00
2016-02-23 21:27:28 +00:00
securityOpts , err := dm . defaultSecurityOpt ( )
if err != nil {
return kubecontainer . ContainerID { } , err
}
2015-08-20 01:57:58 +00:00
// Pod information is recorded on the container as labels to preserve it in the event the pod is deleted
2015-12-30 07:46:52 +00:00
// while the Kubelet is down and there is no information available to recover the pod.
2015-08-20 01:57:58 +00:00
// TODO: keep these labels up to date if the pod changes
2016-01-21 16:44:28 +00:00
labels := newLabels ( container , pod , restartCount , dm . enableCustomMetrics )
2015-10-30 05:42:25 +00:00
2015-12-30 07:46:52 +00:00
// TODO(random-liu): Remove this when we start to use new labels for KillContainerInPod
2015-05-06 03:50:45 +00:00
if container . Lifecycle != nil && container . Lifecycle . PreStop != nil {
// TODO: This is kind of hacky, we should really just encode the bits we need.
2016-01-22 05:11:30 +00:00
// TODO: This is hacky because the Kubelet should be parameterized to encode a specific version
// and needs to be able to migrate this whenever we deprecate v1. Should be a member of DockerManager.
if data , err := runtime . Encode ( api . Codecs . LegacyCodec ( unversioned . GroupVersion { Group : api . GroupName , Version : "v1" } ) , pod ) ; err == nil {
2015-05-06 03:50:45 +00:00
labels [ kubernetesPodLabel ] = string ( data )
2016-01-22 05:11:30 +00:00
} else {
glog . Errorf ( "Failed to encode pod: %s for prestop hook" , pod . Name )
2015-05-06 03:50:45 +00:00
}
}
2015-05-19 06:53:41 +00:00
memoryLimit := container . Resources . Limits . Memory ( ) . Value ( )
2015-07-30 19:59:22 +00:00
cpuRequest := container . Resources . Requests . Cpu ( )
cpuLimit := container . Resources . Limits . Cpu ( )
var cpuShares int64
// If request is not specified, but limit is, we want request to default to limit.
// API server does this for new containers, but we repeat this logic in Kubelet
// for containers running on existing Kubernetes clusters.
if cpuRequest . Amount == nil && cpuLimit . Amount != nil {
cpuShares = milliCPUToShares ( cpuLimit . MilliValue ( ) )
} else {
// if cpuRequest.Amount is nil, then milliCPUToShares will return the minimal number
// of CPU shares.
cpuShares = milliCPUToShares ( cpuRequest . MilliValue ( ) )
}
2015-10-07 19:19:06 +00:00
podHasSELinuxLabel := pod . Spec . SecurityContext != nil && pod . Spec . SecurityContext . SELinuxOptions != nil
binds := makeMountBindings ( opts . Mounts , podHasSELinuxLabel )
2016-02-05 20:50:35 +00:00
// The reason we create and mount the log file in here (not in kubelet) is because
// the file's location depends on the ID of the container, and we need to create and
// mount the file before actually starting the container.
// TODO(yifan): Consider to pull this logic out since we might need to reuse it in
// other container runtime.
2016-02-04 00:40:04 +00:00
_ , containerName , cid := BuildDockerName ( dockerName , container )
2015-04-02 20:14:52 +00:00
if opts . PodContainerDir != "" && len ( container . TerminationMessagePath ) != 0 {
2016-02-04 00:40:04 +00:00
// Because the PodContainerDir contains pod uid and container name which is unique enough,
// here we just add an unique container id to make the path unique for different instances
// of the same container.
containerLogPath := path . Join ( opts . PodContainerDir , cid )
2015-04-02 20:14:52 +00:00
fs , err := os . Create ( containerLogPath )
if err != nil {
// TODO: Clean up the previouly created dir? return the error?
glog . Errorf ( "Error on creating termination-log file %q: %v" , containerLogPath , err )
} else {
fs . Close ( ) // Close immediately; we're just doing a `touch` here
b := fmt . Sprintf ( "%s:%s" , containerLogPath , container . TerminationMessagePath )
2015-05-12 21:49:35 +00:00
binds = append ( binds , b )
2015-04-02 20:14:52 +00:00
}
}
2016-02-05 20:50:35 +00:00
2016-04-14 17:36:13 +00:00
hc := & dockercontainer . HostConfig {
2016-02-11 22:31:26 +00:00
Binds : binds ,
2016-04-14 17:36:13 +00:00
NetworkMode : dockercontainer . NetworkMode ( netMode ) ,
IpcMode : dockercontainer . IpcMode ( ipcMode ) ,
UTSMode : dockercontainer . UTSMode ( utsMode ) ,
PidMode : dockercontainer . PidMode ( pidMode ) ,
2016-02-11 22:31:26 +00:00
ReadonlyRootfs : readOnlyRootFilesystem ( container ) ,
2016-04-14 17:36:13 +00:00
Resources : dockercontainer . Resources {
Memory : memoryLimit ,
MemorySwap : - 1 ,
CPUShares : cpuShares ,
} ,
2016-02-23 21:27:28 +00:00
SecurityOpt : securityOpts ,
2015-04-02 20:14:52 +00:00
}
2015-09-01 13:27:01 +00:00
2016-01-26 15:03:37 +00:00
// If current api version is newer than docker 1.10 requested, set OomScoreAdj to HostConfig
2016-03-14 08:35:49 +00:00
result , err := dm . checkDockerAPIVersion ( dockerv110APIVersion )
if err != nil {
glog . Errorf ( "Failed to check docker api version: %v" , err )
} else if result >= 0 {
2016-01-26 15:03:37 +00:00
hc . OomScoreAdj = oomScoreAdj
}
2015-09-01 13:27:01 +00:00
if dm . cpuCFSQuota {
// if cpuLimit.Amount is nil, then the appropriate default value is returned to allow full usage of cpu resource.
cpuQuota , cpuPeriod := milliCPUToQuota ( cpuLimit . MilliValue ( ) )
hc . CPUQuota = cpuQuota
hc . CPUPeriod = cpuPeriod
}
2015-04-24 00:07:52 +00:00
if len ( opts . CgroupParent ) > 0 {
hc . CgroupParent = opts . CgroupParent
}
2015-04-02 20:14:52 +00:00
2016-04-14 17:36:13 +00:00
dockerOpts := dockertypes . ContainerCreateConfig {
2016-02-04 00:40:04 +00:00
Name : containerName ,
2016-04-14 17:36:13 +00:00
Config : & dockercontainer . Config {
Env : makeEnvList ( opts . Envs ) ,
Image : container . Image ,
2016-02-04 00:40:04 +00:00
WorkingDir : container . WorkingDir ,
Labels : labels ,
// Interactive containers:
OpenStdin : container . Stdin ,
StdinOnce : container . StdinOnce ,
Tty : container . TTY ,
} ,
HostConfig : hc ,
}
2016-02-06 01:47:32 +00:00
// Set network configuration for infra-container
if container . Name == PodInfraContainerName {
setInfraContainerNetworkConfig ( pod , netMode , opts , dockerOpts )
}
2016-04-14 17:36:13 +00:00
setEntrypointAndCommand ( container , opts , dockerOpts )
2016-02-04 00:40:04 +00:00
glog . V ( 3 ) . Infof ( "Container %v/%v/%v: setting entrypoint \"%v\" and command \"%v\"" , pod . Namespace , pod . Name , container . Name , dockerOpts . Config . Entrypoint , dockerOpts . Config . Cmd )
securityContextProvider := securitycontext . NewSimpleSecurityContextProvider ( )
securityContextProvider . ModifyContainerConfig ( pod , container , dockerOpts . Config )
securityContextProvider . ModifyHostConfig ( pod , container , dockerOpts . HostConfig )
2016-04-14 17:36:13 +00:00
createResp , err := dm . client . CreateContainer ( dockerOpts )
2016-02-04 00:40:04 +00:00
if err != nil {
dm . recorder . Eventf ( ref , api . EventTypeWarning , kubecontainer . FailedToCreateContainer , "Failed to create docker container with error: %v" , err )
return kubecontainer . ContainerID { } , err
}
2016-04-14 17:36:13 +00:00
if len ( createResp . Warnings ) != 0 {
glog . V ( 2 ) . Infof ( "Container %q of pod %q created with warnings: %v" , container . Name , format . Pod ( pod ) , createResp . Warnings )
}
dm . recorder . Eventf ( ref , api . EventTypeNormal , kubecontainer . CreatedContainer , "Created container with docker id %v" , utilstrings . ShortenString ( createResp . ID , 12 ) )
2016-02-04 00:40:04 +00:00
2016-04-14 17:37:35 +00:00
if err = dm . client . StartContainer ( createResp . ID ) ; err != nil {
2015-11-13 22:30:01 +00:00
dm . recorder . Eventf ( ref , api . EventTypeWarning , kubecontainer . FailedToStartContainer ,
2016-04-14 17:36:13 +00:00
"Failed to start container with docker id %v with error: %v" , utilstrings . ShortenString ( createResp . ID , 12 ) , err )
2015-10-07 17:58:05 +00:00
return kubecontainer . ContainerID { } , err
2015-04-02 20:14:52 +00:00
}
2016-04-14 17:36:13 +00:00
dm . recorder . Eventf ( ref , api . EventTypeNormal , kubecontainer . StartedContainer , "Started container with docker id %v" , utilstrings . ShortenString ( createResp . ID , 12 ) )
2015-10-20 11:30:54 +00:00
2016-04-14 17:36:13 +00:00
return kubecontainer . DockerID ( createResp . ID ) . ContainerID ( ) , nil
2015-04-02 20:14:52 +00:00
}
2016-02-06 01:47:32 +00:00
// setInfraContainerNetworkConfig sets the network configuration for the infra-container. We only set network configuration for infra-container, all
// the user containers will share the same network namespace with infra-container.
2016-04-14 17:36:13 +00:00
func setInfraContainerNetworkConfig ( pod * api . Pod , netMode string , opts * kubecontainer . RunContainerOptions , dockerOpts dockertypes . ContainerCreateConfig ) {
2016-02-06 01:47:32 +00:00
exposedPorts , portBindings := makePortsAndBindings ( opts . PortMappings )
dockerOpts . Config . ExposedPorts = exposedPorts
2016-04-14 17:36:13 +00:00
dockerOpts . HostConfig . PortBindings = dockernat . PortMap ( portBindings )
2016-02-06 01:47:32 +00:00
if netMode != namespaceModeHost {
2016-02-02 18:59:54 +00:00
dockerOpts . Config . Hostname = opts . Hostname
2016-02-06 01:47:32 +00:00
if len ( opts . DNS ) > 0 {
dockerOpts . HostConfig . DNS = opts . DNS
}
if len ( opts . DNSSearch ) > 0 {
dockerOpts . HostConfig . DNSSearch = opts . DNSSearch
}
}
}
2016-04-14 17:36:13 +00:00
func setEntrypointAndCommand ( container * api . Container , opts * kubecontainer . RunContainerOptions , dockerOpts dockertypes . ContainerCreateConfig ) {
2015-05-22 22:21:03 +00:00
command , args := kubecontainer . ExpandContainerCommandAndArgs ( container , opts . Envs )
2016-04-14 17:36:13 +00:00
dockerOpts . Config . Entrypoint = dockerstrslice . StrSlice ( command )
dockerOpts . Config . Cmd = dockerstrslice . StrSlice ( args )
2015-04-02 20:14:52 +00:00
}
2015-04-29 20:09:03 +00:00
// A helper function to get the KubeletContainerName and hash from a docker
// container.
2016-04-04 08:56:49 +00:00
func getDockerContainerNameInfo ( c * dockertypes . Container ) ( * KubeletContainerName , uint64 , error ) {
2015-04-29 20:09:03 +00:00
if len ( c . Names ) == 0 {
return nil , 0 , fmt . Errorf ( "cannot parse empty docker container name: %#v" , c . Names )
}
dockerName , hash , err := ParseDockerName ( c . Names [ 0 ] )
if err != nil {
return nil , 0 , fmt . Errorf ( "parse docker container name %q error: %v" , c . Names [ 0 ] , err )
}
return dockerName , hash , nil
}
// Get pod UID, name, and namespace by examining the container names.
2016-04-04 08:56:49 +00:00
func getPodInfoFromContainer ( c * dockertypes . Container ) ( types . UID , string , string , error ) {
2015-04-29 20:09:03 +00:00
dockerName , _ , err := getDockerContainerNameInfo ( c )
if err != nil {
return types . UID ( "" ) , "" , "" , err
}
name , namespace , err := kubecontainer . ParsePodFullName ( dockerName . PodFullName )
if err != nil {
return types . UID ( "" ) , "" , "" , fmt . Errorf ( "parse pod full name %q error: %v" , dockerName . PodFullName , err )
}
return dockerName . PodUID , name , namespace , nil
}
// GetContainers returns a list of running containers if |all| is false;
// otherwise, it returns all containers.
func ( dm * DockerManager ) GetContainers ( all bool ) ( [ ] * kubecontainer . Container , error ) {
containers , err := GetKubeletDockerContainers ( dm . client , all )
if err != nil {
return nil , err
}
// Convert DockerContainers to []*kubecontainer.Container
result := make ( [ ] * kubecontainer . Container , 0 , len ( containers ) )
for _ , c := range containers {
2015-04-30 17:12:23 +00:00
converted , err := toRuntimeContainer ( c )
2015-04-29 20:09:03 +00:00
if err != nil {
glog . Errorf ( "Error examining the container: %v" , err )
continue
}
result = append ( result , converted )
}
return result , nil
}
2015-04-20 03:26:07 +00:00
func ( dm * DockerManager ) GetPods ( all bool ) ( [ ] * kubecontainer . Pod , error ) {
2015-06-09 21:01:23 +00:00
start := time . Now ( )
defer func ( ) {
metrics . ContainerManagerLatency . WithLabelValues ( "GetPods" ) . Observe ( metrics . SinceInMicroseconds ( start ) )
} ( )
2015-04-13 21:00:02 +00:00
pods := make ( map [ types . UID ] * kubecontainer . Pod )
var result [ ] * kubecontainer . Pod
2015-04-20 03:26:07 +00:00
containers , err := GetKubeletDockerContainers ( dm . client , all )
2015-04-13 21:00:02 +00:00
if err != nil {
return nil , err
}
// Group containers by pod.
for _ , c := range containers {
2015-04-30 17:12:23 +00:00
converted , err := toRuntimeContainer ( c )
2015-04-29 20:09:03 +00:00
if err != nil {
glog . Errorf ( "Error examining the container: %v" , err )
2015-04-13 21:00:02 +00:00
continue
}
2015-04-29 20:09:03 +00:00
podUID , podName , podNamespace , err := getPodInfoFromContainer ( c )
2015-04-13 21:00:02 +00:00
if err != nil {
2015-04-29 20:09:03 +00:00
glog . Errorf ( "Error examining the container: %v" , err )
2015-04-13 21:00:02 +00:00
continue
}
2015-04-29 20:09:03 +00:00
pod , found := pods [ podUID ]
2015-04-13 21:00:02 +00:00
if ! found {
pod = & kubecontainer . Pod {
2015-04-29 20:09:03 +00:00
ID : podUID ,
Name : podName ,
Namespace : podNamespace ,
2015-04-13 21:00:02 +00:00
}
2015-04-29 20:09:03 +00:00
pods [ podUID ] = pod
}
pod . Containers = append ( pod . Containers , converted )
2015-04-13 21:00:02 +00:00
}
// Convert map to list.
2016-04-04 08:56:49 +00:00
for _ , p := range pods {
result = append ( result , p )
2015-04-13 21:00:02 +00:00
}
return result , nil
}
2015-04-13 19:24:01 +00:00
2015-04-30 17:12:23 +00:00
// ListImages lists all images in the local storage, converted to runtime
// images. An image that cannot be converted is logged and left out of the
// result, matching how GetContainers and GetPods treat conversion failures.
func (dm *DockerManager) ListImages() ([]kubecontainer.Image, error) {
	var images []kubecontainer.Image

	dockerImages, err := dm.client.ListImages(docker.ListImagesOptions{})
	if err != nil {
		return images, err
	}

	for _, di := range dockerImages {
		image, err := toRuntimeImage(&di)
		if err != nil {
			// Stay best-effort, but do not drop the failure silently.
			glog.Errorf("Error examining the image: %v", err)
			continue
		}
		images = append(images, *image)
	}
	return images, nil
}
2015-05-01 01:37:15 +00:00
// TODO(vmarmol): Consider unexporting.
2015-04-30 17:12:23 +00:00
// PullImage pulls an image from network to local storage.
2015-05-08 17:30:59 +00:00
func ( dm * DockerManager ) PullImage ( image kubecontainer . ImageSpec , secrets [ ] api . Secret ) error {
2015-08-10 17:28:39 +00:00
return dm . dockerPuller . Pull ( image . Image , secrets )
2015-04-13 19:24:01 +00:00
}
2015-04-30 17:12:23 +00:00
// IsImagePresent checks whether the container image is already in the local storage.
2015-05-06 21:42:03 +00:00
func ( dm * DockerManager ) IsImagePresent ( image kubecontainer . ImageSpec ) ( bool , error ) {
2015-08-10 17:28:39 +00:00
return dm . dockerPuller . IsImagePresent ( image . Image )
2015-04-13 19:24:01 +00:00
}
2015-04-13 17:02:19 +00:00
2015-04-30 17:12:23 +00:00
// Removes the specified image.
2015-05-06 21:42:03 +00:00
func ( dm * DockerManager ) RemoveImage ( image kubecontainer . ImageSpec ) error {
return dm . client . RemoveImage ( image . Image )
2015-04-30 17:12:23 +00:00
}
2015-04-29 00:51:21 +00:00
// podInfraContainerChanged returns true if the pod infra container has changed.
2015-12-05 00:06:25 +00:00
func ( dm * DockerManager ) podInfraContainerChanged ( pod * api . Pod , podInfraContainerStatus * kubecontainer . ContainerStatus ) ( bool , error ) {
2015-04-13 17:02:19 +00:00
var ports [ ] api . ContainerPort
// Check network mode.
2016-04-14 19:00:51 +00:00
if kubecontainer . IsHostNetworkPod ( pod ) {
2016-02-01 22:56:56 +00:00
dockerPodInfraContainer , err := dm . client . InspectContainer ( podInfraContainerStatus . ID . ID )
if err != nil {
return false , err
}
networkMode := getDockerNetworkMode ( dockerPodInfraContainer )
2016-01-22 21:14:41 +00:00
if networkMode != namespaceModeHost {
2015-09-14 21:56:51 +00:00
glog . V ( 4 ) . Infof ( "host: %v, %v" , pod . Spec . SecurityContext . HostNetwork , networkMode )
2015-04-13 17:02:19 +00:00
return true , nil
}
2016-03-16 13:36:41 +00:00
} else if dm . networkPlugin . Name ( ) != "cni" && dm . networkPlugin . Name ( ) != "kubenet" {
2015-04-13 17:02:19 +00:00
// Docker only exports ports from the pod infra container. Let's
// collect all of the relevant ports and export them.
for _ , container := range pod . Spec . Containers {
ports = append ( ports , container . Ports ... )
}
}
expectedPodInfraContainer := & api . Container {
2015-06-09 00:53:24 +00:00
Name : PodInfraContainerName ,
Image : dm . podInfraContainerImage ,
Ports : ports ,
ImagePullPolicy : podInfraContainerImagePullPolicy ,
2016-02-05 15:47:06 +00:00
Env : dm . podInfraContainerEnv ,
2015-04-13 17:02:19 +00:00
}
2015-12-05 00:06:25 +00:00
return podInfraContainerStatus . Hash != kubecontainer . HashContainer ( expectedPodInfraContainer ) , nil
2015-04-13 17:02:19 +00:00
}
2015-04-21 20:02:50 +00:00
2016-02-11 22:31:26 +00:00
// readOnlyRootFilesystem reports whether the container root should be a
// read-only filesystem: true only when the container has a security context
// whose ReadOnlyRootFilesystem is set and true.
func readOnlyRootFilesystem(container *api.Container) bool {
	sc := container.SecurityContext
	if sc == nil {
		return false
	}
	if sc.ReadOnlyRootFilesystem == nil {
		return false
	}
	return *sc.ReadOnlyRootFilesystem
}
2016-02-01 22:56:56 +00:00
// container must not be nil
2016-04-04 22:27:20 +00:00
func getDockerNetworkMode ( container * dockertypes . ContainerJSON ) string {
2016-02-01 22:56:56 +00:00
if container . HostConfig != nil {
2016-04-04 22:27:20 +00:00
return string ( container . HostConfig . NetworkMode )
2016-02-01 22:56:56 +00:00
}
return ""
}
2016-01-22 20:54:23 +00:00
// dockerVersion implements the kubecontainer.Version interface by implementing
// Compare() and String() (String() is provided by the embedded semver.Version).
// TODO: this code is the same as rktVersion and may make sense to be moved to
// somewhere shared.
type dockerVersion struct {
* semver . Version
}
2016-02-08 17:40:19 +00:00
// Older versions of Docker could return non-semantically versioned values (distros like Fedora
// included partial values such as 1.8.1.fc21 which is not semver). Force those values to be semver.
//
// almostSemverRegexp matches "x.y.z.<rest>": group 1 captures the x.y.z core
// and group 2 the remainder after the third dot. The pattern must not contain
// any whitespace around the anchors, otherwise it can never match.
var almostSemverRegexp = regexp.MustCompile(`^(\d+\.\d+\.\d+)\.(.*)$`)
// newDockerVersion returns a semantically versioned docker version value
2016-01-22 20:54:23 +00:00
func newDockerVersion ( version string ) ( dockerVersion , error ) {
sem , err := semver . NewVersion ( version )
if err != nil {
2016-02-08 17:40:19 +00:00
matches := almostSemverRegexp . FindStringSubmatch ( version )
if matches == nil {
return dockerVersion { } , err
}
sem , err = semver . NewVersion ( strings . Join ( matches [ 1 : ] , "-" ) )
2016-01-22 20:54:23 +00:00
}
2016-02-08 17:40:19 +00:00
return dockerVersion { sem } , err
2016-01-22 20:54:23 +00:00
}
func ( r dockerVersion ) Compare ( other string ) ( int , error ) {
2016-02-08 17:40:19 +00:00
v , err := newDockerVersion ( other )
2016-01-22 20:54:23 +00:00
if err != nil {
return - 1 , err
}
2015-04-21 20:02:50 +00:00
2016-02-08 17:40:19 +00:00
if r . LessThan ( * v . Version ) {
2016-01-22 20:54:23 +00:00
return - 1 , nil
}
2016-02-08 17:40:19 +00:00
if v . Version . LessThan ( * r . Version ) {
2016-01-22 20:54:23 +00:00
return 1 , nil
}
return 0 , nil
2015-04-21 20:02:50 +00:00
}
2016-01-22 20:54:23 +00:00
// dockerAPIVersion implements the kubecontainer.Version interface by implementing
// Compare() and String() on top of go-dockerclient's APIVersion. This version
// string doesn't conform to semantic versioning, as it is only "x.y"
type dockerAPIVersion docker . APIVersion
func ( dv dockerAPIVersion ) String ( ) string {
2015-04-21 20:02:50 +00:00
return docker . APIVersion ( dv ) . String ( )
}
2016-01-22 20:54:23 +00:00
func ( dv dockerAPIVersion ) Compare ( other string ) ( int , error ) {
2015-04-21 20:02:50 +00:00
a := docker . APIVersion ( dv )
b , err := docker . NewAPIVersion ( other )
if err != nil {
return 0 , err
}
if a . LessThan ( b ) {
return - 1 , nil
}
if a . GreaterThan ( b ) {
return 1 , nil
}
return 0 , nil
}
2015-10-21 20:04:10 +00:00
// Type returns DockerType, the name identifying this runtime manager.
func ( dm * DockerManager ) Type ( ) string {
return DockerType
}
2015-04-21 20:02:50 +00:00
func ( dm * DockerManager ) Version ( ) ( kubecontainer . Version , error ) {
env , err := dm . client . Version ( )
if err != nil {
return nil , fmt . Errorf ( "docker: failed to get docker version: %v" , err )
}
2016-01-14 23:16:07 +00:00
engineVersion := env . Get ( "Version" )
2016-01-22 20:54:23 +00:00
version , err := newDockerVersion ( engineVersion )
2016-01-14 23:16:07 +00:00
if err != nil {
glog . Errorf ( "docker: failed to parse docker server version %q: %v" , engineVersion , err )
return nil , fmt . Errorf ( "docker: failed to parse docker server version %q: %v" , engineVersion , err )
}
2016-01-22 20:54:23 +00:00
return version , nil
2016-01-14 23:16:07 +00:00
}
func ( dm * DockerManager ) APIVersion ( ) ( kubecontainer . Version , error ) {
env , err := dm . client . Version ( )
if err != nil {
return nil , fmt . Errorf ( "docker: failed to get docker version: %v" , err )
}
2015-04-21 20:02:50 +00:00
apiVersion := env . Get ( "ApiVersion" )
version , err := docker . NewAPIVersion ( apiVersion )
if err != nil {
2016-01-14 23:16:07 +00:00
glog . Errorf ( "docker: failed to parse docker api version %q: %v" , apiVersion , err )
return nil , fmt . Errorf ( "docker: failed to parse docker api version %q: %v" , apiVersion , err )
2015-04-21 20:02:50 +00:00
}
2016-01-22 20:54:23 +00:00
return dockerAPIVersion ( version ) , nil
2015-04-21 20:02:50 +00:00
}
2015-04-22 17:20:05 +00:00
2016-03-03 10:01:15 +00:00
// Status returns an error if the docker daemon is unhealthy, nil otherwise.
// The check is delegated entirely to checkVersionCompatibility, which verifies that:
// 1) the docker version can be queried (`docker version` works)
// 2) the reported docker API version meets the minimum requirement
func ( dm * DockerManager ) Status ( ) error {
return dm . checkVersionCompatibility ( )
}
// checkVersionCompatibility verifies that the docker API version reported by
// the daemon is at least minimumDockerAPIVersion. It returns an error when the
// version cannot be obtained, cannot be compared, or is older than the minimum.
func (dm *DockerManager) checkVersionCompatibility() error {
	current, err := dm.APIVersion()
	if err != nil {
		return err
	}

	// Verify the docker version.
	cmp, err := current.Compare(minimumDockerAPIVersion)
	switch {
	case err != nil:
		return fmt.Errorf("failed to compare current docker version %v with minimum support Docker version %q - %v", current, minimumDockerAPIVersion, err)
	case cmp < 0:
		return fmt.Errorf("container runtime version is older than %s", minimumDockerAPIVersion)
	}
	return nil
}
2016-02-23 21:27:28 +00:00
// defaultSecurityOpt returns the package-level defaultSecurityOpt options when
// the docker API version is at least dockerV110APIVersion, and nil for older
// versions. An error is returned when the API version cannot be obtained or compared.
func (dm *DockerManager) defaultSecurityOpt() ([]string, error) {
	current, err := dm.APIVersion()
	if err != nil {
		return nil, err
	}

	// seccomp is to be disabled on docker versions >= v1.10
	cmp, err := current.Compare(dockerV110APIVersion)
	if err != nil {
		return nil, err
	}
	if cmp < 0 {
		return nil, nil
	}
	return defaultSecurityOpt, nil
}
2016-03-30 01:10:31 +00:00
// RunInContainer run the command inside the container identified by containerID
2015-10-07 17:58:05 +00:00
func ( dm * DockerManager ) RunInContainer ( containerID kubecontainer . ContainerID , cmd [ ] string ) ( [ ] byte , error ) {
2015-06-12 22:54:22 +00:00
glog . V ( 2 ) . Infof ( "Using docker native exec to run cmd %+v inside container %s" , cmd , containerID )
2015-04-22 17:20:05 +00:00
createOpts := docker . CreateExecOptions {
2015-10-07 17:58:05 +00:00
Container : containerID . ID ,
2015-04-22 17:20:05 +00:00
Cmd : cmd ,
AttachStdin : false ,
AttachStdout : true ,
AttachStderr : true ,
Tty : false ,
}
execObj , err := dm . client . CreateExec ( createOpts )
if err != nil {
return nil , fmt . Errorf ( "failed to run in container - Exec setup failed - %v" , err )
}
var buf bytes . Buffer
startOpts := docker . StartExecOptions {
Detach : false ,
Tty : false ,
2015-05-04 23:01:32 +00:00
OutputStream : & buf ,
ErrorStream : & buf ,
2015-04-22 17:20:05 +00:00
RawTerminal : false ,
}
2015-05-04 23:01:32 +00:00
err = dm . client . StartExec ( execObj . ID , startOpts )
2015-05-08 16:48:31 +00:00
if err != nil {
2015-06-12 22:54:22 +00:00
glog . V ( 2 ) . Infof ( "StartExec With error: %v" , err )
2015-05-08 16:48:31 +00:00
return nil , err
}
2015-07-06 00:03:10 +00:00
ticker := time . NewTicker ( 2 * time . Second )
defer ticker . Stop ( )
2015-05-08 16:48:31 +00:00
for {
inspect , err2 := dm . client . InspectExec ( execObj . ID )
if err2 != nil {
2015-06-12 22:54:22 +00:00
glog . V ( 2 ) . Infof ( "InspectExec %s failed with error: %+v" , execObj . ID , err2 )
2015-05-08 16:48:31 +00:00
return buf . Bytes ( ) , err2
}
if ! inspect . Running {
if inspect . ExitCode != 0 {
2015-06-12 22:54:22 +00:00
glog . V ( 2 ) . Infof ( "InspectExec %s exit with result %+v" , execObj . ID , inspect )
2015-05-08 16:48:31 +00:00
err = & dockerExitError { inspect }
}
break
}
2015-07-06 00:03:10 +00:00
<- ticker . C
2015-05-08 16:48:31 +00:00
}
2015-05-04 23:01:32 +00:00
return buf . Bytes ( ) , err
2015-04-22 17:20:05 +00:00
}
2015-05-08 16:48:31 +00:00
// dockerExitError is the error RunInContainer returns when an exec finishes
// with a non-zero exit code. It wraps the exec inspection result.
type dockerExitError struct {
Inspect * docker . ExecInspect
}
// String returns the same text as Error.
func ( d * dockerExitError ) String ( ) string {
return d . Error ( )
}
// Error returns a message that includes the exec's exit code.
func ( d * dockerExitError ) Error ( ) string {
return fmt . Sprintf ( "Error executing in Docker Container: %d" , d . Inspect . ExitCode )
}
// Exited reports whether the exec is no longer running.
func ( d * dockerExitError ) Exited ( ) bool {
return ! d . Inspect . Running
}
// ExitStatus returns the exec's exit code.
func ( d * dockerExitError ) ExitStatus ( ) int {
return d . Inspect . ExitCode
}
2015-05-27 12:51:01 +00:00
// ExecInContainer runs the command inside the container identified by containerID.
2015-10-07 17:58:05 +00:00
func ( dm * DockerManager ) ExecInContainer ( containerID kubecontainer . ContainerID , cmd [ ] string , stdin io . Reader , stdout , stderr io . WriteCloser , tty bool ) error {
2015-05-27 12:51:01 +00:00
if dm . execHandler == nil {
return errors . New ( "unable to exec without an exec handler" )
2015-04-22 17:20:05 +00:00
}
2015-10-07 17:58:05 +00:00
container , err := dm . client . InspectContainer ( containerID . ID )
2015-04-22 17:20:05 +00:00
if err != nil {
return err
}
if ! container . State . Running {
2015-08-08 01:52:23 +00:00
return fmt . Errorf ( "container not running (%s)" , container . ID )
2015-04-22 17:20:05 +00:00
}
2015-05-27 12:51:01 +00:00
return dm . execHandler . ExecInContainer ( dm . client , container , cmd , stdin , stdout , stderr , tty )
2015-04-22 17:20:05 +00:00
}
2015-10-07 17:58:05 +00:00
func ( dm * DockerManager ) AttachContainer ( containerID kubecontainer . ContainerID , stdin io . Reader , stdout , stderr io . WriteCloser , tty bool ) error {
2015-07-28 04:48:55 +00:00
opts := docker . AttachToContainerOptions {
2015-10-07 17:58:05 +00:00
Container : containerID . ID ,
2015-07-28 04:48:55 +00:00
InputStream : stdin ,
OutputStream : stdout ,
ErrorStream : stderr ,
2015-07-28 22:56:27 +00:00
Stream : true ,
2015-07-28 04:48:55 +00:00
Logs : true ,
Stdin : stdin != nil ,
Stdout : stdout != nil ,
Stderr : stderr != nil ,
RawTerminal : tty ,
}
return dm . client . AttachToContainer ( opts )
}
2015-06-05 21:10:45 +00:00
// noPodInfraContainerError builds the error reported when a pod has no pod
// infra container; the message names the pod by its full name.
func noPodInfraContainerError ( podName , podNamespace string ) error {
return fmt . Errorf ( "cannot find pod infra container in pod %q" , kubecontainer . BuildPodFullName ( podName , podNamespace ) )
}
2015-04-22 17:20:05 +00:00
// PortForward executes socat in the pod's network namespace and copies
// data between stream (representing the user's local connection on their
// computer) and the specified port in the container.
//
// TODO:
// - match cgroups of container
// - should we support nsenter + socat on the host? (current impl)
// - should we support nsenter + socat in a container, running with elevated privs and --pid=host?
func ( dm * DockerManager ) PortForward ( pod * kubecontainer . Pod , port uint16 , stream io . ReadWriteCloser ) error {
podInfraContainer := pod . FindContainerByName ( PodInfraContainerName )
if podInfraContainer == nil {
2015-06-05 21:10:45 +00:00
return noPodInfraContainerError ( pod . Name , pod . Namespace )
2015-04-22 17:20:05 +00:00
}
2015-10-07 17:58:05 +00:00
container , err := dm . client . InspectContainer ( podInfraContainer . ID . ID )
2015-04-22 17:20:05 +00:00
if err != nil {
return err
}
if ! container . State . Running {
2015-08-08 01:52:23 +00:00
return fmt . Errorf ( "container not running (%s)" , container . ID )
2015-04-22 17:20:05 +00:00
}
containerPid := container . State . Pid
2015-09-22 20:29:51 +00:00
socatPath , lookupErr := exec . LookPath ( "socat" )
2015-04-22 17:20:05 +00:00
if lookupErr != nil {
2015-09-22 20:29:51 +00:00
return fmt . Errorf ( "unable to do port forwarding: socat not found." )
2015-04-22 17:20:05 +00:00
}
2015-09-22 20:29:51 +00:00
args := [ ] string { "-t" , fmt . Sprintf ( "%d" , containerPid ) , "-n" , socatPath , "-" , fmt . Sprintf ( "TCP4:localhost:%d" , port ) }
nsenterPath , lookupErr := exec . LookPath ( "nsenter" )
if lookupErr != nil {
return fmt . Errorf ( "unable to do port forwarding: nsenter not found." )
}
2016-01-28 20:28:40 +00:00
commandString := fmt . Sprintf ( "%s %s" , nsenterPath , strings . Join ( args , " " ) )
glog . V ( 4 ) . Infof ( "executing port forwarding command: %s" , commandString )
2015-09-22 20:29:51 +00:00
command := exec . Command ( nsenterPath , args ... )
2015-04-22 17:20:05 +00:00
command . Stdout = stream
2015-09-22 20:29:51 +00:00
2016-01-28 20:28:40 +00:00
stderr := new ( bytes . Buffer )
command . Stderr = stderr
2015-09-22 20:29:51 +00:00
// If we use Stdin, command.Run() won't return until the goroutine that's copying
// from stream finishes. Unfortunately, if you have a client like telnet connected
// via port forwarding, as long as the user's telnet client is connected to the user's
// local listener that port forwarding sets up, the telnet session never exits. This
// means that even if socat has finished running, command.Run() won't ever return
// (because the client still has the connection and stream open).
//
// The work around is to use StdinPipe(), as Wait() (called by Run()) closes the pipe
// when the command (socat) exits.
inPipe , err := command . StdinPipe ( )
if err != nil {
return fmt . Errorf ( "unable to do port forwarding: error creating stdin pipe: %v" , err )
}
go func ( ) {
io . Copy ( inPipe , stream )
inPipe . Close ( )
} ( )
2016-01-28 20:28:40 +00:00
if err := command . Run ( ) ; err != nil {
return fmt . Errorf ( "%v: %s" , err , stderr . String ( ) )
}
return nil
2015-04-22 17:20:05 +00:00
}
2015-04-23 22:40:54 +00:00
2015-09-09 21:00:41 +00:00
// GetContainerIP gets the IP address of a container's interface using nsenter.
//
// It enters the network namespace of the container's pid and runs a bash
// pipeline (ip | grep | awk) against interfaceName. The returned string is the
// raw combined output of that command. An error is returned when nsenter is
// missing, the container cannot be inspected or is not running, or the command fails.
func (dm *DockerManager) GetContainerIP(containerID, interfaceName string) (string, error) {
	if _, lookupErr := exec.LookPath("nsenter"); lookupErr != nil {
		return "", fmt.Errorf("Unable to obtain IP address of container: missing nsenter.")
	}

	inspected, err := dm.client.InspectContainer(containerID)
	if err != nil {
		return "", err
	}
	if !inspected.State.Running {
		return "", fmt.Errorf("container not running (%s)", inspected.ID)
	}

	pid := fmt.Sprintf("%d", inspected.State.Pid)
	script := fmt.Sprintf("ip -4 addr show %s | grep inet | awk -F\" \" '{print $2}'", interfaceName)

	output, err := exec.Command("nsenter", "-t", pid, "-n", "--", "bash", "-c", script).CombinedOutput()
	if err != nil {
		return "", err
	}
	return string(output), nil
}
2015-12-31 08:41:05 +00:00
// TODO(random-liu): Change running pod to pod status in the future. We can't do it now, because kubelet also uses this function without pod status.
2015-12-05 00:06:25 +00:00
// We can only deprecate this after refactoring kubelet.
2015-12-30 07:46:52 +00:00
// TODO(random-liu): After using pod status for KillPod(), we can also remove the kubernetesPodLabel, because all the needed information should have
// been extract from new labels and stored in pod status.
2015-08-20 01:57:58 +00:00
func ( dm * DockerManager ) KillPod ( pod * api . Pod , runningPod kubecontainer . Pod ) error {
2016-01-13 23:15:18 +00:00
result := dm . killPodWithSyncResult ( pod , runningPod )
return result . Error ( )
}
// killPodWithSyncResult kills every container of runningPod and reports the
// outcome of each step in the returned PodSyncResult.
//
// All containers except the pod infra container are killed in parallel, one
// goroutine each. The infra container is only remembered during that phase and
// handled afterwards: the pod network is torn down through the network plugin
// (unless the container runs in host network mode) and then the infra
// container itself is killed.
//
// TODO(random-liu): This is just a temporary function, will be removed when we actually add PodSyncResult
// NOTE(random-liu): The pod passed in could be *nil* when kubelet restarted.
func ( dm * DockerManager ) killPodWithSyncResult ( pod * api . Pod , runningPod kubecontainer . Pod ) ( result kubecontainer . PodSyncResult ) {
// Send the kills in parallel since they may take a long time.
// There may be len(runningPod.Containers) or len(runningPod.Containers)-1 of result in the channel
// (the infra container, if present, does not send a result from its goroutine).
containerResults := make ( chan * kubecontainer . SyncResult , len ( runningPod . Containers ) )
2015-04-27 22:34:01 +00:00
wg := sync . WaitGroup { }
2015-08-20 01:57:58 +00:00
var (
networkContainer * kubecontainer . Container
networkSpec * api . Container
)
2016-01-13 23:15:18 +00:00
wg . Add ( len ( runningPod . Containers ) )
2015-08-20 01:57:58 +00:00
for _ , container := range runningPod . Containers {
2015-04-27 22:34:01 +00:00
go func ( container * kubecontainer . Container ) {
2016-01-15 07:32:10 +00:00
defer utilruntime . HandleCrash ( )
2015-06-13 03:49:32 +00:00
defer wg . Done ( )
2015-04-28 18:02:29 +00:00
2015-08-20 01:57:58 +00:00
// Look up the container's spec by name; it stays nil when pod is nil or has no match.
var containerSpec * api . Container
if pod != nil {
for i , c := range pod . Spec . Containers {
if c . Name == container . Name {
containerSpec = & pod . Spec . Containers [ i ]
break
}
}
}
2015-04-28 18:02:29 +00:00
// TODO: Handle this without signaling the pod infra container to
// adapt to the generic container runtime.
if container . Name == PodInfraContainerName {
2015-06-13 03:49:32 +00:00
// Store the container runtime for later deletion.
// We do this so that PreStop handlers can run in the network namespace.
2015-08-20 01:57:58 +00:00
networkContainer = container
networkSpec = containerSpec
2015-06-13 03:49:32 +00:00
return
2015-04-28 18:02:29 +00:00
}
2015-08-20 01:57:58 +00:00
2016-01-13 23:15:18 +00:00
killContainerResult := kubecontainer . NewSyncResult ( kubecontainer . KillContainer , container . Name )
2015-10-10 09:12:47 +00:00
err := dm . KillContainerInPod ( container . ID , containerSpec , pod , "Need to kill pod." )
2015-08-20 01:57:58 +00:00
if err != nil {
2016-01-13 23:15:18 +00:00
killContainerResult . Fail ( kubecontainer . ErrKillContainer , err . Error ( ) )
2015-08-20 01:57:58 +00:00
glog . Errorf ( "Failed to delete container: %v; Skipping pod %q" , err , runningPod . ID )
2015-04-27 22:34:01 +00:00
}
2016-01-13 23:15:18 +00:00
containerResults <- killContainerResult
2015-04-27 22:34:01 +00:00
} ( container )
}
// Wait for every kill goroutine, then drain their results into the pod result.
wg . Wait ( )
2016-01-13 23:15:18 +00:00
close ( containerResults )
for containerResult := range containerResults {
result . AddSyncResult ( containerResult )
}
2015-08-20 01:57:58 +00:00
if networkContainer != nil {
2016-01-22 21:14:41 +00:00
ins , err := dm . client . InspectContainer ( networkContainer . ID . ID )
if err != nil {
2016-02-10 02:28:26 +00:00
err = fmt . Errorf ( "Error inspecting container %v: %v" , networkContainer . ID . ID , err )
glog . Error ( err )
result . Fail ( err )
2016-01-22 21:14:41 +00:00
return
}
2016-02-01 22:56:56 +00:00
// Only containers that are not in host network mode have pod networking to tear down.
if getDockerNetworkMode ( ins ) != namespaceModeHost {
2016-01-22 21:14:41 +00:00
teardownNetworkResult := kubecontainer . NewSyncResult ( kubecontainer . TeardownNetwork , kubecontainer . BuildPodFullName ( runningPod . Name , runningPod . Namespace ) )
result . AddSyncResult ( teardownNetworkResult )
2016-04-06 20:45:16 +00:00
if err := dm . networkPlugin . TearDownPod ( runningPod . Namespace , runningPod . Name , networkContainer . ID ) ; err != nil {
2016-01-22 21:14:41 +00:00
message := fmt . Sprintf ( "Failed to teardown network for pod %q using network plugins %q: %v" , runningPod . ID , dm . networkPlugin . Name ( ) , err )
teardownNetworkResult . Fail ( kubecontainer . ErrTeardownNetwork , message )
glog . Error ( message )
}
2015-06-13 03:49:32 +00:00
}
2016-01-13 23:15:18 +00:00
// Kill the infra container last; a teardown failure above does not prevent this.
killContainerResult := kubecontainer . NewSyncResult ( kubecontainer . KillContainer , networkContainer . Name )
result . AddSyncResult ( killContainerResult )
2015-10-10 09:12:47 +00:00
if err := dm . KillContainerInPod ( networkContainer . ID , networkSpec , pod , "Need to kill pod." ) ; err != nil {
2016-01-13 23:15:18 +00:00
killContainerResult . Fail ( kubecontainer . ErrKillContainer , err . Error ( ) )
2015-08-20 01:57:58 +00:00
glog . Errorf ( "Failed to delete container: %v; Skipping pod %q" , err , runningPod . ID )
2015-06-13 03:49:32 +00:00
}
}
2016-01-13 23:15:18 +00:00
return
2015-04-27 22:34:01 +00:00
}
2015-08-20 01:57:58 +00:00
// KillContainerInPod kills a container in the pod. It must be passed either a container ID or a container and pod,
// and will attempt to lookup the other information if missing.
2015-10-10 09:12:47 +00:00
func ( dm * DockerManager ) KillContainerInPod ( containerID kubecontainer . ContainerID , container * api . Container , pod * api . Pod , message string ) error {
2015-08-20 01:57:58 +00:00
switch {
2015-10-07 17:58:05 +00:00
case containerID . IsEmpty ( ) :
2015-08-20 01:57:58 +00:00
// Locate the container.
pods , err := dm . GetPods ( false )
if err != nil {
return err
}
targetPod := kubecontainer . Pods ( pods ) . FindPod ( kubecontainer . GetPodFullName ( pod ) , pod . UID )
targetContainer := targetPod . FindContainerByName ( container . Name )
if targetContainer == nil {
return fmt . Errorf ( "unable to find container %q in pod %q" , container . Name , targetPod . Name )
}
containerID = targetContainer . ID
2015-05-21 22:36:44 +00:00
2015-08-20 01:57:58 +00:00
case container == nil || pod == nil :
// Read information about the container from labels
2015-10-07 17:58:05 +00:00
inspect , err := dm . client . InspectContainer ( containerID . ID )
2015-08-20 01:57:58 +00:00
if err != nil {
return err
}
storedPod , storedContainer , cerr := containerAndPodFromLabels ( inspect )
if cerr != nil {
glog . Errorf ( "unable to access pod data from container: %v" , err )
}
if container == nil {
container = storedContainer
}
if pod == nil {
pod = storedPod
}
}
2015-10-10 09:12:47 +00:00
return dm . killContainer ( containerID , container , pod , message )
2015-08-19 00:34:49 +00:00
}
2015-06-03 00:36:58 +00:00
2015-08-20 01:57:58 +00:00
// killContainer accepts a containerID and an optional container or pod containing shutdown policies. Invoke
// KillContainerInPod if information must be retrieved first.
//
// The grace period starts at minimumGracePeriodInSeconds and, when pod is set,
// is replaced by pod.DeletionGracePeriodSeconds or else by
// pod.Spec.TerminationGracePeriodSeconds. If the container defines a PreStop
// hook (and pod is set), the hook is run first and waited on for at most the
// grace period; the whole seconds elapsed are then deducted from the period.
// The container is finally stopped with the remaining grace period, never less
// than the minimum. If a ref is registered for the container, a
// KillingContainer event carrying reason is recorded and the ref is cleared.
// The returned error is the one from StopContainer.
2015-10-10 09:12:47 +00:00
func ( dm * DockerManager ) killContainer ( containerID kubecontainer . ContainerID , container * api . Container , pod * api . Pod , reason string ) error {
2015-10-07 17:58:05 +00:00
ID := containerID . ID
2015-08-20 01:57:58 +00:00
// name is only used in log messages: the ID plus the container name and pod namespace/name when known.
name := ID
if container != nil {
name = fmt . Sprintf ( "%s %s" , name , container . Name )
2015-06-03 00:36:58 +00:00
}
2015-08-20 01:57:58 +00:00
if pod != nil {
name = fmt . Sprintf ( "%s %s/%s" , name , pod . Namespace , pod . Name )
2015-08-19 00:34:49 +00:00
}
2015-08-20 01:57:58 +00:00
gracePeriod := int64 ( minimumGracePeriodInSeconds )
if pod != nil {
// The deletion grace period takes precedence over the spec's termination grace period.
switch {
case pod . DeletionGracePeriodSeconds != nil :
gracePeriod = * pod . DeletionGracePeriodSeconds
case pod . Spec . TerminationGracePeriodSeconds != nil :
gracePeriod = * pod . Spec . TerminationGracePeriodSeconds
}
}
glog . V ( 2 ) . Infof ( "Killing container %q with %d second grace period" , name , gracePeriod )
2015-09-17 22:21:55 +00:00
start := unversioned . Now ( )
2015-08-20 01:57:58 +00:00
if pod != nil && container != nil && container . Lifecycle != nil && container . Lifecycle . PreStop != nil {
glog . V ( 4 ) . Infof ( "Running preStop hook for container %q" , name )
done := make ( chan struct { } )
// The hook runs in its own goroutine so the wait below can time out; done is closed when it returns.
go func ( ) {
defer close ( done )
2016-01-15 07:32:10 +00:00
defer utilruntime . HandleCrash ( )
2015-10-07 17:58:05 +00:00
if err := dm . runner . Run ( containerID , pod , container , container . Lifecycle . PreStop ) ; err != nil {
2015-08-20 01:57:58 +00:00
glog . Errorf ( "preStop hook for container %q failed: %v" , name , err )
2015-06-03 00:36:58 +00:00
}
2015-08-20 01:57:58 +00:00
} ( )
select {
case <- time . After ( time . Duration ( gracePeriod ) * time . Second ) :
glog . V ( 2 ) . Infof ( "preStop hook for container %q did not complete in %d seconds" , name , gracePeriod )
case <- done :
glog . V ( 4 ) . Infof ( "preStop hook for container %q completed" , name )
2015-06-03 00:36:58 +00:00
}
2015-09-17 22:21:55 +00:00
// Deduct the whole seconds already spent on the hook from the remaining grace period.
gracePeriod -= int64 ( unversioned . Now ( ) . Sub ( start . Time ) . Seconds ( ) )
2015-06-03 00:36:58 +00:00
}
2015-08-20 01:57:58 +00:00
// always give containers a minimal shutdown window to avoid unnecessary SIGKILLs
if gracePeriod < minimumGracePeriodInSeconds {
gracePeriod = minimumGracePeriodInSeconds
}
2016-04-14 17:37:35 +00:00
err := dm . client . StopContainer ( ID , int ( gracePeriod ) )
2015-08-20 01:57:58 +00:00
if err == nil {
2015-09-17 22:21:55 +00:00
glog . V ( 2 ) . Infof ( "Container %q exited after %s" , name , unversioned . Now ( ) . Sub ( start . Time ) )
2015-08-20 01:57:58 +00:00
} else {
2015-09-17 22:21:55 +00:00
glog . V ( 2 ) . Infof ( "Container %q termination failed after %s: %v" , name , unversioned . Now ( ) . Sub ( start . Time ) , err )
2015-08-20 01:57:58 +00:00
}
2015-10-07 17:58:05 +00:00
// The event and ref cleanup happen whether or not StopContainer succeeded.
ref , ok := dm . containerRefManager . GetRef ( containerID )
2015-08-19 00:34:49 +00:00
if ! ok {
2015-08-20 01:57:58 +00:00
glog . Warningf ( "No ref for pod '%q'" , name )
2015-08-19 00:34:49 +00:00
} else {
2016-01-11 07:55:51 +00:00
message := fmt . Sprintf ( "Killing container with docker id %v" , utilstrings . ShortenString ( ID , 12 ) )
2015-10-10 09:12:47 +00:00
if reason != "" {
message = fmt . Sprint ( message , ": " , reason )
}
2015-11-13 22:30:01 +00:00
dm . recorder . Event ( ref , api . EventTypeNormal , kubecontainer . KillingContainer , message )
2015-10-07 17:58:05 +00:00
dm . containerRefManager . ClearRef ( containerID )
2015-06-03 00:36:58 +00:00
}
2015-08-19 00:34:49 +00:00
return err
2015-06-03 00:36:58 +00:00
}
2015-08-20 01:57:58 +00:00
// errNoPodOnContainer is the sentinel error returned when a Docker container
// carries no label information from which a pod could be reconstructed.
var errNoPodOnContainer = errors.New("no pod information labels on Docker container")
// containerAndPodFromLabels tries to load the appropriate container info off of a Docker container's labels
2016-04-04 22:27:20 +00:00
func containerAndPodFromLabels ( inspect * dockertypes . ContainerJSON ) ( pod * api . Pod , container * api . Container , err error ) {
2015-08-20 01:57:58 +00:00
if inspect == nil && inspect . Config == nil && inspect . Config . Labels == nil {
return nil , nil , errNoPodOnContainer
}
labels := inspect . Config . Labels
// the pod data may not be set
if body , found := labels [ kubernetesPodLabel ] ; found {
pod = & api . Pod { }
2016-01-22 05:11:30 +00:00
if err = runtime . DecodeInto ( api . Codecs . UniversalDecoder ( ) , [ ] byte ( body ) , pod ) ; err == nil {
2015-12-30 07:46:52 +00:00
name := labels [ kubernetesContainerNameLabel ]
2015-08-20 01:57:58 +00:00
for ix := range pod . Spec . Containers {
if pod . Spec . Containers [ ix ] . Name == name {
container = & pod . Spec . Containers [ ix ]
break
}
}
if container == nil {
err = fmt . Errorf ( "unable to find container %s in pod %v" , name , pod )
}
} else {
pod = nil
}
}
// attempt to find the default grace period if we didn't commit a pod, but set the generic metadata
// field (the one used by kill)
if pod == nil {
2015-12-30 07:46:52 +00:00
if period , ok := labels [ kubernetesPodTerminationGracePeriodLabel ] ; ok {
2015-08-20 01:57:58 +00:00
if seconds , err := strconv . ParseInt ( period , 10 , 64 ) ; err == nil {
pod = & api . Pod { }
pod . DeletionGracePeriodSeconds = & seconds
}
}
}
return
}
2016-04-04 22:27:20 +00:00
func ( dm * DockerManager ) applyOOMScoreAdj ( container * api . Container , containerInfo * dockertypes . ContainerJSON ) error {
2016-01-26 20:29:10 +00:00
cgroupName , err := dm . procFs . GetFullContainerName ( containerInfo . State . Pid )
if err != nil {
if err == os . ErrNotExist {
// Container exited. We cannot do anything about it. Ignore this error.
glog . V ( 2 ) . Infof ( "Failed to apply OOM score adj on container %q with ID %q. Init process does not exist." , containerInfo . Name , containerInfo . ID )
return nil
}
return err
}
2016-02-26 09:06:26 +00:00
oomScoreAdj := dm . calculateOomScoreAdj ( container )
2016-01-26 20:29:10 +00:00
if err = dm . oomAdjuster . ApplyOOMScoreAdjContainer ( cgroupName , oomScoreAdj , 5 ) ; err != nil {
if err == os . ErrNotExist {
// Container exited. We cannot do anything about it. Ignore this error.
glog . V ( 2 ) . Infof ( "Failed to apply OOM score adj on container %q with ID %q. Init process does not exist." , containerInfo . Name , containerInfo . ID )
return nil
}
return err
}
return nil
}
2015-04-23 23:38:22 +00:00
// Run a single container from a pod. Returns the docker container ID
2015-10-23 20:02:32 +00:00
// If do not need to pass labels, just pass nil.
2016-03-07 20:24:08 +00:00
func ( dm * DockerManager ) runContainerInPod ( pod * api . Pod , container * api . Container , netMode , ipcMode , pidMode , podIP string , restartCount int ) ( kubecontainer . ContainerID , error ) {
2015-06-09 21:01:23 +00:00
start := time . Now ( )
defer func ( ) {
metrics . ContainerManagerLatency . WithLabelValues ( "runContainerInPod" ) . Observe ( metrics . SinceInMicroseconds ( start ) )
} ( )
2015-04-23 23:38:22 +00:00
ref , err := kubecontainer . GenerateContainerRef ( pod , container )
if err != nil {
2016-01-26 00:31:32 +00:00
glog . Errorf ( "Can't make a ref to pod %v, container %v: '%v'" , pod . Name , container . Name , err )
2015-04-23 23:38:22 +00:00
}
2016-03-07 20:24:08 +00:00
opts , err := dm . runtimeHelper . GenerateRunContainerOptions ( pod , container , podIP )
2015-04-23 23:38:22 +00:00
if err != nil {
2016-01-26 00:31:32 +00:00
return kubecontainer . ContainerID { } , fmt . Errorf ( "GenerateRunContainerOptions: %v" , err )
2015-04-23 23:38:22 +00:00
}
2015-08-19 05:02:10 +00:00
utsMode := ""
2016-04-14 19:00:51 +00:00
if kubecontainer . IsHostNetworkPod ( pod ) {
2016-01-22 21:14:41 +00:00
utsMode = namespaceModeHost
2015-08-19 05:02:10 +00:00
}
2016-01-26 15:03:37 +00:00
2016-02-26 09:06:26 +00:00
oomScoreAdj := dm . calculateOomScoreAdj ( container )
2016-01-26 15:03:37 +00:00
id , err := dm . runContainer ( pod , container , opts , ref , netMode , ipcMode , utsMode , pidMode , restartCount , oomScoreAdj )
2015-04-23 23:38:22 +00:00
if err != nil {
2016-01-26 00:31:32 +00:00
return kubecontainer . ContainerID { } , fmt . Errorf ( "runContainer: %v" , err )
2015-04-23 23:38:22 +00:00
}
// Remember this reference so we can report events about this container
if ref != nil {
dm . containerRefManager . SetRef ( id , ref )
}
if container . Lifecycle != nil && container . Lifecycle . PostStart != nil {
2015-05-01 01:37:15 +00:00
handlerErr := dm . runner . Run ( id , pod , container , container . Lifecycle . PostStart )
2015-04-23 23:38:22 +00:00
if handlerErr != nil {
2016-01-26 00:31:32 +00:00
err := fmt . Errorf ( "PostStart handler: %v" , handlerErr )
2015-10-10 09:12:47 +00:00
dm . KillContainerInPod ( id , container , pod , err . Error ( ) )
return kubecontainer . ContainerID { } , err
2015-04-23 23:38:22 +00:00
}
}
2015-04-27 20:03:55 +00:00
// Create a symbolic link to the Docker container log file using a name which captures the
// full pod name, the container name and the Docker container ID. Cluster level logging will
// capture these symbolic filenames which can be used for search terms in Elasticsearch or for
// labels for Cloud Logging.
2015-10-07 17:58:05 +00:00
containerLogFile := path . Join ( dm . dockerRoot , "containers" , id . ID , fmt . Sprintf ( "%s-json.log" , id . ID ) )
2015-12-07 21:31:02 +00:00
symlinkFile := LogSymlink ( dm . containerLogsDir , kubecontainer . GetPodFullName ( pod ) , container . Name , id . ID )
2015-04-27 20:03:55 +00:00
if err = dm . os . Symlink ( containerLogFile , symlinkFile ) ; err != nil {
2015-12-07 21:31:02 +00:00
glog . Errorf ( "Failed to create symbolic link to the log file of pod %q container %q: %v" , format . Pod ( pod ) , container . Name , err )
2015-04-27 20:03:55 +00:00
}
2015-06-15 21:20:17 +00:00
2015-08-04 00:28:33 +00:00
// Container information is used in adjusting OOM scores and adding ndots.
2015-10-07 17:58:05 +00:00
containerInfo , err := dm . client . InspectContainer ( id . ID )
2015-06-15 21:20:17 +00:00
if err != nil {
2016-01-26 00:31:32 +00:00
return kubecontainer . ContainerID { } , fmt . Errorf ( "InspectContainer: %v" , err )
2015-06-15 21:20:17 +00:00
}
// Ensure the PID actually exists, else we'll move ourselves.
if containerInfo . State . Pid == 0 {
2016-01-26 00:31:32 +00:00
return kubecontainer . ContainerID { } , fmt . Errorf ( "can't get init PID for container %q" , id )
2015-06-15 21:20:17 +00:00
}
2015-08-04 00:28:33 +00:00
2016-01-26 15:03:37 +00:00
// Check if current docker version is higher than 1.10. Otherwise, we have to apply OOMScoreAdj instead of using docker API.
err = dm . applyOOMScoreAdjIfNeeded ( container , containerInfo )
if err != nil {
return kubecontainer . ContainerID { } , err
2015-08-04 00:28:33 +00:00
}
2016-01-26 15:03:37 +00:00
2015-08-04 00:28:33 +00:00
// The addNDotsOption call appends the ndots option to the resolv.conf file generated by docker.
// This resolv.conf file is shared by all containers of the same pod, and needs to be modified only once per pod.
// we modify it when the pause container is created since it is the first container created in the pod since it holds
// the networking namespace.
2016-01-22 21:14:41 +00:00
if container . Name == PodInfraContainerName && utsMode != namespaceModeHost {
2015-08-04 00:28:33 +00:00
err = addNDotsOption ( containerInfo . ResolvConfPath )
2016-01-26 00:31:32 +00:00
if err != nil {
return kubecontainer . ContainerID { } , fmt . Errorf ( "addNDotsOption: %v" , err )
}
2015-06-15 21:20:17 +00:00
}
2015-10-07 17:58:05 +00:00
return id , err
2015-04-23 23:38:22 +00:00
}
2015-04-27 17:45:10 +00:00
2016-04-04 22:27:20 +00:00
func ( dm * DockerManager ) applyOOMScoreAdjIfNeeded ( container * api . Container , containerInfo * dockertypes . ContainerJSON ) error {
2016-03-14 08:35:49 +00:00
// Compare current API version with expected api version.
result , err := dm . checkDockerAPIVersion ( dockerv110APIVersion )
if err != nil {
return fmt . Errorf ( "Failed to check docker api version: %v" , err )
}
2016-01-26 15:03:37 +00:00
// If current api version is older than OOMScoreAdj requested, use the old way.
if result < 0 {
if err := dm . applyOOMScoreAdj ( container , containerInfo ) ; err != nil {
2016-03-14 08:35:49 +00:00
return fmt . Errorf ( "Failed to apply oom-score-adj to container %q- %v" , err , containerInfo . Name )
2016-01-26 15:03:37 +00:00
}
}
return nil
}
2016-02-26 09:06:26 +00:00
// calculateOomScoreAdj returns the OOM score adjustment for the container.
//
// The score is based on the priority of the container: processes in
// lower-priority pods should be killed first if the system runs out of memory.
// The pod infra container gets the fixed qos.PodInfraOOMAdj value, since if it
// is killed the whole pod will die; every other container gets the value qos
// derives from the container and the machine's memory capacity.
func (dm *DockerManager) calculateOomScoreAdj(container *api.Container) int {
	if container.Name == PodInfraContainerName {
		return qos.PodInfraOOMAdj
	}
	return qos.GetContainerOOMScoreAdjust(container, int64(dm.machineInfo.MemoryCapacity))
}
// checkDockerAPIVersion checks current docker API version against expected version.
2016-01-26 15:03:37 +00:00
// Return:
// 1 : newer than expected version
// -1: older than expected version
// 0 : same version
2016-03-14 08:35:49 +00:00
func ( dm * DockerManager ) checkDockerAPIVersion ( expectedVersion string ) ( int , error ) {
2016-04-17 10:14:03 +00:00
apiVersion , _ , err := dm . getVersionInfo ( )
2016-01-26 15:03:37 +00:00
if err != nil {
2016-03-14 08:35:49 +00:00
return 0 , err
2016-01-26 15:03:37 +00:00
}
result , err := apiVersion . Compare ( expectedVersion )
if err != nil {
2016-03-14 08:35:49 +00:00
return 0 , fmt . Errorf ( "Failed to compare current docker api version %v with OOMScoreAdj supported Docker version %q - %v" ,
2016-01-26 15:03:37 +00:00
apiVersion , expectedVersion , err )
}
2016-03-14 08:35:49 +00:00
return result , nil
2016-01-26 15:03:37 +00:00
}
2015-06-23 23:36:06 +00:00
func addNDotsOption ( resolvFilePath string ) error {
if len ( resolvFilePath ) == 0 {
2016-01-26 00:31:32 +00:00
glog . Errorf ( "ResolvConfPath is empty." )
2015-06-23 23:36:06 +00:00
return nil
}
if _ , err := os . Stat ( resolvFilePath ) ; os . IsNotExist ( err ) {
2016-01-26 00:31:32 +00:00
return fmt . Errorf ( "ResolvConfPath %q does not exist" , resolvFilePath )
2015-06-23 23:36:06 +00:00
}
glog . V ( 4 ) . Infof ( "DNS ResolvConfPath exists: %s. Will attempt to add ndots option: %s" , resolvFilePath , ndotsDNSOption )
if err := appendToFile ( resolvFilePath , ndotsDNSOption ) ; err != nil {
2016-01-26 00:31:32 +00:00
glog . Errorf ( "resolv.conf could not be updated: %v" , err )
2015-06-23 23:36:06 +00:00
return err
}
return nil
}
// appendToFile appends stringToAppend to the end of the existing file at
// filePath. The file is opened write-only in append mode and is not created
// if it is missing, so a non-existent path yields an error.
//
// It returns the first error encountered while opening, writing or closing
// the file.
func appendToFile(filePath, stringToAppend string) (err error) {
	f, err := os.OpenFile(filePath, os.O_APPEND|os.O_WRONLY, 0644)
	if err != nil {
		return err
	}
	// On a write path the Close error can be the only sign that data did not
	// reach the file, so surface it unless an earlier error is already set.
	defer func() {
		if closeErr := f.Close(); err == nil {
			err = closeErr
		}
	}()

	_, err = f.WriteString(stringToAppend)
	return err
}
2015-05-01 01:37:15 +00:00
// createPodInfraContainer starts the pod infra container for a pod. Returns the docker container ID of the newly created container.
2016-01-13 23:15:18 +00:00
// If any error occurs in this function, it will return a brief error and a detailed error message.
func ( dm * DockerManager ) createPodInfraContainer ( pod * api . Pod ) ( kubecontainer . DockerID , error , string ) {
2015-06-09 21:01:23 +00:00
start := time . Now ( )
defer func ( ) {
metrics . ContainerManagerLatency . WithLabelValues ( "createPodInfraContainer" ) . Observe ( metrics . SinceInMicroseconds ( start ) )
} ( )
2015-04-27 17:45:10 +00:00
// Use host networking if specified.
netNamespace := ""
var ports [ ] api . ContainerPort
2016-04-14 19:00:51 +00:00
if kubecontainer . IsHostNetworkPod ( pod ) {
2016-01-22 21:14:41 +00:00
netNamespace = namespaceModeHost
2016-02-01 22:56:56 +00:00
} else if dm . networkPlugin . Name ( ) == "cni" || dm . networkPlugin . Name ( ) == "kubenet" {
netNamespace = "none"
2015-04-27 17:45:10 +00:00
} else {
// Docker only exports ports from the pod infra container. Let's
// collect all of the relevant ports and export them.
for _ , container := range pod . Spec . Containers {
ports = append ( ports , container . Ports ... )
}
}
container := & api . Container {
2015-06-09 00:53:24 +00:00
Name : PodInfraContainerName ,
Image : dm . podInfraContainerImage ,
Ports : ports ,
ImagePullPolicy : podInfraContainerImagePullPolicy ,
2016-02-05 15:47:06 +00:00
Env : dm . podInfraContainerEnv ,
2015-04-27 17:45:10 +00:00
}
2015-06-09 00:53:24 +00:00
// No pod secrets for the infra container.
2016-02-12 19:33:32 +00:00
// The message isn't needed for the Infra container
2016-01-13 23:15:18 +00:00
if err , msg := dm . imagePuller . PullImage ( pod , container , nil ) ; err != nil {
return "" , err , msg
2015-04-27 17:45:10 +00:00
}
2015-10-30 05:42:25 +00:00
// Currently we don't care about restart count of infra container, just set it to 0.
2016-03-07 20:24:08 +00:00
id , err := dm . runContainerInPod ( pod , container , netNamespace , getIPCMode ( pod ) , getPidMode ( pod ) , "" , 0 )
2015-04-27 17:45:10 +00:00
if err != nil {
2016-01-13 23:15:18 +00:00
return "" , kubecontainer . ErrRunContainer , err . Error ( )
2015-04-27 17:45:10 +00:00
}
2016-01-13 23:15:18 +00:00
return kubecontainer . DockerID ( id . ID ) , nil , ""
2015-04-27 17:45:10 +00:00
}
2015-04-29 00:51:21 +00:00
// Structure keeping information on changes that need to happen for a pod. The semantics is as follows:
// - startInfraContainer is true if new Infra Containers have to be started and old one (if running) killed.
// Additionally if it is true then containersToKeep have to be empty
2015-09-15 02:25:13 +00:00
// - infraContainerId have to be set if and only if startInfraContainer is false. It stores dockerID of running Infra Container
2015-10-10 09:12:47 +00:00
// - containersToStart keeps indices of Specs of containers that have to be started and reasons why containers will be started.
2015-04-29 00:51:21 +00:00
// - containersToKeep stores mapping from dockerIDs of running containers to indices of their Specs for containers that
// should be kept running. If startInfraContainer is false then it contains an entry for infraContainerId (mapped to -1).
// It shouldn't be the case where containersToStart is empty and containersToKeep contains only infraContainerId. In such case
// Infra Container should be killed, hence it's removed from this map.
// - all running containers which are NOT contained in containersToKeep should be killed.
2015-11-25 09:26:38 +00:00
type podContainerChangesSpec struct {
2015-04-29 00:51:21 +00:00
StartInfraContainer bool
2015-10-29 11:41:22 +00:00
InfraChanged bool
2015-12-24 23:46:56 +00:00
InfraContainerId kubecontainer . DockerID
2015-10-10 09:12:47 +00:00
ContainersToStart map [ int ] string
2015-12-24 23:46:56 +00:00
ContainersToKeep map [ kubecontainer . DockerID ] int
2015-04-29 00:51:21 +00:00
}
2015-11-25 09:26:38 +00:00
func ( dm * DockerManager ) computePodContainerChanges ( pod * api . Pod , podStatus * kubecontainer . PodStatus ) ( podContainerChangesSpec , error ) {
2015-06-09 21:01:23 +00:00
start := time . Now ( )
defer func ( ) {
metrics . ContainerManagerLatency . WithLabelValues ( "computePodContainerChanges" ) . Observe ( metrics . SinceInMicroseconds ( start ) )
} ( )
2016-02-20 20:07:23 +00:00
glog . V ( 5 ) . Infof ( "Syncing Pod %q: %+v" , format . Pod ( pod ) , pod )
2015-04-29 00:51:21 +00:00
2015-10-10 09:12:47 +00:00
containersToStart := make ( map [ int ] string )
2015-12-24 23:46:56 +00:00
containersToKeep := make ( map [ kubecontainer . DockerID ] int )
2015-04-29 00:51:21 +00:00
var err error
2015-12-24 23:46:56 +00:00
var podInfraContainerID kubecontainer . DockerID
2015-04-29 00:51:21 +00:00
var changed bool
2015-12-05 00:06:25 +00:00
podInfraContainerStatus := podStatus . FindContainerStatusByName ( PodInfraContainerName )
if podInfraContainerStatus != nil && podInfraContainerStatus . State == kubecontainer . ContainerStateRunning {
2015-12-07 21:31:02 +00:00
glog . V ( 4 ) . Infof ( "Found pod infra container for %q" , format . Pod ( pod ) )
2015-12-05 00:06:25 +00:00
changed , err = dm . podInfraContainerChanged ( pod , podInfraContainerStatus )
2015-04-29 00:51:21 +00:00
if err != nil {
2015-11-25 09:26:38 +00:00
return podContainerChangesSpec { } , err
2015-04-29 00:51:21 +00:00
}
}
2015-10-07 16:53:27 +00:00
createPodInfraContainer := true
2015-12-05 00:06:25 +00:00
if podInfraContainerStatus == nil || podInfraContainerStatus . State != kubecontainer . ContainerStateRunning {
2015-12-07 21:31:02 +00:00
glog . V ( 2 ) . Infof ( "Need to restart pod infra container for %q because it is not found" , format . Pod ( pod ) )
2015-04-29 00:51:21 +00:00
} else if changed {
2015-12-07 21:31:02 +00:00
glog . V ( 2 ) . Infof ( "Need to restart pod infra container for %q because it is changed" , format . Pod ( pod ) )
2015-04-29 00:51:21 +00:00
} else {
2015-12-07 21:31:02 +00:00
glog . V ( 4 ) . Infof ( "Pod infra container looks good, keep it %q" , format . Pod ( pod ) )
2015-04-29 00:51:21 +00:00
createPodInfraContainer = false
2015-12-24 23:46:56 +00:00
podInfraContainerID = kubecontainer . DockerID ( podInfraContainerStatus . ID . ID )
2015-04-29 00:51:21 +00:00
containersToKeep [ podInfraContainerID ] = - 1
}
for index , container := range pod . Spec . Containers {
2015-05-15 23:14:08 +00:00
expectedHash := kubecontainer . HashContainer ( & container )
2015-04-29 00:51:21 +00:00
2015-12-05 00:06:25 +00:00
containerStatus := podStatus . FindContainerStatusByName ( container . Name )
if containerStatus == nil || containerStatus . State != kubecontainer . ContainerStateRunning {
if kubecontainer . ShouldContainerBeRestarted ( & container , pod , podStatus ) {
2015-04-29 00:51:21 +00:00
// If we are here it means that the container is dead and should be restarted, or never existed and should
// be created. We may be inserting this ID again if the container has changed and it has
// RestartPolicy::Always, but it's not a big deal.
2015-10-10 09:12:47 +00:00
message := fmt . Sprintf ( "Container %+v is dead, but RestartPolicy says that we should restart it." , container )
glog . V ( 3 ) . Info ( message )
containersToStart [ index ] = message
2015-04-29 00:51:21 +00:00
}
continue
}
2015-12-24 23:46:56 +00:00
containerID := kubecontainer . DockerID ( containerStatus . ID . ID )
2015-12-05 00:06:25 +00:00
hash := containerStatus . Hash
2015-12-07 21:31:02 +00:00
glog . V ( 3 ) . Infof ( "pod %q container %q exists as %v" , format . Pod ( pod ) , container . Name , containerID )
2015-04-29 00:51:21 +00:00
if createPodInfraContainer {
// createPodInfraContainer == true and Container exists
2015-10-07 16:53:27 +00:00
// If we're creating infra container everything will be killed anyway
2015-04-29 00:51:21 +00:00
// If RestartPolicy is Always or OnFailure we restart containers that were running before we
// killed them when restarting Infra Container.
if pod . Spec . RestartPolicy != api . RestartPolicyNever {
2015-10-10 09:12:47 +00:00
message := fmt . Sprintf ( "Infra Container is being recreated. %q will be restarted." , container . Name )
glog . V ( 1 ) . Info ( message )
containersToStart [ index ] = message
2015-04-29 00:51:21 +00:00
}
continue
}
// At this point, the container is running and pod infra container is good.
// We will look for changes and check healthiness for the container.
containerChanged := hash != 0 && hash != expectedHash
if containerChanged {
2015-12-07 21:31:02 +00:00
message := fmt . Sprintf ( "pod %q container %q hash changed (%d vs %d), it will be killed and re-created." , format . Pod ( pod ) , container . Name , hash , expectedHash )
2015-10-10 09:12:47 +00:00
glog . Info ( message )
containersToStart [ index ] = message
2015-04-29 00:51:21 +00:00
continue
}
2015-12-05 00:06:25 +00:00
liveness , found := dm . livenessManager . Get ( containerStatus . ID )
2015-10-19 22:15:59 +00:00
if ! found || liveness == proberesults . Success {
2015-04-29 00:51:21 +00:00
containersToKeep [ containerID ] = index
continue
}
2015-10-06 20:14:26 +00:00
if pod . Spec . RestartPolicy != api . RestartPolicyNever {
2015-12-07 21:31:02 +00:00
message := fmt . Sprintf ( "pod %q container %q is unhealthy, it will be killed and re-created." , format . Pod ( pod ) , container . Name )
2015-10-10 09:12:47 +00:00
glog . Info ( message )
containersToStart [ index ] = message
2015-10-06 20:14:26 +00:00
}
2015-04-29 00:51:21 +00:00
}
// After the loop one of the following should be true:
// - createPodInfraContainer is true and containersToKeep is empty.
// (In fact, when createPodInfraContainer is false, containersToKeep will not be touched).
// - createPodInfraContainer is false and containersToKeep contains at least ID of Infra Container
// If Infra container is the last running one, we don't want to keep it.
if ! createPodInfraContainer && len ( containersToStart ) == 0 && len ( containersToKeep ) == 1 {
2015-12-24 23:46:56 +00:00
containersToKeep = make ( map [ kubecontainer . DockerID ] int )
2015-04-29 00:51:21 +00:00
}
2015-11-25 09:26:38 +00:00
return podContainerChangesSpec {
2015-04-29 00:51:21 +00:00
StartInfraContainer : createPodInfraContainer ,
2015-10-29 11:41:22 +00:00
InfraChanged : changed ,
2015-04-29 00:51:21 +00:00
InfraContainerId : podInfraContainerID ,
ContainersToStart : containersToStart ,
ContainersToKeep : containersToKeep ,
} , nil
}
2015-05-01 01:37:15 +00:00
// Sync the running pod to match the specified desired pod.
2016-03-09 02:58:24 +00:00
func ( dm * DockerManager ) SyncPod ( pod * api . Pod , _ api . PodStatus , podStatus * kubecontainer . PodStatus , pullSecrets [ ] api . Secret , backOff * flowcontrol . Backoff ) ( result kubecontainer . PodSyncResult ) {
2015-06-09 21:01:23 +00:00
start := time . Now ( )
defer func ( ) {
metrics . ContainerManagerLatency . WithLabelValues ( "SyncPod" ) . Observe ( metrics . SinceInMicroseconds ( start ) )
} ( )
2015-12-05 00:06:25 +00:00
containerChanges , err := dm . computePodContainerChanges ( pod , podStatus )
2015-05-01 01:37:15 +00:00
if err != nil {
2016-01-13 23:15:18 +00:00
result . Fail ( err )
return
2015-05-01 01:37:15 +00:00
}
2015-12-07 21:31:02 +00:00
glog . V ( 3 ) . Infof ( "Got container changes for pod %q: %+v" , format . Pod ( pod ) , containerChanges )
2015-05-01 01:37:15 +00:00
2015-10-29 11:41:22 +00:00
if containerChanges . InfraChanged {
ref , err := api . GetReference ( pod )
if err != nil {
2015-12-07 21:31:02 +00:00
glog . Errorf ( "Couldn't make a ref to pod %q: '%v'" , format . Pod ( pod ) , err )
2015-10-29 11:41:22 +00:00
}
2015-11-13 22:30:01 +00:00
dm . recorder . Eventf ( ref , api . EventTypeNormal , "InfraChanged" , "Pod infrastructure changed, it will be killed and re-created." )
2015-10-29 11:41:22 +00:00
}
2015-05-01 01:37:15 +00:00
if containerChanges . StartInfraContainer || ( len ( containerChanges . ContainersToKeep ) == 0 && len ( containerChanges . ContainersToStart ) == 0 ) {
if len ( containerChanges . ContainersToKeep ) == 0 && len ( containerChanges . ContainersToStart ) == 0 {
2015-12-07 21:31:02 +00:00
glog . V ( 4 ) . Infof ( "Killing Infra Container for %q because all other containers are dead." , format . Pod ( pod ) )
2015-05-01 01:37:15 +00:00
} else {
2015-12-07 21:31:02 +00:00
glog . V ( 4 ) . Infof ( "Killing Infra Container for %q, will start new one" , format . Pod ( pod ) )
2015-05-01 01:37:15 +00:00
}
// Killing phase: if we want to start new infra container, or nothing is running kill everything (including infra container)
2015-12-31 08:41:05 +00:00
// TODO(random-liu): We'll use pod status directly in the future
2016-01-13 23:15:18 +00:00
killResult := dm . killPodWithSyncResult ( pod , kubecontainer . ConvertPodStatusToRunningPod ( podStatus ) )
result . AddPodSyncResult ( killResult )
if killResult . Error ( ) != nil {
return
2015-05-01 01:37:15 +00:00
}
} else {
2015-12-05 00:06:25 +00:00
// Otherwise kill any running containers in this pod which are not specified as ones to keep.
runningContainerStatues := podStatus . GetRunningContainerStatuses ( )
for _ , containerStatus := range runningContainerStatues {
2015-12-24 23:46:56 +00:00
_ , keep := containerChanges . ContainersToKeep [ kubecontainer . DockerID ( containerStatus . ID . ID ) ]
2015-05-01 01:37:15 +00:00
if ! keep {
2016-01-13 23:15:18 +00:00
glog . V ( 3 ) . Infof ( "Killing unwanted container %q(id=%q) for pod %q" , containerStatus . Name , containerStatus . ID , format . Pod ( pod ) )
2015-08-20 01:57:58 +00:00
// attempt to find the appropriate container policy
var podContainer * api . Container
2015-10-10 09:12:47 +00:00
var killMessage string
2015-08-20 01:57:58 +00:00
for i , c := range pod . Spec . Containers {
2015-12-05 00:06:25 +00:00
if c . Name == containerStatus . Name {
2015-08-20 01:57:58 +00:00
podContainer = & pod . Spec . Containers [ i ]
2015-10-10 09:12:47 +00:00
killMessage = containerChanges . ContainersToStart [ i ]
2015-08-20 01:57:58 +00:00
break
}
}
2016-01-13 23:15:18 +00:00
killContainerResult := kubecontainer . NewSyncResult ( kubecontainer . KillContainer , containerStatus . Name )
result . AddSyncResult ( killContainerResult )
2015-12-05 00:06:25 +00:00
if err := dm . KillContainerInPod ( containerStatus . ID , podContainer , pod , killMessage ) ; err != nil {
2016-01-13 23:15:18 +00:00
killContainerResult . Fail ( kubecontainer . ErrKillContainer , err . Error ( ) )
glog . Errorf ( "Error killing container %q(id=%q) for pod %q: %v" , containerStatus . Name , containerStatus . ID , format . Pod ( pod ) , err )
return
2015-05-01 01:37:15 +00:00
}
}
}
}
2016-03-07 20:24:08 +00:00
// We pass the value of the podIP down to runContainerInPod, which in turn
// passes it to various other functions, in order to facilitate
// functionality that requires this value (hosts file and downward API)
// and avoid races determining the pod IP in cases where a container
// requires restart but the podIP isn't in the status manager yet.
//
// We default to the IP in the passed-in pod status, and overwrite it if the
// infra container needs to be (re)started.
podIP := ""
if podStatus != nil {
podIP = podStatus . IP
}
2015-05-01 01:37:15 +00:00
// If we should create infra container then we do it first.
podInfraContainerID := containerChanges . InfraContainerId
if containerChanges . StartInfraContainer && ( len ( containerChanges . ContainersToStart ) > 0 ) {
2015-12-07 21:31:02 +00:00
glog . V ( 4 ) . Infof ( "Creating pod infra container for %q" , format . Pod ( pod ) )
2016-01-13 23:15:18 +00:00
startContainerResult := kubecontainer . NewSyncResult ( kubecontainer . StartContainer , PodInfraContainerName )
result . AddSyncResult ( startContainerResult )
var msg string
podInfraContainerID , err , msg = dm . createPodInfraContainer ( pod )
2015-10-06 13:56:00 +00:00
if err != nil {
2016-01-13 23:15:18 +00:00
startContainerResult . Fail ( err , msg )
2015-12-07 21:31:02 +00:00
glog . Errorf ( "Failed to create pod infra container: %v; Skipping pod %q" , err , format . Pod ( pod ) )
2016-01-13 23:15:18 +00:00
return
2015-10-06 13:56:00 +00:00
}
2015-09-08 07:34:10 +00:00
2016-01-13 23:15:18 +00:00
setupNetworkResult := kubecontainer . NewSyncResult ( kubecontainer . SetupNetwork , kubecontainer . GetPodFullName ( pod ) )
result . AddSyncResult ( setupNetworkResult )
2016-04-14 19:00:51 +00:00
if ! kubecontainer . IsHostNetworkPod ( pod ) {
2016-01-22 21:14:41 +00:00
// Call the networking plugin
2016-04-06 20:45:16 +00:00
err = dm . networkPlugin . SetUpPod ( pod . Namespace , pod . Name , podInfraContainerID . ContainerID ( ) )
2016-01-22 21:14:41 +00:00
if err != nil {
// TODO: (random-liu) There shouldn't be "Skipping pod" in sync result message
message := fmt . Sprintf ( "Failed to setup network for pod %q using network plugins %q: %v; Skipping pod" , format . Pod ( pod ) , dm . networkPlugin . Name ( ) , err )
setupNetworkResult . Fail ( kubecontainer . ErrSetupNetwork , message )
glog . Error ( message )
// Delete infra container
killContainerResult := kubecontainer . NewSyncResult ( kubecontainer . KillContainer , PodInfraContainerName )
result . AddSyncResult ( killContainerResult )
if delErr := dm . KillContainerInPod ( kubecontainer . ContainerID {
ID : string ( podInfraContainerID ) ,
Type : "docker" } , nil , pod , message ) ; delErr != nil {
killContainerResult . Fail ( kubecontainer . ErrKillContainer , delErr . Error ( ) )
glog . Warningf ( "Clear infra container failed for pod %q: %v" , format . Pod ( pod ) , delErr )
}
return
2015-10-06 13:56:00 +00:00
}
2015-09-06 11:53:20 +00:00
2016-02-01 22:56:56 +00:00
// Setup the host interface unless the pod is on the host's network (FIXME: move to networkPlugin when ready)
2016-04-04 22:27:20 +00:00
podInfraContainer , err := dm . client . InspectContainer ( string ( podInfraContainerID ) )
2016-02-01 22:56:56 +00:00
if err != nil {
glog . Errorf ( "Failed to inspect pod infra container: %v; Skipping pod %q" , err , format . Pod ( pod ) )
result . Fail ( err )
return
}
if dm . configureHairpinMode {
if err = hairpin . SetUpContainer ( podInfraContainer . State . Pid , network . DefaultInterfaceName ) ; err != nil {
glog . Warningf ( "Hairpin setup failed for pod %q: %v" , format . Pod ( pod ) , err )
}
2015-10-07 00:26:01 +00:00
}
2015-10-13 19:51:37 +00:00
2016-03-07 20:24:08 +00:00
// Overwrite the podIP passed in the pod status, since we just started the infra container.
podIP = dm . determineContainerIP ( pod . Name , pod . Namespace , podInfraContainer )
2016-02-01 22:56:56 +00:00
}
2015-05-01 01:37:15 +00:00
}
// Start everything
2015-05-08 18:54:44 +00:00
for idx := range containerChanges . ContainersToStart {
container := & pod . Spec . Containers [ idx ]
2016-01-13 23:15:18 +00:00
startContainerResult := kubecontainer . NewSyncResult ( kubecontainer . StartContainer , container . Name )
result . AddSyncResult ( startContainerResult )
2015-08-13 12:59:15 +00:00
// containerChanges.StartInfraContainer causes the containers to be restarted for config reasons
// ignore backoff
2016-01-13 23:15:18 +00:00
if ! containerChanges . StartInfraContainer {
isInBackOff , err , msg := dm . doBackOff ( pod , container , podStatus , backOff )
if isInBackOff {
startContainerResult . Fail ( err , msg )
glog . V ( 4 ) . Infof ( "Backing Off restarting container %+v in pod %v" , container , format . Pod ( pod ) )
continue
}
2015-08-13 12:59:15 +00:00
}
2015-12-07 21:31:02 +00:00
glog . V ( 4 ) . Infof ( "Creating container %+v in pod %v" , container , format . Pod ( pod ) )
2015-10-02 13:45:46 +00:00
err , msg := dm . imagePuller . PullImage ( pod , container , pullSecrets )
2015-05-08 18:54:44 +00:00
if err != nil {
2016-01-13 23:15:18 +00:00
startContainerResult . Fail ( err , msg )
2015-05-01 01:37:15 +00:00
continue
}
2015-05-08 18:54:44 +00:00
2015-10-20 18:03:32 +00:00
if container . SecurityContext != nil && container . SecurityContext . RunAsNonRoot != nil && * container . SecurityContext . RunAsNonRoot {
2015-08-10 17:30:34 +00:00
err := dm . verifyNonRoot ( container )
if err != nil {
2016-01-13 23:15:18 +00:00
startContainerResult . Fail ( kubecontainer . ErrVerifyNonRoot , err . Error ( ) )
2015-12-28 18:01:19 +00:00
glog . Errorf ( "Error running pod %q container %q: %v" , format . Pod ( pod ) , container . Name , err )
2015-08-10 17:30:34 +00:00
continue
}
}
2015-10-23 20:02:32 +00:00
// For a new container, the RestartCount should be 0
2015-10-30 05:42:25 +00:00
restartCount := 0
2015-12-05 00:06:25 +00:00
containerStatus := podStatus . FindContainerStatusByName ( container . Name )
if containerStatus != nil {
restartCount = containerStatus . RestartCount + 1
2015-10-23 20:02:32 +00:00
}
2015-05-01 01:37:15 +00:00
// TODO(dawnchen): Check RestartPolicy.DelaySeconds before restart a container
2015-09-21 15:34:02 +00:00
// Note: when configuring the pod's containers anything that can be configured by pointing
// to the namespace of the infra container should use namespaceMode. This includes things like the net namespace
// and IPC namespace. PID mode cannot point to another container right now.
// See createPodInfraContainer for infra container setup.
2015-05-01 01:37:15 +00:00
namespaceMode := fmt . Sprintf ( "container:%v" , podInfraContainerID )
2016-03-07 20:24:08 +00:00
_ , err = dm . runContainerInPod ( pod , container , namespaceMode , namespaceMode , getPidMode ( pod ) , podIP , restartCount )
2015-05-01 01:37:15 +00:00
if err != nil {
2016-01-13 23:15:18 +00:00
startContainerResult . Fail ( kubecontainer . ErrRunContainer , err . Error ( ) )
2015-05-01 01:37:15 +00:00
// TODO(bburns) : Perhaps blacklist a container after N failures?
2015-12-28 18:01:19 +00:00
glog . Errorf ( "Error running pod %q container %q: %v" , format . Pod ( pod ) , container . Name , err )
2015-05-08 18:54:44 +00:00
continue
2015-05-01 01:37:15 +00:00
}
2015-05-08 18:54:44 +00:00
// Successfully started the container; clear the entry in the failure
2015-05-01 01:37:15 +00:00
}
2016-01-13 23:15:18 +00:00
return
2015-05-01 01:37:15 +00:00
}
2015-08-10 17:30:34 +00:00
// verifyNonRoot returns an error if the container or image will run as the root user.
func ( dm * DockerManager ) verifyNonRoot ( container * api . Container ) error {
if securitycontext . HasRunAsUser ( container ) {
if securitycontext . HasRootRunAsUser ( container ) {
return fmt . Errorf ( "container's runAsUser breaks non-root policy" )
}
return nil
}
imgRoot , err := dm . isImageRoot ( container . Image )
if err != nil {
2016-01-26 00:31:32 +00:00
return fmt . Errorf ( "can't tell if image runs as root: %v" , err )
2015-08-10 17:30:34 +00:00
}
if imgRoot {
return fmt . Errorf ( "container has no runAsUser and image will run as root" )
}
return nil
}
// isImageRoot returns true if the user directive is not set on the image, the user is set to 0
// or the user is set to root. If there is an error inspecting the image this method will return
// false and return the error.
func ( dm * DockerManager ) isImageRoot ( image string ) ( bool , error ) {
img , err := dm . client . InspectImage ( image )
if err != nil {
return false , err
}
if img == nil || img . Config == nil {
return false , fmt . Errorf ( "unable to inspect image %s, nil Config" , image )
}
user := getUidFromUser ( img . Config . User )
// if no user is defined container will run as root
if user == "" {
return true , nil
}
// do not allow non-numeric user directives
uid , err := strconv . Atoi ( user )
if err != nil {
2016-01-26 00:31:32 +00:00
return false , fmt . Errorf ( "non-numeric user (%s) is not allowed" , user )
2015-08-10 17:30:34 +00:00
}
// user is numeric, check for 0
return uid == 0 , nil
}
// getUidFromUser extracts the uid portion of a user string that may be given
// either as "uid" or as "uid:gid". Everything before the first ':' is
// returned; a string without ':' (including the empty string) is returned
// unchanged.
func getUidFromUser(id string) string {
	if sep := strings.IndexByte(id, ':'); sep >= 0 {
		return id[:sep]
	}
	return id
}
2015-08-13 12:59:15 +00:00
2016-01-13 23:15:18 +00:00
// If all instances of a container are garbage collected, doBackOff will also return false, which means the container may be restarted before the
// backoff deadline. However, because that won't cause error and the chance is really slim, we can just ignore it for now.
// If a container is still in backoff, the function will return a brief backoff error and a detailed error message.
2016-03-09 02:58:24 +00:00
func ( dm * DockerManager ) doBackOff ( pod * api . Pod , container * api . Container , podStatus * kubecontainer . PodStatus , backOff * flowcontrol . Backoff ) ( bool , error , string ) {
2016-03-01 01:05:37 +00:00
var cStatus * kubecontainer . ContainerStatus
// Use the finished time of the latest exited container as the start point to calculate whether to do back-off.
// TODO(random-liu): Better define backoff start point; add unit and e2e test after we finalize this. (See github issue #22240)
for _ , c := range podStatus . ContainerStatuses {
if c . Name == container . Name && c . State == kubecontainer . ContainerStateExited {
cStatus = c
break
}
}
if cStatus != nil {
ts := cStatus . FinishedAt
2015-12-05 00:06:25 +00:00
// found a container that requires backoff
2015-08-13 12:59:15 +00:00
dockerName := KubeletContainerName {
PodFullName : kubecontainer . GetPodFullName ( pod ) ,
PodUID : pod . UID ,
ContainerName : container . Name ,
}
2016-02-04 00:40:04 +00:00
stableName , _ , _ := BuildDockerName ( dockerName , container )
2015-12-05 00:06:25 +00:00
if backOff . IsInBackOffSince ( stableName , ts ) {
2015-08-13 12:59:15 +00:00
if ref , err := kubecontainer . GenerateContainerRef ( pod , container ) ; err == nil {
2015-11-13 22:30:01 +00:00
dm . recorder . Eventf ( ref , api . EventTypeWarning , kubecontainer . BackOffStartContainer , "Back-off restarting failed docker container" )
2015-08-13 12:59:15 +00:00
}
2015-12-28 18:01:19 +00:00
err := fmt . Errorf ( "Back-off %s restarting failed container=%s pod=%s" , backOff . Get ( stableName ) , container . Name , format . Pod ( pod ) )
2015-09-16 14:07:25 +00:00
glog . Infof ( "%s" , err . Error ( ) )
2016-01-13 23:15:18 +00:00
return true , kubecontainer . ErrCrashLoopBackOff , err . Error ( )
2015-08-13 12:59:15 +00:00
}
2015-12-05 00:06:25 +00:00
backOff . Next ( stableName , ts )
2015-08-13 12:59:15 +00:00
}
2016-01-13 23:15:18 +00:00
return false , nil , ""
2015-08-13 12:59:15 +00:00
}
2015-09-15 16:43:59 +00:00
// getPidMode returns the pid mode to use on the docker container based on pod.Spec.HostPID.
func getPidMode ( pod * api . Pod ) string {
pidMode := ""
2015-09-14 21:56:51 +00:00
if pod . Spec . SecurityContext != nil && pod . Spec . SecurityContext . HostPID {
2016-01-22 21:14:41 +00:00
pidMode = namespaceModeHost
2015-09-15 16:43:59 +00:00
}
return pidMode
}
2015-08-10 08:14:01 +00:00
// getIPCMode returns the ipc mode to use on the docker container based on pod.Spec.HostIPC.
2015-09-21 15:34:02 +00:00
func getIPCMode ( pod * api . Pod ) string {
ipcMode := ""
2015-09-14 21:56:51 +00:00
if pod . Spec . SecurityContext != nil && pod . Spec . SecurityContext . HostIPC {
2016-01-22 21:14:41 +00:00
ipcMode = namespaceModeHost
2015-08-10 08:14:01 +00:00
}
return ipcMode
}
2015-09-09 21:00:41 +00:00
2016-01-22 21:14:41 +00:00
// GetNetNS returns the network namespace path for the given container
func ( dm * DockerManager ) GetNetNS ( containerID kubecontainer . ContainerID ) ( string , error ) {
2015-10-07 17:58:05 +00:00
inspectResult , err := dm . client . InspectContainer ( containerID . ID )
2015-09-09 21:00:41 +00:00
if err != nil {
glog . Errorf ( "Error inspecting container: '%v'" , err )
return "" , err
}
netnsPath := fmt . Sprintf ( DockerNetnsFmt , inspectResult . State . Pid )
return netnsPath , nil
}
2015-10-03 15:39:15 +00:00
// Garbage collection of dead containers
2015-10-05 22:35:32 +00:00
func ( dm * DockerManager ) GarbageCollect ( gcPolicy kubecontainer . ContainerGCPolicy ) error {
return dm . containerGC . GarbageCollect ( gcPolicy )
2015-10-03 15:39:15 +00:00
}
2015-11-10 22:18:47 +00:00
2015-12-05 00:06:25 +00:00
func ( dm * DockerManager ) GetPodStatus ( uid types . UID , name , namespace string ) ( * kubecontainer . PodStatus , error ) {
podStatus := & kubecontainer . PodStatus { ID : uid , Name : name , Namespace : namespace }
// Now we retain restart count of container as a docker label. Each time a container
2016-01-13 22:40:56 +00:00
// restarts, pod will read the restart count from the registered dead container, increment
2015-12-05 00:06:25 +00:00
// it to get the new restart count, and then add a label with the new restart count on
// the newly started container.
// However, there are some limitations of this method:
// 1. When all dead containers were garbage collected, the container status could
// not get the historical value and would be *inaccurate*. Fortunately, the chance
// is really slim.
// 2. When working with old version containers which have no restart count label,
// we can only assume their restart count is 0.
// Anyhow, we only promised "best-effort" restart count reporting, we can just ignore
// these limitations now.
var containerStatuses [ ] * kubecontainer . ContainerStatus
// We have added labels like pod name and pod namespace, it seems that we can do filtered list here.
// However, there may be some old containers without these labels, so at least now we can't do that.
2015-12-31 08:41:05 +00:00
// TODO(random-liu): Do only one list and pass in the list result in the future
// TODO(random-liu): Add filter when we are sure that all the containers have the labels
2016-04-04 08:56:49 +00:00
containers , err := dm . client . ListContainers ( dockertypes . ContainerListOptions { All : true } )
2015-12-05 00:06:25 +00:00
if err != nil {
return podStatus , err
}
// Loop through list of running and exited docker containers to construct
// the statuses. We assume docker returns a list of containers sorted in
// reverse by time.
// TODO: optimization: set maximum number of containers per container name to examine.
for _ , c := range containers {
if len ( c . Names ) == 0 {
continue
}
dockerName , _ , err := ParseDockerName ( c . Names [ 0 ] )
if err != nil {
continue
}
if dockerName . PodUID != uid {
continue
}
result , ip , err := dm . inspectContainer ( c . ID , name , namespace )
if err != nil {
2016-04-04 22:27:20 +00:00
if _ , ok := err . ( containerNotFoundError ) ; ok {
2016-03-05 01:33:23 +00:00
// https://github.com/kubernetes/kubernetes/issues/22541
// Sometimes when docker's state is corrupt, a container can be listed
// but couldn't be inspected. We fake a status for this container so
// that we can still return a status for the pod to sync.
result = & kubecontainer . ContainerStatus {
ID : kubecontainer . DockerID ( c . ID ) . ContainerID ( ) ,
Name : dockerName . ContainerName ,
State : kubecontainer . ContainerStateUnknown ,
}
glog . Errorf ( "Unable to inspect container %q: %v" , c . ID , err )
} else {
return podStatus , err
}
2015-12-05 00:06:25 +00:00
}
containerStatuses = append ( containerStatuses , result )
if ip != "" {
podStatus . IP = ip
}
}
podStatus . ContainerStatuses = containerStatuses
return podStatus , nil
2015-11-10 22:18:47 +00:00
}
2016-03-14 08:35:49 +00:00
// getVersionInfo returns, in this order, the API version and the daemon
// version of the docker runtime. If either lookup fails, both versions are
// returned as nil along with the error; the daemon version is not queried
// when the API version lookup fails.
func (dm *DockerManager) getVersionInfo() (kubecontainer.Version, kubecontainer.Version, error) {
	apiVer, apiErr := dm.APIVersion()
	if apiErr != nil {
		return nil, nil, apiErr
	}

	daemonVer, daemonErr := dm.Version()
	if daemonErr != nil {
		return nil, nil, daemonErr
	}

	return apiVer, daemonVer, nil
}