/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package kubelet

// Note: if you change code in this file, you might need to change code in
// contrib/mesos/pkg/executor/.

import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"io/ioutil"
	"net"
	"net/http"
	"os"
	"path"
	"path/filepath"
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/golang/glog"
	cadvisorapi "github.com/google/cadvisor/info/v1"
	"k8s.io/kubernetes/pkg/api"
	apierrors "k8s.io/kubernetes/pkg/api/errors"
	"k8s.io/kubernetes/pkg/api/resource"
	"k8s.io/kubernetes/pkg/api/unversioned"
	"k8s.io/kubernetes/pkg/api/validation"
	"k8s.io/kubernetes/pkg/client/cache"
	"k8s.io/kubernetes/pkg/client/record"
	client "k8s.io/kubernetes/pkg/client/unversioned"
	"k8s.io/kubernetes/pkg/cloudprovider"
	"k8s.io/kubernetes/pkg/fieldpath"
	"k8s.io/kubernetes/pkg/fields"
	"k8s.io/kubernetes/pkg/kubelet/cadvisor"
	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
	"k8s.io/kubernetes/pkg/kubelet/dockertools"
	"k8s.io/kubernetes/pkg/kubelet/envvars"
	"k8s.io/kubernetes/pkg/kubelet/metrics"
	"k8s.io/kubernetes/pkg/kubelet/network"
	kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
	"k8s.io/kubernetes/pkg/kubelet/prober"
	proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results"
	"k8s.io/kubernetes/pkg/kubelet/rkt"
	"k8s.io/kubernetes/pkg/kubelet/status"
	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
	kubeletutil "k8s.io/kubernetes/pkg/kubelet/util"
	"k8s.io/kubernetes/pkg/labels"
	"k8s.io/kubernetes/pkg/runtime"
	"k8s.io/kubernetes/pkg/securitycontext"
	"k8s.io/kubernetes/pkg/types"
	"k8s.io/kubernetes/pkg/util"
	"k8s.io/kubernetes/pkg/util/bandwidth"
	"k8s.io/kubernetes/pkg/util/chmod"
	"k8s.io/kubernetes/pkg/util/chown"
	utilerrors "k8s.io/kubernetes/pkg/util/errors"
	kubeio "k8s.io/kubernetes/pkg/util/io"
	"k8s.io/kubernetes/pkg/util/mount"
	nodeutil "k8s.io/kubernetes/pkg/util/node"
	"k8s.io/kubernetes/pkg/util/oom"
	"k8s.io/kubernetes/pkg/util/procfs"
	"k8s.io/kubernetes/pkg/util/selinux"
	"k8s.io/kubernetes/pkg/util/sets"
	"k8s.io/kubernetes/pkg/version"
	"k8s.io/kubernetes/pkg/volume"
	"k8s.io/kubernetes/pkg/watch"
	"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates"
	"k8s.io/kubernetes/third_party/golang/expansion"
)

const (
	// Max amount of time to wait for the container runtime to come up.
	maxWaitForContainerRuntime = 5 * time.Minute

	// nodeStatusUpdateRetry specifies how many times kubelet retries when posting node status failed.
	nodeStatusUpdateRetry = 5

	// Location of container logs.
	containerLogsDir = "/var/log/containers"

	// max backoff period, exported for the e2e test
	MaxContainerBackOff = 300 * time.Second

	// Capacity of the channel for storing pods to kill. A small number should
	// suffice because a goroutine is dedicated to check the channel and does
	// not block on anything else.
	podKillingChannelCapacity = 50

	// system default DNS resolver configuration
	ResolvConfDefault = "/etc/resolv.conf"

	// Minimum period for performing global cleanup tasks, i.e., housekeeping
	// will not be performed more than once per housekeepingMinimumPeriod.
	housekeepingMinimumPeriod = time.Second * 2

	etcHostsPath = "/etc/hosts"
)

var (
	// ErrContainerNotFound returned when a container in the given pod with the
	// given container name was not found, amongst those managed by the kubelet.
	ErrContainerNotFound = errors.New("no matching container")
)

// SyncHandler is an interface implemented by Kubelet, for testability
type SyncHandler interface {
	HandlePodAdditions(pods []*api.Pod)
	HandlePodUpdates(pods []*api.Pod)
	HandlePodDeletions(pods []*api.Pod)
	HandlePodSyncs(pods []*api.Pod)
	HandlePodCleanups() error
}

type SourcesReadyFn func(sourcesSeen sets.String) bool

// Wait for the container runtime to be up with a timeout.
func waitUntilRuntimeIsUp(cr kubecontainer.Runtime, timeout time.Duration) error {
	var err error = nil
	waitStart := time.Now()
	for time.Since(waitStart) < timeout {
		_, err = cr.Version()
		if err == nil {
			return nil
		}
		time.Sleep(100 * time.Millisecond)
	}
	return err
}

// New creates a new Kubelet for use in main
func NewMainKubelet(
	hostname string,
	nodeName string,
	dockerClient dockertools.DockerInterface,
	kubeClient client.Interface,
	rootDirectory string,
	podInfraContainerImage string,
	resyncInterval time.Duration,
	pullQPS float32,
	pullBurst int,
	eventQPS float32,
	eventBurst int,
	containerGCPolicy kubecontainer.ContainerGCPolicy,
	sourcesReady SourcesReadyFn,
	registerNode bool,
	registerSchedulable bool,
	standaloneMode bool,
	clusterDomain string,
	clusterDNS net.IP,
	masterServiceNamespace string,
	volumePlugins []volume.VolumePlugin,
	networkPlugins []network.NetworkPlugin,
	networkPluginName string,
	streamingConnectionIdleTimeout time.Duration,
	recorder record.EventRecorder,
	cadvisorInterface cadvisor.Interface,
	imageGCPolicy ImageGCPolicy,
	diskSpacePolicy DiskSpacePolicy,
	cloud cloudprovider.Interface,
	nodeStatusUpdateFrequency time.Duration,
	resourceContainer string,
	osInterface kubecontainer.OSInterface,
	cgroupRoot string,
	containerRuntime string,
	rktPath string,
	rktStage1Image string,
	mounter mount.Interface,
	writer kubeio.Writer,
	chownRunner chown.Interface,
	chmodRunner chmod.Interface,
	dockerDaemonContainer string,
	systemContainer string,
	configureCBR0 bool,
	podCIDR string,
	reconcileCIDR bool,
	pods int,
	dockerExecHandler dockertools.ExecHandler,
	resolverConfig string,
	cpuCFSQuota bool,
	daemonEndpoints *api.NodeDaemonEndpoints,
	oomAdjuster *oom.OOMAdjuster,
	serializeImagePulls bool,
) (*Kubelet, error) {
	if rootDirectory == "" {
		return nil, fmt.Errorf("invalid root directory %q", rootDirectory)
	}
	if resyncInterval <= 0 {
		return nil, fmt.Errorf("invalid sync frequency %d", resyncInterval)
	}
	if systemContainer != "" && cgroupRoot == "" {
		return nil, fmt.Errorf("invalid configuration: system container was specified and cgroup root was not specified")
	}
	dockerClient = dockertools.NewInstrumentedDockerInterface(dockerClient)

	serviceStore := cache.NewStore(cache.MetaNamespaceKeyFunc)
	if kubeClient != nil {
		// TODO: cache.NewListWatchFromClient is limited as it takes a client implementation rather
		// than an interface. There is no way to construct a list+watcher using resource name.
		listWatch := &cache.ListWatch{
			ListFunc: func() (runtime.Object, error) {
				return kubeClient.Services(api.NamespaceAll).List(labels.Everything(), fields.Everything())
			},
			WatchFunc: func(options api.ListOptions) (watch.Interface, error) {
				return kubeClient.Services(api.NamespaceAll).Watch(labels.Everything(), fields.Everything(), options)
			},
		}
		cache.NewReflector(listWatch, &api.Service{}, serviceStore, 0).Run()
	}
	serviceLister := &cache.StoreToServiceLister{Store: serviceStore}

	nodeStore := cache.NewStore(cache.MetaNamespaceKeyFunc)
	if kubeClient != nil {
		// TODO: cache.NewListWatchFromClient is limited as it takes a client implementation rather
		// than an interface. There is no way to construct a list+watcher using resource name.
		fieldSelector := fields.Set{client.ObjectNameField: nodeName}.AsSelector()
		listWatch := &cache.ListWatch{
			ListFunc: func() (runtime.Object, error) {
				return kubeClient.Nodes().List(labels.Everything(), fieldSelector)
			},
			WatchFunc: func(options api.ListOptions) (watch.Interface, error) {
				return kubeClient.Nodes().Watch(labels.Everything(), fieldSelector, options)
			},
		}
		cache.NewReflector(listWatch, &api.Node{}, nodeStore, 0).Run()
	}
	nodeLister := &cache.StoreToNodeLister{Store: nodeStore}

	// TODO: get the real node object of ourself,
	// and use the real node name and UID.
	// TODO: what is namespace for node?
	nodeRef := &api.ObjectReference{
		Kind:      "Node",
		Name:      nodeName,
		UID:       types.UID(nodeName),
		Namespace: "",
	}

	diskSpaceManager, err := newDiskSpaceManager(cadvisorInterface, diskSpacePolicy)
	if err != nil {
		return nil, fmt.Errorf("failed to initialize disk manager: %v", err)
	}
	statusManager := status.NewManager(kubeClient)
	containerRefManager := kubecontainer.NewRefManager()
	volumeManager := newVolumeManager()

	oomWatcher := NewOOMWatcher(cadvisorInterface, recorder)

	klet := &Kubelet{
		hostname:                       hostname,
		nodeName:                       nodeName,
		dockerClient:                   dockerClient,
		kubeClient:                     kubeClient,
		rootDirectory:                  rootDirectory,
		resyncInterval:                 resyncInterval,
		containerRefManager:            containerRefManager,
		httpClient:                     &http.Client{},
		sourcesReady:                   sourcesReady,
		registerNode:                   registerNode,
		registerSchedulable:            registerSchedulable,
		standaloneMode:                 standaloneMode,
		clusterDomain:                  clusterDomain,
		clusterDNS:                     clusterDNS,
		serviceLister:                  serviceLister,
		nodeLister:                     nodeLister,
		runtimeMutex:                   sync.Mutex{},
		runtimeUpThreshold:             maxWaitForContainerRuntime,
		lastTimestampRuntimeUp:         time.Time{},
		masterServiceNamespace:         masterServiceNamespace,
		streamingConnectionIdleTimeout: streamingConnectionIdleTimeout,
		recorder:                       recorder,
		cadvisor:                       cadvisorInterface,
		diskSpaceManager:               diskSpaceManager,
		statusManager:                  statusManager,
		volumeManager:                  volumeManager,
		cloud:                          cloud,
		nodeRef:                        nodeRef,
		nodeStatusUpdateFrequency:      nodeStatusUpdateFrequency,
		resourceContainer:              resourceContainer,
		os:                             osInterface,
		oomWatcher:                     oomWatcher,
		cgroupRoot:                     cgroupRoot,
		mounter:                        mounter,
		chmodRunner:                    chmodRunner,
		chownRunner:                    chownRunner,
		writer:                         writer,
		configureCBR0:                  configureCBR0,
		podCIDR:                        podCIDR,
		reconcileCIDR:                  reconcileCIDR,
		pods:                           pods,
		syncLoopMonitor:                util.AtomicValue{},
		resolverConfig:                 resolverConfig,
		cpuCFSQuota:                    cpuCFSQuota,
		daemonEndpoints:                daemonEndpoints,
	}

	if plug, err := network.InitNetworkPlugin(networkPlugins, networkPluginName, &networkHost{klet}); err != nil {
		return nil, err
	} else {
		klet.networkPlugin = plug
	}

	machineInfo, err := klet.GetCachedMachineInfo()
	if err != nil {
		return nil, err
	}
	procFs := procfs.NewProcFs()
	imageBackOff := util.NewBackOff(resyncInterval, MaxContainerBackOff)

	readinessManager := proberesults.NewManager()
	klet.livenessManager = proberesults.NewManagerWithUpdates()

	// Initialize the runtime.
	switch containerRuntime {
	case "docker":
		// Only supported one for now, continue.
		klet.containerRuntime = dockertools.NewDockerManager(
			dockerClient,
			recorder,
			klet.livenessManager,
			containerRefManager,
			machineInfo,
			podInfraContainerImage,
			pullQPS,
			pullBurst,
			containerLogsDir,
			osInterface,
			klet.networkPlugin,
			klet,
			klet.httpClient,
			dockerExecHandler,
			oomAdjuster,
			procFs,
			klet.cpuCFSQuota,
			imageBackOff,
			serializeImagePulls,
		)
	case "rkt":
		conf := &rkt.Config{
			Path:               rktPath,
			Stage1Image:        rktStage1Image,
			InsecureSkipVerify: true,
		}
		rktRuntime, err := rkt.New(
			conf,
			klet,
			recorder,
			containerRefManager,
			klet.livenessManager,
			klet.volumeManager,
			imageBackOff,
			serializeImagePulls,
		)
		if err != nil {
			return nil, err
		}
		klet.containerRuntime = rktRuntime
		klet.imageManager = rkt.NewImageManager(rktRuntime)

		// No Docker daemon to put in a container.
		dockerDaemonContainer = ""
	default:
		return nil, fmt.Errorf("unsupported container runtime %q specified", containerRuntime)
	}

	// setup containerGC
	containerGC, err := kubecontainer.NewContainerGC(klet.containerRuntime, containerGCPolicy)
	if err != nil {
		return nil, err
	}
	klet.containerGC = containerGC

	// setup imageManager
	imageManager, err := newImageManager(klet.containerRuntime, cadvisorInterface, recorder, nodeRef, imageGCPolicy)
	if err != nil {
		return nil, fmt.Errorf("failed to initialize image manager: %v", err)
	}
	klet.imageManager = imageManager

	// Setup container manager, can fail if the devices hierarchy is not mounted
	// (it is required by Docker however).
	containerManager, err := newContainerManager(mounter, cadvisorInterface, dockerDaemonContainer, systemContainer, resourceContainer)
	if err != nil {
		return nil, fmt.Errorf("failed to create the Container Manager: %v", err)
	}
	klet.containerManager = containerManager

	go util.Until(klet.syncNetworkStatus, 30*time.Second, util.NeverStop)
	if klet.kubeClient != nil {
		// Start syncing node status immediately, this may set up things the runtime needs to run.
		go util.Until(klet.syncNodeStatus, klet.nodeStatusUpdateFrequency, util.NeverStop)
	}

	// Wait for the runtime to be up with a timeout.
	if err := waitUntilRuntimeIsUp(klet.containerRuntime, maxWaitForContainerRuntime); err != nil {
		return nil, fmt.Errorf("timed out waiting for %q to come up: %v", containerRuntime, err)
	}
	klet.lastTimestampRuntimeUp = time.Now()

	klet.runner = klet.containerRuntime
	klet.podManager = kubepod.NewBasicPodManager(kubepod.NewBasicMirrorClient(klet.kubeClient))

	klet.probeManager = prober.NewManager(
		klet.resyncInterval,
		klet.statusManager,
		readinessManager,
		klet.livenessManager,
		klet.runner,
		containerRefManager,
		recorder)

	runtimeCache, err := kubecontainer.NewRuntimeCache(klet.containerRuntime)
	if err != nil {
		return nil, err
	}
	klet.runtimeCache = runtimeCache
	klet.podWorkers = newPodWorkers(runtimeCache, klet.syncPod, recorder)

	metrics.Register(runtimeCache)

	if err = klet.setupDataDirs(); err != nil {
		return nil, err
	}
	if err = klet.volumePluginMgr.InitPlugins(volumePlugins, &volumeHost{klet}); err != nil {
		return nil, err
	}

	// If the container logs directory does not exist, create it.
	if _, err := os.Stat(containerLogsDir); err != nil {
		if err := osInterface.Mkdir(containerLogsDir, 0755); err != nil {
			glog.Errorf("Failed to create directory %q: %v", containerLogsDir, err)
		}
	}

	klet.backOff = util.NewBackOff(resyncInterval, MaxContainerBackOff)
	klet.podKillingCh = make(chan *kubecontainer.Pod, podKillingChannelCapacity)
	klet.sourcesSeen = sets.NewString()
	return klet, nil
}

type serviceLister interface {
	List() (api.ServiceList, error)
}

type nodeLister interface {
	List() (machines api.NodeList, err error)
	GetNodeInfo(id string) (*api.Node, error)
}

// Kubelet is the main kubelet implementation.
type Kubelet struct {
	hostname       string
	nodeName       string
	dockerClient   dockertools.DockerInterface
	runtimeCache   kubecontainer.RuntimeCache
	kubeClient     client.Interface
	rootDirectory  string
	podWorkers     PodWorkers
	resyncInterval time.Duration
	resyncTicker   *time.Ticker
	sourcesReady   SourcesReadyFn
	// sourcesSeen records the sources seen by kubelet. This set is not thread
	// safe and should only be accessed by the main kubelet syncloop goroutine.
	sourcesSeen sets.String

	podManager kubepod.Manager

	// Needed to report events for containers belonging to deleted/modified pods.
	// Tracks references for reporting events
	containerRefManager *kubecontainer.RefManager

	// Optional, defaults to /logs/ from /var/log
	logServer http.Handler
	// Optional, defaults to simple Docker implementation
	runner kubecontainer.ContainerCommandRunner
	// Optional, client for http requests, defaults to empty client
	httpClient kubetypes.HttpGetter

	// cAdvisor used for container information.
	cadvisor cadvisor.Interface

	// Set to true to have the node register itself with the apiserver.
	registerNode bool
	// Set to true to have the node register itself as schedulable.
	registerSchedulable bool
	// for internal book keeping; access only from within registerWithApiserver
	registrationCompleted bool

	// Set to true if the kubelet is in standalone mode (i.e. setup without an apiserver)
	standaloneMode bool

	// If non-empty, use this for container DNS search.
	clusterDomain string

	// If non-nil, use this for container DNS server.
	clusterDNS net.IP

	masterServiceNamespace string
	serviceLister          serviceLister
	nodeLister             nodeLister

	// Last timestamp when runtime responded on ping.
	// Mutex is used to protect this value.
	runtimeMutex           sync.Mutex
	runtimeUpThreshold     time.Duration
	lastTimestampRuntimeUp time.Time

	// Network Status information
	networkConfigMutex sync.Mutex
	networkConfigured  bool

	// Volume plugins.
	volumePluginMgr volume.VolumePluginMgr

	// Network plugin.
	networkPlugin network.NetworkPlugin

	// Handles container probing.
	probeManager prober.Manager
	// Manages container health check results.
	livenessManager proberesults.Manager

	// How long to keep idle streaming command execution/port forwarding
	// connections open before terminating them
	streamingConnectionIdleTimeout time.Duration

	// The EventRecorder to use
	recorder record.EventRecorder

	// Policy for handling garbage collection of dead containers.
	containerGC kubecontainer.ContainerGC

	// Manager for images.
	imageManager imageManager

	// Diskspace manager.
	diskSpaceManager diskSpaceManager

	// Cached MachineInfo returned by cadvisor.
	machineInfo *cadvisorapi.MachineInfo

	// Syncs pods statuses with apiserver; also used as a cache of statuses.
	statusManager status.Manager

	// Manager for the volume maps for the pods.
	volumeManager *volumeManager

	// Cloud provider interface
	cloud cloudprovider.Interface

	// Reference to this node.
	nodeRef *api.ObjectReference

	// Container runtime.
	containerRuntime kubecontainer.Runtime

	// nodeStatusUpdateFrequency specifies how often kubelet posts node status to master.
	// Note: be cautious when changing the constant, it must work with nodeMonitorGracePeriod
	// in nodecontroller. There are several constraints:
	// 1. nodeMonitorGracePeriod must be N times more than nodeStatusUpdateFrequency, where
	//    N means number of retries allowed for kubelet to post node status. It is pointless
	//    to make nodeMonitorGracePeriod be less than nodeStatusUpdateFrequency, since there
	//    will only be fresh values from Kubelet at an interval of nodeStatusUpdateFrequency.
	//    The constant must be less than podEvictionTimeout.
	// 2. nodeStatusUpdateFrequency needs to be large enough for kubelet to generate node
	//    status. Kubelet may fail to update node status reliably if the value is too small,
	//    as it takes time to gather all necessary node information.
	nodeStatusUpdateFrequency time.Duration

	// The name of the resource-only container to run the Kubelet in (empty for no container).
	// Name must be absolute.
	resourceContainer string

	os kubecontainer.OSInterface

	// Watcher of out of memory events.
	oomWatcher OOMWatcher

	// If non-empty, pass this to the container runtime as the root cgroup.
	cgroupRoot string

	// Mounter to use for volumes.
	mounter mount.Interface

	// chown.Interface implementation to use
	chownRunner chown.Interface
	// chmod.Interface implementation to use
	chmodRunner chmod.Interface

	// Writer interface to use for volumes.
	writer kubeio.Writer

	// Manager of non-Runtime containers.
	containerManager containerManager

	// Whether or not kubelet should take responsibility for keeping cbr0 in
	// the correct state.
	configureCBR0 bool
	podCIDR       string
	reconcileCIDR bool

	// Number of Pods which can be run by this Kubelet
	pods int

	// Monitor Kubelet's sync loop
	syncLoopMonitor util.AtomicValue

	// Container restart Backoff
	backOff *util.Backoff

	// Channel for sending pods to kill.
	podKillingCh chan *kubecontainer.Pod

	// The configuration file used as the base to generate the container's
	// DNS resolver configuration file. This can be used in conjunction with
	// clusterDomain and clusterDNS.
	resolverConfig string

	// Optionally shape the bandwidth of a pod
	shaper bandwidth.BandwidthShaper

	// True if container cpu limits should be enforced via cgroup CFS quota
	cpuCFSQuota bool

	// Information about the ports which are opened by daemons on Node running this Kubelet server.
	daemonEndpoints *api.NodeDaemonEndpoints
}

func (kl *Kubelet) allSourcesReady() bool {
	// Make a copy of the sourcesSeen list because it's not thread-safe.
	return kl.sourcesReady(sets.NewString(kl.sourcesSeen.List()...))
}

func (kl *Kubelet) addSource(source string) {
	kl.sourcesSeen.Insert(source)
}

// getRootDir returns the full path to the directory under which kubelet can
// store data. These functions are useful to pass interfaces to other modules
// that may need to know where to write data without getting a whole kubelet
// instance.
func (kl *Kubelet) getRootDir() string {
	return kl.rootDirectory
}

// getPodsDir returns the full path to the directory under which pod
// directories are created.
func (kl *Kubelet) getPodsDir() string {
	return path.Join(kl.getRootDir(), "pods")
}

// getPluginsDir returns the full path to the directory under which plugin
// directories are created. Plugins can use these directories for data that
// they need to persist. Plugins should create subdirectories under this named
// after their own names.
func (kl *Kubelet) getPluginsDir() string {
	return path.Join(kl.getRootDir(), "plugins")
}

// getPluginDir returns a data directory name for a given plugin name.
// Plugins can use these directories to store data that they need to persist.
// For per-pod plugin data, see getPodPluginDir.
func (kl *Kubelet) getPluginDir(pluginName string) string {
	return path.Join(kl.getPluginsDir(), pluginName)
}

// getPodDir returns the full path to the per-pod data directory for the
// specified pod. This directory may not exist if the pod does not exist.
func (kl *Kubelet) getPodDir(podUID types.UID) string {
	// Backwards compat. The "old" stuff should be removed before 1.0
	// release. The thinking here is this:
	// !old && !new = use new
	// !old && new  = use new
	// old && !new  = use old
	// old && new   = use new (but warn)
	oldPath := path.Join(kl.getRootDir(), string(podUID))
	oldExists := dirExists(oldPath)
	newPath := path.Join(kl.getPodsDir(), string(podUID))
	newExists := dirExists(newPath)
	if oldExists && !newExists {
		return oldPath
	}
	if oldExists {
		glog.Warningf("Data dir for pod %q exists in both old and new form, using new", podUID)
	}
	return newPath
}

// getPodVolumesDir returns the full path to the per-pod data directory under
// which volumes are created for the specified pod. This directory may not
// exist if the pod does not exist.
func (kl *Kubelet) getPodVolumesDir(podUID types.UID) string {
	return path.Join(kl.getPodDir(podUID), "volumes")
}

// getPodVolumeDir returns the full path to the directory which represents the
// named volume under the named plugin for specified pod. This directory may not
// exist if the pod does not exist.
func (kl *Kubelet) getPodVolumeDir(podUID types.UID, pluginName string, volumeName string) string {
	return path.Join(kl.getPodVolumesDir(podUID), pluginName, volumeName)
}

// getPodPluginsDir returns the full path to the per-pod data directory under
// which plugins may store data for the specified pod. This directory may not
// exist if the pod does not exist.
func (kl *Kubelet) getPodPluginsDir(podUID types.UID) string {
	return path.Join(kl.getPodDir(podUID), "plugins")
}

// getPodPluginDir returns a data directory name for a given plugin name for a
// given pod UID. Plugins can use these directories to store data that they
// need to persist. For non-per-pod plugin data, see getPluginDir.
func (kl *Kubelet) getPodPluginDir(podUID types.UID, pluginName string) string {
	return path.Join(kl.getPodPluginsDir(podUID), pluginName)
}

// getPodContainerDir returns the full path to the per-pod data directory under
// which container data is held for the specified pod. This directory may not
// exist if the pod or container does not exist.
func (kl *Kubelet) getPodContainerDir(podUID types.UID, ctrName string) string {
	// Backwards compat. The "old" stuff should be removed before 1.0
	// release. The thinking here is this:
	// !old && !new = use new
	// !old && new  = use new
	// old && !new  = use old
	// old && new   = use new (but warn)
	oldPath := path.Join(kl.getPodDir(podUID), ctrName)
	oldExists := dirExists(oldPath)
	newPath := path.Join(kl.getPodDir(podUID), "containers", ctrName)
	newExists := dirExists(newPath)
	if oldExists && !newExists {
		return oldPath
	}
	if oldExists {
		glog.Warningf("Data dir for pod %q, container %q exists in both old and new form, using new", podUID, ctrName)
	}
	return newPath
}

func dirExists(path string) bool {
	s, err := os.Stat(path)
	if err != nil {
		return false
	}
	return s.IsDir()
}

func (kl *Kubelet) setupDataDirs() error {
	kl.rootDirectory = path.Clean(kl.rootDirectory)
	if err := os.MkdirAll(kl.getRootDir(), 0750); err != nil {
		return fmt.Errorf("error creating root directory: %v", err)
	}
	if err := os.MkdirAll(kl.getPodsDir(), 0750); err != nil {
		return fmt.Errorf("error creating pods directory: %v", err)
	}
	if err := os.MkdirAll(kl.getPluginsDir(), 0750); err != nil {
		return fmt.Errorf("error creating plugins directory: %v", err)
	}
	return nil
}

// Get a list of pods that have data directories.
func (kl *Kubelet) listPodsFromDisk() ([]types.UID, error) {
	podInfos, err := ioutil.ReadDir(kl.getPodsDir())
	if err != nil {
		return nil, err
	}
	pods := []types.UID{}
	for i := range podInfos {
		if podInfos[i].IsDir() {
			pods = append(pods, types.UID(podInfos[i].Name()))
		}
	}
	return pods, nil
}

func (kl *Kubelet) GetNode() (*api.Node, error) {
	if kl.standaloneMode {
		return nil, errors.New("no node entry for kubelet in standalone mode")
	}
	return kl.nodeLister.GetNodeInfo(kl.nodeName)
}

// Starts garbage collection threads.
func (kl *Kubelet) StartGarbageCollection() {
	go util.Until(func() {
		if err := kl.containerGC.GarbageCollect(); err != nil {
			glog.Errorf("Container garbage collection failed: %v", err)
		}
	}, time.Minute, util.NeverStop)

	go util.Until(func() {
		if err := kl.imageManager.GarbageCollect(); err != nil {
			glog.Errorf("Image garbage collection failed: %v", err)
		}
	}, 5*time.Minute, util.NeverStop)
}

// Run starts the kubelet reacting to config updates
func (kl *Kubelet) Run(updates <-chan kubetypes.PodUpdate) {
	if kl.logServer == nil {
		kl.logServer = http.StripPrefix("/logs/", http.FileServer(http.Dir("/var/log/")))
	}
	if kl.kubeClient == nil {
		glog.Warning("No api server defined - no node status update will be sent.")
	}

	// Move Kubelet to a container.
	if kl.resourceContainer != "" {
		// Fixme: I need to reside inside ContainerManager interface.
		err := util.RunInResourceContainer(kl.resourceContainer)
		if err != nil {
			glog.Warningf("Failed to move Kubelet to container %q: %v", kl.resourceContainer, err)
		}
		glog.Infof("Running in container %q", kl.resourceContainer)
	}

	if err := kl.imageManager.Start(); err != nil {
		kl.recorder.Eventf(kl.nodeRef, "KubeletSetupFailed", "Failed to start ImageManager %v", err)
		glog.Errorf("Failed to start ImageManager, images may not be garbage collected: %v", err)
	}

	if err := kl.cadvisor.Start(); err != nil {
		kl.recorder.Eventf(kl.nodeRef, "KubeletSetupFailed", "Failed to start CAdvisor %v", err)
		glog.Errorf("Failed to start CAdvisor, system may not be properly monitored: %v", err)
	}

	if err := kl.containerManager.Start(); err != nil {
		kl.recorder.Eventf(kl.nodeRef, "KubeletSetupFailed", "Failed to start ContainerManager %v", err)
		glog.Errorf("Failed to start ContainerManager, system may not be properly isolated: %v", err)
	}

	if err := kl.oomWatcher.Start(kl.nodeRef); err != nil {
		kl.recorder.Eventf(kl.nodeRef, "KubeletSetupFailed", "Failed to start OOM watcher %v", err)
		glog.Errorf("Failed to start OOM watching: %v", err)
	}

	go util.Until(kl.updateRuntimeUp, 5*time.Second, util.NeverStop)

	// Start a goroutine responsible for killing pods (that are not properly
	// handled by pod workers).
	go util.Until(kl.podKiller, 1*time.Second, util.NeverStop)

	// Run the system oom watcher forever.
	kl.statusManager.Start()
	kl.syncLoop(updates, kl)
}

func (kl *Kubelet) initialNodeStatus() (*api.Node, error) {
	node := &api.Node{
		ObjectMeta: api.ObjectMeta{
			Name:   kl.nodeName,
			Labels: map[string]string{"kubernetes.io/hostname": kl.hostname},
		},
		Spec: api.NodeSpec{
			Unschedulable: !kl.registerSchedulable,
		},
	}
	if kl.cloud != nil {
		instances, ok := kl.cloud.Instances()
		if !ok {
			return nil, fmt.Errorf("failed to get instances from cloud provider")
		}
		// TODO(roberthbailey): Can we do this without having credentials to talk
		// to the cloud provider?
		// TODO: ExternalID is deprecated, we'll have to drop this code
		externalID, err := instances.ExternalID(kl.nodeName)
		if err != nil {
			return nil, fmt.Errorf("failed to get external ID from cloud provider: %v", err)
		}
		node.Spec.ExternalID = externalID
		// TODO: We can't assume that the node has credentials to talk to the
		// cloudprovider from arbitrary nodes. At most, we should talk to a
		// local metadata server here.
		node.Spec.ProviderID, err = cloudprovider.GetInstanceProviderID(kl.cloud, kl.nodeName)
		if err != nil {
			return nil, err
		}
	} else {
		node.Spec.ExternalID = kl.hostname
	}
	if err := kl.setNodeStatus(node); err != nil {
		return nil, err
	}
	return node, nil
}

// registerWithApiserver registers the node with the cluster master. It is safe
// to call multiple times, but not concurrently (kl.registrationCompleted is
// not locked).
func (kl *Kubelet) registerWithApiserver() {
	if kl.registrationCompleted {
		return
	}
	step := 100 * time.Millisecond
	for {
		time.Sleep(step)
		step = step * 2
		if step >= 7*time.Second {
			step = 7 * time.Second
		}
		node, err := kl.initialNodeStatus()
		if err != nil {
			glog.Errorf("Unable to construct api.Node object for kubelet: %v", err)
			continue
		}
		glog.V(2).Infof("Attempting to register node %s", node.Name)
		if _, err := kl.kubeClient.Nodes().Create(node); err != nil {
			if !apierrors.IsAlreadyExists(err) {
				glog.V(2).Infof("Unable to register %s with the apiserver: %v", node.Name, err)
				continue
			}
			currentNode, err := kl.kubeClient.Nodes().Get(kl.nodeName)
			if err != nil {
				glog.Errorf("error getting node %q: %v", kl.nodeName, err)
				continue
			}
			if currentNode == nil {
				glog.Errorf("no node instance returned for %q", kl.nodeName)
				continue
			}
			if currentNode.Spec.ExternalID == node.Spec.ExternalID {
				glog.Infof("Node %s was previously registered", node.Name)
				kl.registrationCompleted = true
				return
			}
			glog.Errorf(
				"Previously %q had externalID %q; now it is %q; will delete and recreate.",
				kl.nodeName, node.Spec.ExternalID, currentNode.Spec.ExternalID,
			)
			if err := kl.kubeClient.Nodes().Delete(node.Name); err != nil {
				glog.Errorf("Unable to delete old node: %v", err)
			} else {
				glog.Errorf("Deleted old node object %q", kl.nodeName)
			}
			continue
		}
		glog.Infof("Successfully registered node %s", node.Name)
		kl.registrationCompleted = true
		return
	}
}

// syncNodeStatus should be called periodically from a goroutine.
// It synchronizes node status to master, registering the kubelet first if
// necessary.
func (kl *Kubelet) syncNodeStatus() {
	if kl.kubeClient == nil {
		return
	}
	if kl.registerNode {
		// This will exit immediately if it doesn't need to do anything.
		kl.registerWithApiserver()
	}
	if err := kl.updateNodeStatus(); err != nil {
		glog.Errorf("Unable to update node status: %v", err)
	}
}

// relabelVolumes relabels SELinux volumes to match the pod's
// SELinuxOptions specification. This is only needed if the pod uses
// hostPID or hostIPC. Otherwise relabeling is delegated to docker.
func (kl *Kubelet) relabelVolumes(pod *api.Pod, volumes kubecontainer.VolumeMap) error {
	if pod.Spec.SecurityContext.SELinuxOptions == nil {
		return nil
	}
	rootDirContext, err := kl.getRootDirContext()
	if err != nil {
		return err
	}

	chconRunner := selinux.NewChconRunner()
	// Apply the pod's Level to the rootDirContext
	rootDirSELinuxOptions, err := securitycontext.ParseSELinuxOptions(rootDirContext)
	if err != nil {
		return err
	}
	rootDirSELinuxOptions.Level = pod.Spec.SecurityContext.SELinuxOptions.Level
	volumeContext := fmt.Sprintf("%s:%s:%s:%s", rootDirSELinuxOptions.User, rootDirSELinuxOptions.Role, rootDirSELinuxOptions.Type, rootDirSELinuxOptions.Level)

	for _, volume := range volumes {
		if volume.Builder.SupportsSELinux() && !volume.Builder.IsReadOnly() {
			// Relabel the volume and its content to match the 'Level' of the pod
			err := filepath.Walk(volume.Builder.GetPath(), func(path string, info os.FileInfo, err error) error {
				if err != nil {
					return err
				}
				return chconRunner.SetContext(path, volumeContext)
			})
			if err != nil {
				return err
			}
			volume.SELinuxLabeled = true
		}
	}
	return nil
}
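
// Illustrative note: the volumeContext assembled above is a full SELinux context string
// of the form "user:role:type:level". On a typical Docker host this might look like
// "system_u:object_r:svirt_sandbox_file_t:s0:c123,c456", with the level taken from the
// pod's SELinuxOptions; the concrete type and categories depend on the host policy.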

func makeMounts(pod *api.Pod, podDir string, container *api.Container, podVolumes kubecontainer.VolumeMap) ([]kubecontainer.Mount, error) {
	// Kubernetes only mounts on /etc/hosts if:
	// - container does not use hostNetwork and
	// - container is not an infrastructure (pause) container
	// - container is not already mounting on /etc/hosts
	// When the pause container is being created, its IP is still unknown. Hence, PodIP will not have been set.
	mountEtcHostsFile := (pod.Spec.SecurityContext == nil || !pod.Spec.SecurityContext.HostNetwork) && len(pod.Status.PodIP) > 0
	glog.V(4).Infof("Will create hosts mount for container:%q, podIP:%s: %v", container.Name, pod.Status.PodIP, mountEtcHostsFile)

	mounts := []kubecontainer.Mount{}
	for _, mount := range container.VolumeMounts {
		mountEtcHostsFile = mountEtcHostsFile && (mount.MountPath != etcHostsPath)
		vol, ok := podVolumes[mount.Name]
		if !ok {
			glog.Warningf("Mount cannot be satisfied for container %q, because the volume is missing: %q", container.Name, mount)
			continue
		}

		relabelVolume := false
		// If the volume supports SELinux and it has not been
		// relabeled already and it is not a read-only volume,
		// relabel it and mark it as labeled
		if vol.Builder.SupportsSELinux() && !vol.SELinuxLabeled && !vol.Builder.IsReadOnly() {
			vol.SELinuxLabeled = true
			relabelVolume = true
		}
		mounts = append(mounts, kubecontainer.Mount{
			Name:           mount.Name,
			ContainerPath:  mount.MountPath,
			HostPath:       vol.Builder.GetPath(),
			ReadOnly:       mount.ReadOnly,
			SELinuxRelabel: relabelVolume,
		})
	}
	if mountEtcHostsFile {
		hostsMount, err := makeHostsMount(podDir, pod.Status.PodIP, pod.Name)
		if err != nil {
			return nil, err
		}
		mounts = append(mounts, *hostsMount)
	}
	return mounts, nil
}

func makeHostsMount(podDir, podIP, podName string) (*kubecontainer.Mount, error) {
	hostsFilePath := path.Join(podDir, "etc-hosts")
	if err := ensureHostsFile(hostsFilePath, podIP, podName); err != nil {
		return nil, err
	}
	return &kubecontainer.Mount{
		Name:          "k8s-managed-etc-hosts",
		ContainerPath: etcHostsPath,
		HostPath:      hostsFilePath,
		ReadOnly:      false,
	}, nil
}

func ensureHostsFile(fileName string, hostIP, hostName string) error {
	if _, err := os.Stat(fileName); os.IsExist(err) {
		glog.V(4).Infof("kubernetes-managed etc-hosts file exists. Will not be recreated: %q", fileName)
		return nil
	}
	var buffer bytes.Buffer
	buffer.WriteString("# Kubernetes-managed hosts file.\n")
	buffer.WriteString("127.0.0.1\tlocalhost\n")                      // ipv4 localhost
	buffer.WriteString("::1\tlocalhost ip6-localhost ip6-loopback\n") // ipv6 localhost
	buffer.WriteString("fe00::0\tip6-localnet\n")
	buffer.WriteString("fe00::0\tip6-mcastprefix\n")
	buffer.WriteString("fe00::1\tip6-allnodes\n")
	buffer.WriteString("fe00::2\tip6-allrouters\n")
	buffer.WriteString(fmt.Sprintf("%s\t%s\n", hostIP, hostName))
	return ioutil.WriteFile(fileName, buffer.Bytes(), 0644)
}
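
// Illustrative note: for a pod named "frontend" with PodIP 10.244.1.5 (example values),
// the managed file written above contains the localhost and IPv6 boilerplate entries
// followed by a final line mapping "10.244.1.5" to "frontend".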
func makePortMappings(container *api.Container) (ports []kubecontainer.PortMapping) {
	names := make(map[string]struct{})
	for _, p := range container.Ports {
		pm := kubecontainer.PortMapping{
			HostPort:      p.HostPort,
			ContainerPort: p.ContainerPort,
			Protocol:      p.Protocol,
			HostIP:        p.HostIP,
		}

		// We need to create some default port name if it's not specified, since
		// this is necessary for rkt.
		// http://issue.k8s.io/7710
		if p.Name == "" {
			pm.Name = fmt.Sprintf("%s-%s:%d", container.Name, p.Protocol, p.ContainerPort)
		} else {
			pm.Name = fmt.Sprintf("%s-%s", container.Name, p.Name)
		}

		// Protect against exposing the same protocol-port more than once in a container.
		if _, ok := names[pm.Name]; ok {
			glog.Warningf("Port name conflicted, %q is defined more than once", pm.Name)
			continue
		}
		ports = append(ports, pm)
		names[pm.Name] = struct{}{}
	}
	return
}
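
// Illustrative note: a container named "web" (example value) exposing an unnamed TCP port
// 8080 gets the generated mapping name "web-TCP:8080", while a port explicitly named
// "metrics" would be mapped as "web-metrics".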

// GenerateRunContainerOptions generates the RunContainerOptions, which can be used by
// the container runtime to set parameters for launching a container.
func (kl *Kubelet) GenerateRunContainerOptions(pod *api.Pod, container *api.Container) (*kubecontainer.RunContainerOptions, error) {
	var err error
	opts := &kubecontainer.RunContainerOptions{CgroupParent: kl.cgroupRoot}

	vol, ok := kl.volumeManager.GetVolumes(pod.UID)
	if !ok {
		return nil, fmt.Errorf("impossible: cannot find the mounted volumes for pod %q", kubecontainer.GetPodFullName(pod))
	}

	opts.PortMappings = makePortMappings(container)
	// Docker does not relabel volumes if the container is running
	// in the host pid or ipc namespaces so the kubelet must
	// relabel the volumes
	if pod.Spec.SecurityContext != nil && (pod.Spec.SecurityContext.HostIPC || pod.Spec.SecurityContext.HostPID) {
		err = kl.relabelVolumes(pod, vol)
		if err != nil {
			return nil, err
		}
	}

	opts.Mounts, err = makeMounts(pod, kl.getPodDir(pod.UID), container, vol)
	if err != nil {
		return nil, err
	}
	opts.Envs, err = kl.makeEnvironmentVariables(pod, container)
	if err != nil {
		return nil, err
	}

	if len(container.TerminationMessagePath) != 0 {
		p := kl.getPodContainerDir(pod.UID, container.Name)
		if err := os.MkdirAll(p, 0750); err != nil {
			glog.Errorf("Error on creating %q: %v", p, err)
		} else {
			opts.PodContainerDir = p
		}
	}

	opts.DNS, opts.DNSSearch, err = kl.getClusterDNS(pod)
	if err != nil {
		return nil, err
	}

	return opts, nil
}

var masterServices = sets.NewString("kubernetes")

// getServiceEnvVarMap makes a map[string]string of env vars for services a pod in namespace ns should see
func (kl *Kubelet) getServiceEnvVarMap(ns string) (map[string]string, error) {
	var (
		serviceMap = make(map[string]api.Service)
		m          = make(map[string]string)
	)

	// Get all service resources from the master (via a cache),
	// and populate them into service environment variables.
	if kl.serviceLister == nil {
		// Kubelets without masters (e.g. plain GCE ContainerVM) don't set env vars.
		return m, nil
	}
	services, err := kl.serviceLister.List()
	if err != nil {
		return m, fmt.Errorf("failed to list services when setting up env vars.")
	}

	// project the services in namespace ns onto the master services
	for _, service := range services.Items {
		// ignore services where ClusterIP is "None" or empty
		if !api.IsServiceIPSet(&service) {
			continue
		}
		serviceName := service.Name

		switch service.Namespace {
		// for the case where the master service namespace is the namespace the pod
		// is in, the pod should receive all the services in the namespace.
		//
		// ordering of the case clauses below enforces this
		case ns:
			serviceMap[serviceName] = service
		case kl.masterServiceNamespace:
			if masterServices.Has(serviceName) {
				if _, exists := serviceMap[serviceName]; !exists {
					serviceMap[serviceName] = service
				}
			}
		}
	}
	services.Items = []api.Service{}
	for _, service := range serviceMap {
		services.Items = append(services.Items, service)
	}

	for _, e := range envvars.FromServices(&services) {
		m[e.Name] = e.Value
	}
	return m, nil
}
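
// Illustrative note: envvars.FromServices expands each service into the conventional
// Kubernetes variables; a service named "redis-master" typically yields
// REDIS_MASTER_SERVICE_HOST, REDIS_MASTER_SERVICE_PORT and the docker-link style
// REDIS_MASTER_PORT_* variables (the exact set is defined in pkg/kubelet/envvars).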
// Make the service environment variables for a pod in the given namespace.
2015-05-12 21:49:35 +00:00
func ( kl * Kubelet ) makeEnvironmentVariables ( pod * api . Pod , container * api . Container ) ( [ ] kubecontainer . EnvVar , error ) {
var result [ ] kubecontainer . EnvVar
2015-01-08 15:25:14 +00:00
// Note: These are added to the docker.Config, but are not included in the checksum computed
// by dockertools.BuildDockerName(...). That way, we can still determine whether an
// api.Container is already running by its hash. (We don't want to restart a container just
// because some service changed.)
//
// Note that there is a race between Kubelet seeing the pod and kubelet seeing the service.
// To avoid this users can: (1) wait between starting a service and starting; or (2) detect
// missing service env var and exit and be restarted; or (3) use DNS instead of env vars
// and keep trying to resolve the DNS name of the service (recommended).
2015-04-23 20:57:30 +00:00
serviceEnv , err := kl . getServiceEnvVarMap ( pod . Namespace )
2015-01-08 15:25:14 +00:00
if err != nil {
return result , err
}
2015-05-22 22:21:03 +00:00
// Determine the final values of variables:
//
// 1. Determine the final value of each variable:
// a. If the variable's Value is set, expand the `$(var)` references to other
// variables in the .Value field; the sources of variables are the declared
// variables of the container and the service environment variables
// b. If a source is defined for an environment variable, resolve the source
// 2. Create the container's environment in the order variables are declared
// 3. Add remaining service environment vars
tmpEnv := make ( map [ string ] string )
mappingFunc := expansion . MappingFuncFor ( tmpEnv , serviceEnv )
for _ , envVar := range container . Env {
// Accesses apiserver+Pods.
// So, the master may set service env vars, or the kubelet may. In case both are doing
// it, we delete the key from the kubelet-generated ones so we don't have duplicate
// env vars.
// TODO: remove this next line once all platforms use apiserver+Pods.
delete(serviceEnv, envVar.Name)
runtimeVal := envVar . Value
if runtimeVal != "" {
// Step 1a: expand variable references
runtimeVal = expansion . Expand ( runtimeVal , mappingFunc )
} else if envVar . ValueFrom != nil && envVar . ValueFrom . FieldRef != nil {
// Step 1b: resolve alternate env var sources
runtimeVal , err = kl . podFieldSelectorRuntimeValue ( envVar . ValueFrom . FieldRef , pod )
if err != nil {
return result , err
}
}
tmpEnv[envVar.Name] = runtimeVal
result = append(result, kubecontainer.EnvVar{Name: envVar.Name, Value: tmpEnv[envVar.Name]})
}
// Append remaining service env vars.
for k , v := range serviceEnv {
result = append(result, kubecontainer.EnvVar{Name: k, Value: v})
}
return result , nil
}
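// podFieldSelectorRuntimeValue resolves a downward-API field selector against
// the given pod at runtime; status.podIP is handled as a special case.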
func ( kl * Kubelet ) podFieldSelectorRuntimeValue ( fs * api . ObjectFieldSelector , pod * api . Pod ) ( string , error ) {
internalFieldPath , _ , err := api . Scheme . ConvertFieldLabel ( fs . APIVersion , "Pod" , fs . FieldPath , "" )
if err != nil {
return "" , err
}
switch internalFieldPath {
case "status.podIP" :
return pod . Status . PodIP , nil
}
return fieldpath . ExtractFieldPathAsString ( pod , internalFieldPath )
}
// getClusterDNS returns a list of the DNS servers and a list of the DNS search
// domains of the cluster.
func (kl *Kubelet) getClusterDNS(pod *api.Pod) ([]string, []string, error) {
var hostDNS, hostSearch []string
// Get host DNS settings and append them to cluster DNS settings.
if kl.resolverConfig != "" {
f, err := os.Open(kl.resolverConfig)
if err != nil {
return nil, nil, err
}
defer f.Close()

hostDNS, hostSearch, err = kl.parseResolvConf(f)
if err != nil {
return nil , nil , err
}
}
if pod . Spec . DNSPolicy != api . DNSClusterFirst {
// When the kubelet --resolv-conf flag is set to the empty string, use
// DNS settings that override the docker default (which is to use
// /etc/resolv.conf) and effectively disable DNS lookups. According to
// the bind documentation, the behavior of the DNS client library when
// "nameservers" are not specified is to "use the nameserver on the
// local machine". A nameserver setting of localhost is equivalent to
// this documented behavior.
if kl.resolverConfig == "" {
hostDNS = []string{"127.0.0.1"}
hostSearch = []string{"."}
}
return hostDNS, hostSearch, nil
}
var dns, dnsSearch []string
if kl.clusterDNS != nil {
dns = append([]string{kl.clusterDNS.String()}, hostDNS...)
} else {
dns = hostDNS
}
if kl.clusterDomain != "" {
nsSvcDomain := fmt.Sprintf("%s.svc.%s", pod.Namespace, kl.clusterDomain)
svcDomain := fmt.Sprintf("svc.%s", kl.clusterDomain)
dnsSearch = append([]string{nsSvcDomain, svcDomain, kl.clusterDomain}, hostSearch...)
} else {
dnsSearch = hostSearch
}
return dns, dnsSearch, nil
}
// parseResolvConf parses the given resolver configuration and returns the
// list of DNS servers and DNS search domains, letting the cloud provider
// scrub the results first if one is configured.
func (kl *Kubelet) parseResolvConf(reader io.Reader) (nameservers []string, searches []string, err error) {
var scrubber dnsScrubber
if kl.cloud != nil {
scrubber = kl.cloud
}
return parseResolvConf(reader, scrubber)
}

// dnsScrubber lets a cloud provider post-process DNS settings; it is defined
// as a small interface to make testing easier.
type dnsScrubber interface {
ScrubDNS(nameservers, searches []string) (nsOut, srchOut []string)
}

func parseResolvConf(reader io.Reader, dnsScrubber dnsScrubber) (nameservers []string, searches []string, err error) {
file , err := ioutil . ReadAll ( reader )
if err != nil {
return nil , nil , err
}
// Lines of the form "nameserver 1.2.3.4" accumulate.
nameservers = [ ] string { }
// Lines of the form "search example.com" overrule - last one wins.
searches = [ ] string { }
lines := strings . Split ( string ( file ) , "\n" )
for l := range lines {
trimmed := strings . TrimSpace ( lines [ l ] )
if strings . HasPrefix ( trimmed , "#" ) {
continue
}
fields := strings . Fields ( trimmed )
if len ( fields ) == 0 {
continue
}
if fields [ 0 ] == "nameserver" {
nameservers = append ( nameservers , fields [ 1 : ] ... )
}
if fields [ 0 ] == "search" {
searches = fields [ 1 : ]
}
}
// Give the cloud-provider a chance to post-process DNS settings.
if dnsScrubber != nil {
nameservers , searches = dnsScrubber . ScrubDNS ( nameservers , searches )
}
return nameservers , searches , nil
}
// killPod kills all running containers in a pod (including the pod infra container).
func (kl *Kubelet) killPod(pod *api.Pod, runningPod kubecontainer.Pod) error {
return kl.containerRuntime.KillPod(pod, runningPod)
}
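// empty is a zero-size placeholder used as the value type of set-like maps
// keyed by pod UID (e.g. the desiredPods set in HandlePodCleanups).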
type empty struct { }
// makePodDataDirs creates the directories that hold the pod's data (the pod
// dir itself plus its volumes and plugins subdirectories).
func (kl *Kubelet) makePodDataDirs(pod *api.Pod) error {
uid := pod.UID
if err := os . Mkdir ( kl . getPodDir ( uid ) , 0750 ) ; err != nil && ! os . IsExist ( err ) {
return err
}
if err := os . Mkdir ( kl . getPodVolumesDir ( uid ) , 0750 ) ; err != nil && ! os . IsExist ( err ) {
return err
}
if err := os . Mkdir ( kl . getPodPluginsDir ( uid ) , 0750 ) ; err != nil && ! os . IsExist ( err ) {
return err
}
return nil
}
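// syncPod drives a single pod toward its desired state: it ensures the pod's
// data directories and volumes exist, lets the container runtime sync the
// pod's containers, applies any requested bandwidth shaping, and keeps the
// mirror pod for a static pod up to date. Pod status is regenerated and
// cached before returning.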
func ( kl * Kubelet ) syncPod ( pod * api . Pod , mirrorPod * api . Pod , runningPod kubecontainer . Pod , updateType kubetypes . SyncPodType ) error {
podFullName := kubecontainer.GetPodFullName(pod)
uid := pod.UID
start := time.Now()
var firstSeenTime time.Time
if firstSeenTimeStr, ok := pod.Annotations[kubetypes.ConfigFirstSeenAnnotationKey]; !ok {
glog.V(3).Infof("First seen time not recorded for pod %q", pod.UID)
} else {
firstSeenTime = kubetypes.ConvertToTimestamp(firstSeenTimeStr).Get()
}
// Before returning, regenerate status and store it in the cache.
defer func() {
if kubepod.IsStaticPod(pod) && mirrorPod == nil {
// No need to cache the status because the mirror pod does not
// exist yet.
return
}
status, err := kl.generatePodStatus(pod)
if err != nil {
glog.Errorf("Unable to generate status for pod %q (uid %q): %v", podFullName, uid, err)
} else {
podToUpdate := pod
if mirrorPod != nil {
podToUpdate = mirrorPod
}
existingStatus, ok := kl.statusManager.GetPodStatus(podToUpdate.UID)
if !ok || existingStatus.Phase == api.PodPending && status.Phase == api.PodRunning &&
!firstSeenTime.IsZero() {
metrics.PodStartLatency.Observe(metrics.SinceInMicroseconds(firstSeenTime))
}
kl.statusManager.SetPodStatus(podToUpdate, status)
}
}()
// Kill pods we can't run.
if err := canRunPod(pod); err != nil || pod.DeletionTimestamp != nil {
if err := kl.killPod(pod, runningPod); err != nil {
util.HandleError(err)
}
return err
}
if err := kl . makePodDataDirs ( pod ) ; err != nil {
glog . Errorf ( "Unable to make pod data directories for pod %q (uid %q): %v" , podFullName , uid , err )
return err
}
2015-04-16 00:40:07 +00:00
// Starting phase:
ref , err := api . GetReference ( pod )
if err != nil {
glog . Errorf ( "Couldn't make a ref to pod %q: '%v'" , podFullName , err )
}
// Mount volumes.
podVolumes , err := kl . mountExternalVolumes ( pod )
if err != nil {
if ref != nil {
2015-08-11 07:25:10 +00:00
kl . recorder . Eventf ( ref , "FailedMount" , "Unable to mount volumes for pod %q: %v" , podFullName , err )
2015-04-16 00:40:07 +00:00
}
glog . Errorf ( "Unable to mount volumes for pod %q: %v; skipping pod" , podFullName , err )
return err
}
kl . volumeManager . SetVolumes ( pod . UID , podVolumes )
2015-06-10 00:50:15 +00:00
// The kubelet is the source of truth for pod status. It ignores the status sent from
// the apiserver and regenerates status for every pod update, incrementally updating
// the status it received at pod creation time.
//
// The container runtime needs 2 pieces of information from the status to sync a pod:
// The terminated state of containers (to restart them) and the podIp (for liveness probes).
// New pods don't have either, so we skip the expensive status generation step.
//
// If we end up here with a create event for an already running pod, it could result in a
// restart of its containers. This cannot happen unless the kubelet restarts, because the
2015-08-19 00:52:26 +00:00
// delete before the second create would cancel this pod worker.
2015-06-10 00:50:15 +00:00
//
// If the kubelet restarts, we have a bunch of running containers for which we get create
// events. This is ok, because the pod status for these will include the podIP and terminated
// status. Any race condition here effectively boils down to -- the pod worker didn't sync
// the state of a newly started container with the apiserver before the kubelet restarted, so
// it's OK to pretend that the kubelet started them after it restarted.
var podStatus api . PodStatus
2015-10-09 17:24:31 +00:00
if updateType == kubetypes . SyncPodCreate {
2015-06-05 19:42:23 +00:00
// This is the first time we are syncing the pod. Record the latency
// since kubelet first saw the pod if firstSeenTime is set.
if ! firstSeenTime . IsZero ( ) {
metrics . PodWorkerStartLatency . Observe ( metrics . SinceInMicroseconds ( firstSeenTime ) )
}
2015-06-10 00:50:15 +00:00
podStatus = pod . Status
2015-09-17 22:21:55 +00:00
podStatus . StartTime = & unversioned . Time { Time : start }
2015-06-18 20:28:18 +00:00
kl . statusManager . SetPodStatus ( pod , podStatus )
glog . V ( 3 ) . Infof ( "Not generating pod status for new pod %q" , podFullName )
2015-06-10 00:50:15 +00:00
} else {
var err error
podStatus , err = kl . generatePodStatus ( pod )
if err != nil {
glog . Errorf ( "Unable to get status for pod %q (uid %q): %v" , podFullName , uid , err )
return err
}
2014-06-06 23:40:48 +00:00
}
2014-07-18 18:42:47 +00:00
2015-05-08 17:53:00 +00:00
pullSecrets , err := kl . getPullSecretsForPod ( pod )
if err != nil {
glog . Errorf ( "Unable to get pull secrets for pod %q (uid %q): %v" , podFullName , uid , err )
return err
}
2015-08-13 12:59:15 +00:00
err = kl . containerRuntime . SyncPod ( pod , runningPod , podStatus , pullSecrets , kl . backOff )
2015-05-01 01:37:15 +00:00
if err != nil {
return err
2015-03-10 14:09:55 +00:00
}
2015-08-10 22:08:31 +00:00
ingress , egress , err := extractBandwidthResources ( pod )
if err != nil {
return err
}
if egress != nil || ingress != nil {
2015-09-14 21:56:51 +00:00
if podUsesHostNetwork ( pod ) {
2015-09-08 16:50:19 +00:00
kl . recorder . Event ( pod , "HostNetworkNotSupported" , "Bandwidth shaping is not currently supported on the host network" )
2015-08-10 22:08:31 +00:00
} else if kl . shaper != nil {
status , found := kl . statusManager . GetPodStatus ( pod . UID )
if ! found {
statusPtr , err := kl . containerRuntime . GetPodStatus ( pod )
if err != nil {
glog . Errorf ( "Error getting pod for bandwidth shaping" )
return err
}
status = * statusPtr
}
if len ( status . PodIP ) > 0 {
err = kl . shaper . ReconcileCIDR ( fmt . Sprintf ( "%s/32" , status . PodIP ) , egress , ingress )
}
} else {
2015-09-08 16:50:19 +00:00
kl . recorder . Event ( pod , "NilShaper" , "Pod requests bandwidth shaping, but the shaper is undefined" )
2015-08-10 22:08:31 +00:00
}
}
2015-10-12 23:28:23 +00:00
if kubepod . IsStaticPod ( pod ) {
2015-04-08 20:28:33 +00:00
if mirrorPod != nil && ! kl . podManager . IsMirrorPodOf ( mirrorPod , pod ) {
// The mirror pod is semantically different from the static pod. Remove
// it. The mirror pod will get recreated later.
glog . Errorf ( "Deleting mirror pod %q because it is outdated" , podFullName )
if err := kl . podManager . DeleteMirrorPod ( podFullName ) ; err != nil {
glog . Errorf ( "Failed deleting mirror pod %q: %v" , podFullName , err )
}
}
if mirrorPod == nil {
glog . V ( 3 ) . Infof ( "Creating a mirror pod %q" , podFullName )
2015-04-20 18:20:53 +00:00
if err := kl . podManager . CreateMirrorPod ( pod ) ; err != nil {
2015-04-08 20:28:33 +00:00
glog . Errorf ( "Failed creating a mirror pod %q: %v" , podFullName , err )
}
2015-03-09 22:46:47 +00:00
}
}
2014-07-01 05:27:56 +00:00
return nil
}
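// podUsesHostNetwork returns true if the pod's security context requests the
// host's network namespace.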
func podUsesHostNetwork ( pod * api . Pod ) bool {
return pod . Spec . SecurityContext != nil && pod . Spec . SecurityContext . HostNetwork
}
2015-05-08 17:53:00 +00:00
// getPullSecretsForPod inspects the Pod and retrieves the referenced pull secrets
// TODO duplicate secrets are being retrieved multiple times and there is no cache. Creating and using a secret manager interface will make this easier to address.
func ( kl * Kubelet ) getPullSecretsForPod ( pod * api . Pod ) ( [ ] api . Secret , error ) {
pullSecrets := [ ] api . Secret { }
for _ , secretRef := range pod . Spec . ImagePullSecrets {
secret , err := kl . kubeClient . Secrets ( pod . Namespace ) . Get ( secretRef . Name )
if err != nil {
2015-08-14 16:51:28 +00:00
glog . Warningf ( "Unable to retrieve pull secret %s/%s for %s/%s due to %v. The image pull may not succeed." , pod . Namespace , secretRef . Name , pod . Namespace , pod . Name , err )
continue
2015-05-08 17:53:00 +00:00
}
pullSecrets = append ( pullSecrets , * secret )
}
return pullSecrets , nil
}
// getDesiredVolumes stores all volumes defined by the set of pods into a map.
// Keys for each entry are in the format (POD_ID)/(VOLUME_NAME).
func getDesiredVolumes(pods []*api.Pod) map[string]api.Volume {
desiredVolumes := make(map[string]api.Volume)
for _, pod := range pods {
for _, volume := range pod.Spec.Volumes {
identifier := path.Join(string(pod.UID), volume.Name)
desiredVolumes[identifier] = volume
}
}
return desiredVolumes
}
// cleanupOrphanedPodDirs removes a pod's directory if the pod is not in the
// desired set of pods and there are no running containers in the pod.
func (kl *Kubelet) cleanupOrphanedPodDirs(pods []*api.Pod, runningPods []*kubecontainer.Pod) error {
2015-09-09 17:45:01 +00:00
active := sets . NewString ( )
2015-04-03 22:51:50 +00:00
for _ , pod := range pods {
2015-07-01 22:25:41 +00:00
active . Insert ( string ( pod . UID ) )
2015-01-12 00:42:11 +00:00
}
2015-07-01 22:25:41 +00:00
for _ , pod := range runningPods {
active . Insert ( string ( pod . ID ) )
}
2015-01-12 00:42:11 +00:00
found , err := kl . listPodsFromDisk ( )
if err != nil {
return err
}
errlist := [ ] error { }
2015-08-28 05:17:57 +00:00
for _ , uid := range found {
if active . Has ( string ( uid ) ) {
continue
}
2015-09-01 22:32:03 +00:00
if volumes , err := kl . getPodVolumes ( uid ) ; err != nil || len ( volumes ) != 0 {
glog . V ( 3 ) . Infof ( "Orphaned pod %q found, but volumes are not cleaned up; err: %v, volumes: %v " , uid , err , volumes )
2015-08-28 05:17:57 +00:00
continue
}
2015-09-01 22:32:03 +00:00
2015-08-28 05:17:57 +00:00
glog . V ( 3 ) . Infof ( "Orphaned pod %q found, removing" , uid )
if err := os . RemoveAll ( kl . getPodDir ( uid ) ) ; err != nil {
errlist = append ( errlist , err )
2015-01-12 00:42:11 +00:00
}
}
2015-10-14 05:18:37 +00:00
return utilerrors . NewAggregate ( errlist )
2015-01-12 00:42:11 +00:00
}
2015-08-10 22:08:31 +00:00
func ( kl * Kubelet ) cleanupBandwidthLimits ( allPods [ ] * api . Pod ) error {
if kl . shaper == nil {
return nil
}
currentCIDRs , err := kl . shaper . GetCIDRs ( )
if err != nil {
return err
}
2015-09-09 17:45:01 +00:00
possibleCIDRs := sets . String { }
2015-08-10 22:08:31 +00:00
for ix := range allPods {
pod := allPods [ ix ]
ingress , egress , err := extractBandwidthResources ( pod )
if err != nil {
return err
}
if ingress == nil && egress == nil {
glog . V ( 8 ) . Infof ( "Not a bandwidth limited container..." )
continue
}
status , found := kl . statusManager . GetPodStatus ( pod . UID )
if ! found {
statusPtr , err := kl . containerRuntime . GetPodStatus ( pod )
if err != nil {
return err
}
status = * statusPtr
}
if status . Phase == api . PodRunning {
possibleCIDRs . Insert ( fmt . Sprintf ( "%s/32" , status . PodIP ) )
}
}
for _ , cidr := range currentCIDRs {
if ! possibleCIDRs . Has ( cidr ) {
glog . V ( 2 ) . Infof ( "Removing CIDR: %s (%v)" , cidr , possibleCIDRs )
if err := kl . shaper . Reset ( cidr ) ; err != nil {
return err
}
}
}
return nil
}
2014-07-30 21:04:19 +00:00
// Compares the map of current volumes to the map of desired volumes.
// If an active volume does not have a respective desired volume, clean it up.
2015-04-29 17:47:25 +00:00
func ( kl * Kubelet ) cleanupOrphanedVolumes ( pods [ ] * api . Pod , runningPods [ ] * kubecontainer . Pod ) error {
2014-07-30 21:04:19 +00:00
desiredVolumes := getDesiredVolumes ( pods )
2014-11-23 15:47:25 +00:00
currentVolumes := kl . getPodVolumesFromDisk ( )
2015-04-29 17:47:25 +00:00
2015-09-09 17:45:01 +00:00
runningSet := sets . String { }
2015-04-29 17:47:25 +00:00
for _ , pod := range runningPods {
runningSet . Insert ( string ( pod . ID ) )
2015-02-03 20:14:16 +00:00
}
2015-04-29 17:47:25 +00:00
2014-07-30 21:04:19 +00:00
for name , vol := range currentVolumes {
if _ , ok := desiredVolumes [ name ] ; ! ok {
2015-02-03 20:14:16 +00:00
parts := strings . Split ( name , "/" )
if runningSet . Has ( parts [ 0 ] ) {
2015-03-19 23:51:34 +00:00
glog . Infof ( "volume %q, still has a container running %q, skipping teardown" , name , parts [ 0 ] )
2015-02-03 20:14:16 +00:00
continue
}
2014-07-30 21:04:19 +00:00
//TODO (jonesdl) We should somehow differentiate between volumes that are supposed
//to be deleted and volumes that are leftover after a crash.
2015-01-06 00:38:47 +00:00
glog . Warningf ( "Orphaned volume %q found, tearing down volume" , name )
2015-04-16 00:40:07 +00:00
// TODO(yifan): Refactor this hacky string manipulation.
kl . volumeManager . DeleteVolumes ( types . UID ( parts [ 0 ] ) )
2014-07-30 21:04:19 +00:00
//TODO (jonesdl) This should not block other kubelet synchronization procedures
err := vol . TearDown ( )
2014-07-29 17:20:50 +00:00
if err != nil {
2015-01-06 00:38:47 +00:00
glog . Errorf ( "Could not tear down volume %q: %v" , name , err )
2014-07-29 17:20:50 +00:00
}
2014-07-25 20:16:59 +00:00
}
}
return nil
}
2015-08-20 01:57:58 +00:00
// Delete any pods that are no longer running and are marked for deletion.
func ( kl * Kubelet ) cleanupTerminatedPods ( pods [ ] * api . Pod , runningPods [ ] * kubecontainer . Pod ) error {
var terminating [ ] * api . Pod
for _ , pod := range pods {
if pod . DeletionTimestamp != nil {
found := false
for _ , runningPod := range runningPods {
if runningPod . ID == pod . UID {
found = true
break
}
}
if found {
podFullName := kubecontainer . GetPodFullName ( pod )
glog . V ( 5 ) . Infof ( "Keeping terminated pod %q and uid %q, still running" , podFullName , pod . UID )
continue
}
terminating = append ( terminating , pod )
}
}
if ! kl . statusManager . TerminatePods ( terminating ) {
return errors . New ( "not all pods were successfully terminated" )
}
return nil
}
2015-05-16 00:01:56 +00:00
// pastActiveDeadline returns true if the pod has been active for more than
// ActiveDeadlineSeconds.
func ( kl * Kubelet ) pastActiveDeadline ( pod * api . Pod ) bool {
2015-09-17 22:21:55 +00:00
now := unversioned . Now ( )
2015-05-16 00:01:56 +00:00
if pod . Spec . ActiveDeadlineSeconds != nil {
2015-08-18 20:26:56 +00:00
podStatus , ok := kl . statusManager . GetPodStatus ( pod . UID )
2015-05-16 00:01:56 +00:00
if ! ok {
podStatus = pod . Status
2015-05-09 05:01:43 +00:00
}
2015-05-16 00:01:56 +00:00
if ! podStatus . StartTime . IsZero ( ) {
startTime := podStatus . StartTime . Time
duration := now . Time . Sub ( startTime )
allowedDuration := time . Duration ( * pod . Spec . ActiveDeadlineSeconds ) * time . Second
if duration >= allowedDuration {
return true
}
2015-05-09 05:01:43 +00:00
}
}
2015-05-16 00:01:56 +00:00
return false
}
2015-08-19 00:52:26 +00:00
// Returns true if pod is in the terminated state ("Failed" or "Succeeded").
func ( kl * Kubelet ) podIsTerminated ( pod * api . Pod ) bool {
var status api . PodStatus
// Check the cached pod status which was set after the last sync.
status , ok := kl . statusManager . GetPodStatus ( pod . UID )
if ! ok {
// If there is no cached status, use the status from the
// apiserver. This is useful if kubelet has recently been
// restarted.
status = pod . Status
}
2015-05-16 00:01:56 +00:00
if status . Phase == api . PodFailed || status . Phase == api . PodSucceeded {
return true
}
2015-08-19 00:52:26 +00:00
2015-05-16 00:01:56 +00:00
return false
2015-05-09 05:01:43 +00:00
}
2015-08-19 00:52:26 +00:00
func ( kl * Kubelet ) filterOutTerminatedPods ( pods [ ] * api . Pod ) [ ] * api . Pod {
var filteredPods [ ] * api . Pod
for _ , p := range pods {
if kl . podIsTerminated ( p ) {
2015-04-24 18:20:23 +00:00
continue
}
2015-08-19 00:52:26 +00:00
filteredPods = append ( filteredPods , p )
2015-04-24 18:20:23 +00:00
}
2015-08-19 00:52:26 +00:00
return filteredPods
2015-08-11 23:25:17 +00:00
}
2014-07-15 17:26:56 +00:00
2015-08-11 23:25:17 +00:00
// removeOrphanedPodStatuses removes obsolete entries in podStatus where
// the pod is no longer considered bound to this node.
2015-08-19 00:52:26 +00:00
func ( kl * Kubelet ) removeOrphanedPodStatuses ( pods [ ] * api . Pod , mirrorPods [ ] * api . Pod ) {
2015-08-18 20:26:56 +00:00
podUIDs := make ( map [ types . UID ] bool )
2015-08-11 23:25:17 +00:00
for _ , pod := range pods {
2015-08-18 20:26:56 +00:00
podUIDs [ pod . UID ] = true
2015-08-11 23:25:17 +00:00
}
2015-08-18 20:26:56 +00:00
for _ , pod := range mirrorPods {
podUIDs [ pod . UID ] = true
}
kl . statusManager . RemoveOrphanedStatuses ( podUIDs )
2015-08-11 23:25:17 +00:00
}
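// deletePod stops the pod worker for the given pod UID and queues the pod's
// running containers for killing. Volume and directory cleanup is left to the
// periodic cleanup routine.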
func ( kl * Kubelet ) deletePod ( uid types . UID ) error {
2015-10-06 01:20:57 +00:00
if ! kl . allSourcesReady ( ) {
2015-08-19 00:52:26 +00:00
// If the sources aren't ready, skip deletion, as we may accidentally delete pods
// for sources that haven't reported yet.
return fmt . Errorf ( "skipping delete because sources aren't ready yet" )
2014-07-01 16:37:45 +00:00
}
2015-08-19 00:52:26 +00:00
kl . podWorkers . ForgetWorker ( uid )
// The runtime cache may not have been updated with the pod yet, but that's okay
// because the periodic cleanup routine will attempt the deletion again later.
runningPods , err := kl . runtimeCache . GetPods ( )
if err != nil {
return fmt . Errorf ( "error listing containers: %v" , err )
2015-08-11 23:25:17 +00:00
}
2015-08-19 00:52:26 +00:00
pod := kubecontainer . Pods ( runningPods ) . FindPod ( "" , uid )
if pod . IsEmpty ( ) {
return fmt . Errorf ( "pod not found" )
}
kl . podKillingCh <- & pod
// TODO: delete the mirror pod here?
2015-02-27 09:19:41 +00:00
2015-08-19 00:52:26 +00:00
// We leave the volume/directory cleanup to the periodic cleanup routine.
return nil
}
// HandlePodCleanups performs a series of cleanup work, including terminating
// pod workers, killing unwanted pods, and removing orphaned volumes/pod
// directories.
// TODO(yujuhong): This function is executed by the main sync loop, so it
// should not contain any blocking calls. Re-examine the function and decide
// whether or not we should move it into a separate goroutine.
func ( kl * Kubelet ) HandlePodCleanups ( ) error {
allPods , mirrorPods := kl . podManager . GetPodsAndMirrorPods ( )
// Pod phase progresses monotonically. Once a pod has reached a final state,
// it should never leave regardless of the restart policy. The statuses
// of such pods should not be changed, and there is no need to sync them.
// TODO: the logic here does not handle two cases:
// 1. If the containers were removed immediately after they died, kubelet
// may fail to generate correct statuses, let alone filtering correctly.
// 2. If kubelet restarted before writing the terminated status for a pod
// to the apiserver, it could still restart the terminated pod (even
// though the pod was not considered terminated by the apiserver).
// These two conditions could be alleviated by checkpointing kubelet.
activePods := kl . filterOutTerminatedPods ( allPods )
desiredPods := make ( map [ types . UID ] empty )
for _ , pod := range activePods {
desiredPods [ pod . UID ] = empty { }
}
// Stop the workers for no-longer existing pods.
// TODO: is here the best place to forget pod workers?
kl . podWorkers . ForgetNonExistingPodWorkers ( desiredPods )
2015-08-25 17:39:41 +00:00
kl . probeManager . CleanupPods ( activePods )
2015-08-19 00:52:26 +00:00
2015-08-11 23:25:17 +00:00
runningPods , err := kl . runtimeCache . GetPods ( )
if err != nil {
glog . Errorf ( "Error listing containers: %#v" , err )
return err
}
2015-08-19 00:52:26 +00:00
for _ , pod := range runningPods {
if _ , found := desiredPods [ pod . ID ] ; ! found {
kl . podKillingCh <- pod
}
2015-04-29 17:47:25 +00:00
}
2015-08-19 00:52:26 +00:00
kl . removeOrphanedPodStatuses ( allPods , mirrorPods )
2015-04-29 17:47:25 +00:00
// Note that we just killed the unwanted pods. This may not have reflected
2015-05-11 17:50:14 +00:00
// in the cache. We need to bypass the cache to get the latest set of
2015-04-29 17:47:25 +00:00
// running pods to clean up the volumes.
// TODO: Evaluate the performance impact of bypassing the runtime cache.
2015-05-01 22:25:11 +00:00
runningPods , err = kl . containerRuntime . GetPods ( false )
2015-04-29 17:47:25 +00:00
if err != nil {
glog . Errorf ( "Error listing containers: %#v" , err )
2015-02-03 20:14:16 +00:00
return err
}
2015-02-04 01:46:28 +00:00
// Remove any orphaned volumes.
2015-05-18 20:12:35 +00:00
// Note that we pass all pods (including terminated pods) to the function,
// so that we don't remove volumes associated with terminated but not yet
// deleted pods.
err = kl . cleanupOrphanedVolumes ( allPods , runningPods )
2015-01-12 00:42:11 +00:00
if err != nil {
2015-04-14 22:26:50 +00:00
glog . Errorf ( "Failed cleaning up orphaned volumes: %v" , err )
2015-01-12 00:42:11 +00:00
return err
}
2015-04-14 22:26:50 +00:00
// Remove any orphaned pod directories.
2015-05-18 20:12:35 +00:00
// Note that we pass all pods (including terminated pods) to the function,
// so that we don't remove directories associated with terminated but not yet
// deleted pods.
2015-07-01 22:25:41 +00:00
err = kl . cleanupOrphanedPodDirs ( allPods , runningPods )
2015-01-12 00:42:11 +00:00
if err != nil {
2015-04-14 22:26:50 +00:00
glog . Errorf ( "Failed cleaning up orphaned pod directories: %v" , err )
2015-01-12 00:42:11 +00:00
return err
}
2014-07-30 21:04:19 +00:00
2015-03-09 22:46:47 +00:00
// Remove any orphaned mirror pods.
2015-03-23 19:17:12 +00:00
kl . podManager . DeleteOrphanedMirrorPods ( )
2015-03-09 22:46:47 +00:00
2015-08-20 01:57:58 +00:00
if err := kl . cleanupTerminatedPods ( allPods , runningPods ) ; err != nil {
glog . Errorf ( "Failed to cleanup terminated pods: %v" , err )
}
// Clear out any old bandwidth rules.
if err = kl.cleanupBandwidthLimits(allPods); err != nil {
return err
}
2015-08-13 12:59:15 +00:00
kl . backOff . GC ( )
2014-06-06 23:40:48 +00:00
return err
}
// podKiller launches a goroutine to kill a pod received from the channel if
// no other goroutine is already killing that pod.
func (kl *Kubelet) podKiller() {
2015-09-09 17:45:01 +00:00
killing := sets . NewString ( )
2015-08-19 00:52:26 +00:00
resultCh := make ( chan types . UID )
defer close ( resultCh )
for {
select {
case pod , ok := <- kl . podKillingCh :
if ! ok {
2015-04-14 22:26:50 +00:00
return
}
2015-08-19 00:52:26 +00:00
if killing . Has ( string ( pod . ID ) ) {
// The pod is already being killed.
break
}
killing . Insert ( string ( pod . ID ) )
go func ( pod * kubecontainer . Pod , ch chan types . UID ) {
defer func ( ) {
ch <- pod . ID
} ( )
glog . V ( 2 ) . Infof ( "Killing unwanted pod %q" , pod . Name )
err := kl . killPod ( nil , * pod )
if err != nil {
glog . Errorf ( "Failed killing the pod %q: %v" , pod . Name , err )
}
} ( pod , resultCh )
2015-04-14 22:26:50 +00:00
2015-08-19 00:52:26 +00:00
case podID := <- resultCh :
killing . Delete ( string ( podID ) )
2015-04-14 22:26:50 +00:00
}
}
}
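// podsByCreationTime implements sort.Interface, ordering pods by their
// CreationTimestamp, oldest first.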
type podsByCreationTime [ ] * api . Pod
2015-02-27 21:43:21 +00:00
func ( s podsByCreationTime ) Len ( ) int {
return len ( s )
}
func ( s podsByCreationTime ) Swap ( i , j int ) {
s [ i ] , s [ j ] = s [ j ] , s [ i ]
}
func ( s podsByCreationTime ) Less ( i , j int ) bool {
return s [ i ] . CreationTimestamp . Before ( s [ j ] . CreationTimestamp )
}
// hasHostPortConflicts detects pods with conflicting host ports.
func hasHostPortConflicts(pods []*api.Pod) bool {
2015-09-09 17:45:01 +00:00
ports := sets . String { }
2015-04-03 22:51:50 +00:00
for _ , pod := range pods {
2015-08-19 00:52:26 +00:00
if errs := validation . AccumulateUniqueHostPorts ( pod . Spec . Containers , & ports ) ; len ( errs ) > 0 {
2015-03-23 17:14:30 +00:00
glog . Errorf ( "Pod %q: HostPort is already allocated, ignoring: %v" , kubecontainer . GetPodFullName ( pod ) , errs )
2015-08-19 00:52:26 +00:00
return true
2014-07-08 04:48:47 +00:00
}
}
2015-08-19 00:52:26 +00:00
return false
2015-03-03 18:33:25 +00:00
}
// hasInsufficientFreeResources detects pods that exceed the node's resources.
// TODO: Consider integrating disk space into this function, and return a
// suitable reason and message per resource type.
func (kl *Kubelet) hasInsufficientFreeResources(pods []*api.Pod) (bool, bool) {
2015-09-21 18:06:38 +00:00
info , err := kl . GetCachedMachineInfo ( )
2015-03-16 12:50:00 +00:00
if err != nil {
2015-03-31 22:32:02 +00:00
glog . Errorf ( "error getting machine info: %v" , err )
2015-08-19 00:52:26 +00:00
// TODO: Should we admit the pod when machine info is unavailable?
return false , false
2015-03-16 12:50:00 +00:00
}
capacity := CapacityFromMachineInfo ( info )
2015-08-19 00:52:26 +00:00
_ , notFittingCPU , notFittingMemory := predicates . CheckPodsExceedingFreeResources ( pods , capacity )
return len ( notFittingCPU ) > 0 , len ( notFittingMemory ) > 0
2015-03-20 16:52:32 +00:00
}
// isOutOfDisk detects if new pods can't fit because the node is low on disk space.
func (kl *Kubelet) isOutOfDisk() bool {
2015-05-12 08:24:08 +00:00
outOfDockerDisk := false
outOfRootDisk := false
// Check disk space once globally and reject or accept all new pods.
withinBounds , err := kl . diskSpaceManager . IsDockerDiskSpaceAvailable ( )
// Assume enough space in case of errors.
if err == nil && ! withinBounds {
outOfDockerDisk = true
}
withinBounds , err = kl . diskSpaceManager . IsRootDiskSpaceAvailable ( )
// Assume enough space in case of errors.
if err == nil && ! withinBounds {
outOfRootDisk = true
}
// Kubelet would indicate all pods as newly created on the first run after restart.
// We ignore the first disk check to ensure that running pods are not killed.
// Disk manager will only declare out of disk problems if unfreeze has been called.
kl . diskSpaceManager . Unfreeze ( )
2015-05-14 20:02:36 +00:00
2015-08-19 00:52:26 +00:00
return outOfDockerDisk || outOfRootDisk
2015-05-12 08:24:08 +00:00
}
2015-08-19 00:52:26 +00:00
// matchesNodeSelector returns true if pod matches node's labels.
func ( kl * Kubelet ) matchesNodeSelector ( pod * api . Pod ) bool {
2015-06-13 00:28:34 +00:00
if kl . standaloneMode {
2015-08-19 00:52:26 +00:00
return true
2015-06-13 00:28:34 +00:00
}
2015-09-21 18:00:04 +00:00
node , err := kl . GetNode ( )
2015-03-20 16:52:32 +00:00
if err != nil {
glog . Errorf ( "error getting node: %v" , err )
2015-08-19 00:52:26 +00:00
return true
2015-03-20 16:52:32 +00:00
}
2015-08-19 00:52:26 +00:00
return predicates . PodMatchesNodeLabels ( pod , node )
2015-03-16 12:50:00 +00:00
}
2015-08-19 00:52:26 +00:00
func ( kl * Kubelet ) rejectPod ( pod * api . Pod , reason , message string ) {
kl . recorder . Eventf ( pod , reason , message )
kl . statusManager . SetPodStatus ( pod , api . PodStatus {
Phase : api . PodFailed ,
Reason : reason ,
Message : "Pod " + message } )
}
2015-05-14 20:02:36 +00:00
2015-08-19 00:52:26 +00:00
// canAdmitPod determines if a pod can be admitted, and gives a reason if it
// cannot. "pod" is the new pod, while "pods" includes all admitted pods plus the
// new pod. The function returns a boolean value indicating whether the pod
// can be admitted, a brief single-word reason, and a message explaining why
// the pod cannot be admitted.
func ( kl * Kubelet ) canAdmitPod ( pods [ ] * api . Pod , pod * api . Pod ) ( bool , string , string ) {
if hasHostPortConflicts ( pods ) {
return false , "HostPortConflict" , "cannot start the pod due to host port conflict."
}
if ! kl . matchesNodeSelector ( pod ) {
return false , "NodeSelectorMismatching" , "cannot be started due to node selector mismatch"
}
cpu, memory := kl.hasInsufficientFreeResources(pods)
if cpu {
return false , "InsufficientFreeCPU" , "cannot start the pod due to insufficient free CPU."
} else if memory {
return false , "InsufficientFreeMemory" , "cannot be started due to insufficient free memory"
}
if kl . isOutOfDisk ( ) {
return false , "OutOfDisk" , "cannot be started due to lack of disk space."
}
2015-05-14 20:02:36 +00:00
2015-08-19 00:52:26 +00:00
return true , "" , ""
2014-07-08 04:48:47 +00:00
}
2014-07-01 20:01:39 +00:00
// syncLoop is the main loop for processing changes. It watches for changes from
// three channels (file, apiserver, and http) and creates a union of them. For
// any new change seen, it runs a sync against the desired state and the running state. If
// no changes are seen to the configuration, it synchronizes the last known desired
// state every sync-frequency seconds. Never returns.
func ( kl * Kubelet ) syncLoop ( updates <- chan kubetypes . PodUpdate , handler SyncHandler ) {
2015-04-08 20:57:19 +00:00
glog . Info ( "Starting kubelet main sync loop." )
2015-10-27 01:50:57 +00:00
kl . resyncTicker = time . NewTicker ( kl . resyncInterval )
2015-08-28 01:07:57 +00:00
var housekeepingTimestamp time . Time
2014-06-06 23:40:48 +00:00
for {
2015-09-21 18:06:38 +00:00
if ! kl . containerRuntimeUp ( ) {
2015-08-28 01:07:57 +00:00
time . Sleep ( 5 * time . Second )
glog . Infof ( "Skipping pod synchronization, container runtime is not up." )
continue
}
2015-09-21 18:06:38 +00:00
if ! kl . doneNetworkConfigure ( ) {
2015-08-28 01:07:57 +00:00
time . Sleep ( 5 * time . Second )
glog . Infof ( "Skipping pod synchronization, network is not configured" )
continue
}
2015-09-15 19:29:34 +00:00
// Make sure we sync first to receive the pods from the sources before
// performing housekeeping.
if ! kl . syncLoopIteration ( updates , handler ) {
break
}
2015-08-28 01:07:57 +00:00
// We don't want to perform housekeeping too often, so we set a minimum
// period for it. Housekeeping would be performed at least once every
// kl.resyncInterval, and *no* more than once every
// housekeepingMinimumPeriod.
// TODO (#13418): Investigate whether we can/should spawn a dedicated
// goroutine for housekeeping
2015-10-06 01:20:57 +00:00
if ! kl . allSourcesReady ( ) {
2015-09-15 19:29:34 +00:00
// If the sources aren't ready, skip housekeeping, as we may
// accidentally delete pods from unready sources.
glog . V ( 4 ) . Infof ( "Skipping cleanup, sources aren't ready yet." )
} else if housekeepingTimestamp . IsZero ( ) {
housekeepingTimestamp = time . Now ( )
} else if time . Since ( housekeepingTimestamp ) > housekeepingMinimumPeriod {
2015-08-28 01:07:57 +00:00
glog . V ( 4 ) . Infof ( "SyncLoop (housekeeping)" )
if err := handler . HandlePodCleanups ( ) ; err != nil {
glog . Errorf ( "Failed cleaning pods: %v" , err )
}
housekeepingTimestamp = time . Now ( )
}
2015-06-17 22:31:46 +00:00
}
}
2015-10-09 17:24:31 +00:00
func ( kl * Kubelet ) syncLoopIteration ( updates <- chan kubetypes . PodUpdate , handler SyncHandler ) bool {
2015-06-17 22:31:46 +00:00
kl . syncLoopMonitor . Store ( time . Now ( ) )
select {
2015-08-30 19:47:24 +00:00
case u , open := <- updates :
if ! open {
2015-06-17 22:31:46 +00:00
glog . Errorf ( "Update channel is closed. Exiting the sync loop." )
2015-08-30 19:47:24 +00:00
return false
2015-06-18 05:34:11 +00:00
}
2015-10-06 01:20:57 +00:00
kl . addSource ( u . Source )
2015-08-19 00:52:26 +00:00
switch u . Op {
2015-10-09 17:24:31 +00:00
case kubetypes . ADD :
2015-10-14 03:46:32 +00:00
glog . V ( 2 ) . Infof ( "SyncLoop (ADD, %q): %q" , u . Source , kubeletutil . FormatPodNames ( u . Pods ) )
2015-08-19 00:52:26 +00:00
handler . HandlePodAdditions ( u . Pods )
2015-10-09 17:24:31 +00:00
case kubetypes . UPDATE :
2015-10-14 03:46:32 +00:00
glog . V ( 2 ) . Infof ( "SyncLoop (UPDATE, %q): %q" , u . Source , kubeletutil . FormatPodNames ( u . Pods ) )
2015-08-19 00:52:26 +00:00
handler . HandlePodUpdates ( u . Pods )
2015-10-09 17:24:31 +00:00
case kubetypes . REMOVE :
2015-10-14 03:46:32 +00:00
glog . V ( 2 ) . Infof ( "SyncLoop (REMOVE, %q): %q" , u . Source , kubeletutil . FormatPodNames ( u . Pods ) )
2015-08-19 00:52:26 +00:00
handler . HandlePodDeletions ( u . Pods )
2015-10-09 17:24:31 +00:00
case kubetypes . SET :
2015-08-19 00:52:26 +00:00
// TODO: Do we want to support this?
glog . Errorf ( "Kubelet does not support snapshot update" )
}
2015-10-27 01:50:57 +00:00
case <- kl . resyncTicker . C :
2015-08-19 00:52:26 +00:00
// Periodically syncs all the pods and performs cleanup tasks.
glog . V ( 4 ) . Infof ( "SyncLoop (periodic sync)" )
handler . HandlePodSyncs ( kl . podManager . GetPods ( ) )
2015-10-19 22:15:59 +00:00
case update := <- kl . livenessManager . Updates ( ) :
// We only care about failures (signalling container death) here.
if update . Result == proberesults . Failure {
glog . V ( 1 ) . Infof ( "SyncLoop (container unhealthy)." )
handler . HandlePodSyncs ( [ ] * api . Pod { update . Pod } )
}
2015-06-17 22:31:46 +00:00
}
2015-08-19 00:52:26 +00:00
kl . syncLoopMonitor . Store ( time . Now ( ) )
2015-08-30 19:47:24 +00:00
return true
2015-08-19 00:52:26 +00:00
}
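// dispatchWork hands the pod over to an asynchronous pod worker and records
// per-worker latency metrics; pods that are already terminated are skipped.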
func ( kl * Kubelet ) dispatchWork ( pod * api . Pod , syncType kubetypes . SyncPodType , mirrorPod * api . Pod , start time . Time ) {
2015-08-19 00:52:26 +00:00
if kl . podIsTerminated ( pod ) {
return
}
// Run the sync in an async worker.
2015-10-01 23:25:07 +00:00
kl . podWorkers . UpdatePod ( pod , mirrorPod , syncType , func ( ) {
2015-08-19 00:52:26 +00:00
metrics . PodWorkerLatency . WithLabelValues ( syncType . String ( ) ) . Observe ( metrics . SinceInMicroseconds ( start ) )
} )
// Note the number of containers for new pods.
2015-10-09 17:24:31 +00:00
if syncType == kubetypes . SyncPodCreate {
2015-08-19 00:52:26 +00:00
metrics . ContainersPerPodCount . Observe ( float64 ( len ( pod . Spec . Containers ) ) )
}
}
// TODO: Consider handling all mirror pods updates in a separate component.
func ( kl * Kubelet ) handleMirrorPod ( mirrorPod * api . Pod , start time . Time ) {
// Mirror pod ADD/UPDATE/DELETE operations are considered an UPDATE to the
// corresponding static pod. Send update to the pod worker if the static
// pod exists.
if pod , ok := kl . podManager . GetPodByMirrorPod ( mirrorPod ) ; ok {
2015-10-09 17:24:31 +00:00
kl . dispatchWork ( pod , kubetypes . SyncPodUpdate , mirrorPod , start )
2015-08-19 00:52:26 +00:00
}
}
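// HandlePodAdditions is the SyncHandler callback for pods newly seen from a
// config source: each pod is registered with the pod manager, admitted (or
// rejected), and dispatched to a pod worker.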
func ( kl * Kubelet ) HandlePodAdditions ( pods [ ] * api . Pod ) {
2015-06-17 22:31:46 +00:00
start := time . Now ( )
2015-08-19 00:52:26 +00:00
sort . Sort ( podsByCreationTime ( pods ) )
for _ , pod := range pods {
kl . podManager . AddPod ( pod )
2015-10-12 23:28:23 +00:00
if kubepod . IsMirrorPod ( pod ) {
2015-08-19 00:52:26 +00:00
kl . handleMirrorPod ( pod , start )
continue
}
// Note that allPods includes the new pod since we added it at the
// beginning of the loop.
allPods := kl.podManager.GetPods()
// Pods that we rejected were marked as failed, so activePods includes all
// admitted pods that are alive plus the new pod.
activePods := kl.filterOutTerminatedPods(allPods)
// Check if we can admit the pod; if not, reject it.
if ok , reason , message := kl . canAdmitPod ( activePods , pod ) ; ! ok {
kl . rejectPod ( pod , reason , message )
continue
2014-06-06 23:40:48 +00:00
}
2015-08-19 00:52:26 +00:00
mirrorPod , _ := kl . podManager . GetMirrorPodByPod ( pod )
2015-10-09 17:24:31 +00:00
kl . dispatchWork ( pod , kubetypes . SyncPodCreate , mirrorPod , start )
2015-08-25 17:39:41 +00:00
kl . probeManager . AddPod ( pod )
2014-06-06 23:40:48 +00:00
}
2015-08-19 00:52:26 +00:00
}
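// HandlePodUpdates is the SyncHandler callback for pods whose spec has
// changed; the updated pods are recorded in the pod manager and dispatched to
// the pod workers.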
func ( kl * Kubelet ) HandlePodUpdates ( pods [ ] * api . Pod ) {
start := time . Now ( )
for _ , pod := range pods {
kl . podManager . UpdatePod ( pod )
2015-10-12 23:28:23 +00:00
if kubepod . IsMirrorPod ( pod ) {
2015-08-19 00:52:26 +00:00
kl . handleMirrorPod ( pod , start )
continue
}
// TODO: Evaluate if we need to validate and reject updates.
mirrorPod , _ := kl . podManager . GetMirrorPodByPod ( pod )
2015-10-09 17:24:31 +00:00
kl . dispatchWork ( pod , kubetypes . SyncPodUpdate , mirrorPod , start )
2015-08-19 00:52:26 +00:00
}
}
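// HandlePodDeletions is the SyncHandler callback for pods removed from a
// config source; deletion failures are tolerated because the periodic cleanup
// routine will retry.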
func ( kl * Kubelet ) HandlePodDeletions ( pods [ ] * api . Pod ) {
start := time . Now ( )
for _ , pod := range pods {
kl . podManager . DeletePod ( pod )
2015-10-12 23:28:23 +00:00
if kubepod . IsMirrorPod ( pod ) {
2015-08-19 00:52:26 +00:00
kl . handleMirrorPod ( pod , start )
continue
}
// Deletion is allowed to fail because the periodic cleanup routine
// will trigger deletion again.
if err := kl . deletePod ( pod . UID ) ; err != nil {
2015-10-14 03:46:32 +00:00
glog . V ( 2 ) . Infof ( "Failed to delete pod %q, err: %v" , kubeletutil . FormatPodName ( pod ) , err )
2015-08-19 00:52:26 +00:00
}
2015-08-25 17:39:41 +00:00
kl . probeManager . RemovePod ( pod )
2015-08-19 00:52:26 +00:00
}
}
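// HandlePodSyncs dispatches the given pods to their pod workers; it is used
// for the periodic resync and for pods whose containers failed a liveness probe.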
func ( kl * Kubelet ) HandlePodSyncs ( pods [ ] * api . Pod ) {
start := time . Now ( )
for _ , pod := range pods {
mirrorPod , _ := kl . podManager . GetMirrorPodByPod ( pod )
2015-10-09 17:24:31 +00:00
kl . dispatchWork ( pod , kubetypes . SyncPodSync , mirrorPod , start )
2015-06-17 22:31:46 +00:00
}
}
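// LatestLoopEntryTime returns the time the sync loop was last entered, or the
// zero time if the loop has not run yet.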
func ( kl * Kubelet ) LatestLoopEntryTime ( ) time . Time {
val := kl . syncLoopMonitor . Load ( )
if val == nil {
return time . Time { }
}
return val . ( time . Time )
2014-06-06 23:40:48 +00:00
}
2015-04-21 20:02:50 +00:00
// Returns the container runtime version for this Kubelet.
func ( kl * Kubelet ) GetContainerRuntimeVersion ( ) ( kubecontainer . Version , error ) {
2015-05-01 22:25:11 +00:00
if kl . containerRuntime == nil {
2015-04-21 20:02:50 +00:00
return nil , fmt . Errorf ( "no container runtime" )
2015-02-04 17:14:17 +00:00
}
2015-05-01 22:25:11 +00:00
return kl . containerRuntime . Version ( )
2015-02-04 17:14:17 +00:00
}
2015-02-24 00:33:43 +00:00
func ( kl * Kubelet ) validatePodPhase ( podStatus * api . PodStatus ) error {
switch podStatus . Phase {
case api . PodRunning , api . PodSucceeded , api . PodFailed :
return nil
}
return fmt . Errorf ( "pod is not in 'Running', 'Succeeded' or 'Failed' state - State: %q" , podStatus . Phase )
}
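// validateContainerStatus looks up the named container in the pod status (or
// its last terminated instance when previous is true) and returns its
// container ID, erroring if the container is missing or still waiting.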
func ( kl * Kubelet ) validateContainerStatus ( podStatus * api . PodStatus , containerName string , previous bool ) ( containerID kubecontainer . ContainerID , err error ) {
2015-05-07 18:34:16 +00:00
var cID string
2015-03-25 11:09:35 +00:00
cStatus , found := api . GetContainerStatus ( podStatus . ContainerStatuses , containerName )
if ! found {
2015-10-07 17:58:05 +00:00
return kubecontainer . ContainerID { } , fmt . Errorf ( "container %q not found" , containerName )
2015-02-24 00:33:43 +00:00
}
2015-05-07 18:34:16 +00:00
if previous {
2015-05-27 22:02:11 +00:00
if cStatus . LastTerminationState . Terminated == nil {
2015-10-07 17:58:05 +00:00
return kubecontainer . ContainerID { } , fmt . Errorf ( "previous terminated container %q not found" , containerName )
2015-05-07 18:34:16 +00:00
}
2015-05-27 22:02:11 +00:00
cID = cStatus . LastTerminationState . Terminated . ContainerID
2015-05-07 18:34:16 +00:00
} else {
if cStatus . State . Waiting != nil {
2015-10-07 17:58:05 +00:00
return kubecontainer . ContainerID { } , fmt . Errorf ( "container %q is in waiting state." , containerName )
2015-05-07 18:34:16 +00:00
}
cID = cStatus . ContainerID
2015-03-25 11:09:35 +00:00
}
2015-10-07 17:58:05 +00:00
return kubecontainer . ParseContainerID ( cID ) , nil
2015-02-24 00:33:43 +00:00
}
2014-08-27 19:41:32 +00:00
// GetKubeletContainerLogs returns logs from the container
2015-01-07 15:18:56 +00:00
// TODO: this method is returning logs of random container attempts, when it should be returning the most recent attempt
// or all of them.
2015-09-10 03:46:11 +00:00
func ( kl * Kubelet ) GetKubeletContainerLogs ( podFullName , containerName string , logOptions * api . PodLogOptions , stdout , stderr io . Writer ) error {
2015-05-01 23:12:14 +00:00
// TODO(vmarmol): Refactor to not need the pod status and verification.
2015-05-15 22:30:28 +00:00
// Pod workers periodically write status to statusManager. If status is not
// cached there, something is wrong (or kubelet just restarted and hasn't
// caught up yet). Just assume the pod is not ready yet.
2015-08-18 20:26:56 +00:00
name , namespace , err := kubecontainer . ParsePodFullName ( podFullName )
if err != nil {
return fmt . Errorf ( "unable to parse pod full name %q: %v" , podFullName , err )
}
pod , ok := kl . GetPodByName ( namespace , name )
if ! ok {
2015-09-07 06:28:26 +00:00
return fmt . Errorf ( "unable to get logs for container %q in pod %q namespace %q: unable to find pod" , containerName , name , namespace )
2015-08-18 20:26:56 +00:00
}
2015-10-23 21:31:40 +00:00
podUID := pod . UID
if mirrorPod , ok := kl . podManager . GetMirrorPodByPod ( pod ) ; ok {
podUID = mirrorPod . UID
}
podStatus , found := kl . statusManager . GetPodStatus ( podUID )
2015-05-15 22:30:28 +00:00
if ! found {
2015-09-07 06:28:26 +00:00
return fmt . Errorf ( "failed to get status for pod %q in namespace %q" , name , namespace )
2014-09-17 19:00:09 +00:00
}
2015-08-18 20:26:56 +00:00
2015-02-24 00:33:43 +00:00
if err := kl . validatePodPhase ( & podStatus ) ; err != nil {
2015-04-09 18:57:53 +00:00
// No log is available if pod is not in a "known" phase (e.g. Unknown).
2015-09-07 06:28:26 +00:00
return fmt . Errorf ( "Pod %q in namespace %q : %v" , name , namespace , err )
2015-02-12 01:03:59 +00:00
}
2015-09-10 03:46:11 +00:00
containerID , err := kl . validateContainerStatus ( & podStatus , containerName , logOptions . Previous )
2015-02-24 00:33:43 +00:00
if err != nil {
2015-04-09 18:57:53 +00:00
// No log is available if the container status is missing or is in the
// waiting state.
2015-09-07 06:28:26 +00:00
return fmt . Errorf ( "Pod %q in namespace %q: %v" , name , namespace , err )
2015-02-12 01:03:59 +00:00
}
2015-09-10 03:46:11 +00:00
return kl . containerRuntime . GetContainerLogs ( pod , containerID , logOptions , stdout , stderr )
2014-08-27 19:41:32 +00:00
}
2015-02-09 16:40:42 +00:00
// GetHostname returns the hostname as the kubelet sees it.
func ( kl * Kubelet ) GetHostname ( ) string {
return kl . hostname
}
2015-03-24 12:35:38 +00:00
// Returns host IP or nil in case of error.
func ( kl * Kubelet ) GetHostIP ( ) ( net . IP , error ) {
2015-09-21 18:00:04 +00:00
node , err := kl . GetNode ( )
if err != nil {
return nil , fmt . Errorf ( "cannot get node: %v" , err )
}
return nodeutil . GetNodeHostIP ( node )
2015-03-24 12:35:38 +00:00
}
2015-03-09 22:46:47 +00:00
// GetPods returns all pods bound to the kubelet and their spec, and the mirror
2015-03-23 19:17:12 +00:00
// pods.
2015-04-03 22:51:50 +00:00
func ( kl * Kubelet ) GetPods ( ) [ ] * api . Pod {
2015-03-21 00:22:02 +00:00
return kl . podManager . GetPods ( )
2014-10-22 23:52:38 +00:00
}
2015-06-23 23:01:12 +00:00
// GetRunningPods returns all pods running on kubelet from looking at the
// container runtime cache. This function converts kubecontainer.Pod to
// api.Pod, so only the fields that exist in both kubecontainer.Pod and
// api.Pod are considered meaningful.
func ( kl * Kubelet ) GetRunningPods ( ) ( [ ] * api . Pod , error ) {
pods , err := kl . runtimeCache . GetPods ( )
if err != nil {
return nil , err
}
apiPods := make ( [ ] * api . Pod , 0 , len ( pods ) )
for _ , pod := range pods {
apiPods = append ( apiPods , pod . ToAPIPod ( ) )
}
return apiPods , nil
}
2015-03-19 23:51:34 +00:00
func ( kl * Kubelet ) GetPodByFullName ( podFullName string ) ( * api . Pod , bool ) {
2015-03-21 00:22:02 +00:00
return kl . podManager . GetPodByFullName ( podFullName )
2015-03-19 23:51:34 +00:00
}
// GetPodByName provides the first pod that matches namespace and name, as well
// as whether the pod was found.
2015-03-13 13:19:07 +00:00
func ( kl * Kubelet ) GetPodByName ( namespace , name string ) ( * api . Pod , bool ) {
2015-03-21 00:22:02 +00:00
return kl . podManager . GetPodByName ( namespace , name )
2015-01-07 15:18:56 +00:00
}
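// updateRuntimeUp probes the container runtime and, if the probe succeeds,
// records the time of the last successful check; failures are only logged.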
func ( kl * Kubelet ) updateRuntimeUp ( ) {
2015-06-21 20:22:16 +00:00
start := time . Now ( )
2015-05-05 10:19:54 +00:00
err := waitUntilRuntimeIsUp ( kl . containerRuntime , 100 * time . Millisecond )
kl . runtimeMutex . Lock ( )
defer kl . runtimeMutex . Unlock ( )
if err == nil {
kl . lastTimestampRuntimeUp = time . Now ( )
2015-06-21 20:22:16 +00:00
} else {
glog . Errorf ( "Container runtime sanity check failed after %v, err: %v" , time . Since ( start ) , err )
2015-05-05 10:19:54 +00:00
}
}
2015-05-11 21:07:24 +00:00
func ( kl * Kubelet ) reconcileCBR0 ( podCIDR string ) error {
2015-05-08 18:47:33 +00:00
if podCIDR == "" {
glog . V ( 5 ) . Info ( "PodCIDR not set. Will not configure cbr0." )
return nil
}
2015-06-24 18:10:10 +00:00
glog . V ( 5 ) . Infof ( "PodCIDR is set to %q" , podCIDR )
2015-05-08 18:47:33 +00:00
_ , cidr , err := net . ParseCIDR ( podCIDR )
if err != nil {
return err
}
// Set cbr0 interface address to first address in IPNet
cidr . IP . To4 ( ) [ 3 ] += 1
if err := ensureCbr0 ( cidr ) ; err != nil {
return err
}
2015-08-10 22:08:31 +00:00
if kl . shaper == nil {
glog . V ( 5 ) . Info ( "Shaper is nil, creating" )
kl . shaper = bandwidth . NewTCShaper ( "cbr0" )
}
return kl . shaper . ReconcileInterface ( )
2015-05-08 18:47:33 +00:00
}
2015-09-21 18:06:38 +00:00
// updateNodeStatus updates node status to master with retries.
func ( kl * Kubelet ) updateNodeStatus ( ) error {
for i := 0 ; i < nodeStatusUpdateRetry ; i ++ {
if err := kl . tryUpdateNodeStatus ( ) ; err != nil {
glog . Errorf ( "Error updating node status, will retry: %v" , err )
} else {
return nil
}
}
return fmt . Errorf ( "update node status exceeds retry count" )
}
func ( kl * Kubelet ) recordNodeStatusEvent ( event string ) {
glog . V ( 2 ) . Infof ( "Recording %s event message for node %s" , event , kl . nodeName )
// TODO: This requires a transaction, either both node status is updated
// and event is recorded or neither should happen, see issue #6055.
kl . recorder . Eventf ( kl . nodeRef , event , "Node %s status is now: %s" , kl . nodeName , event )
}
2015-04-09 01:22:44 +00:00
// Maintains Node.Spec.Unschedulable value from previous run of tryUpdateNodeStatus()
var oldNodeUnschedulable bool
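// syncNetworkStatus records whether the node's network is configured; when
// cbr0 configuration is enabled it also installs the IP masquerade rule and
// reconciles the cbr0 bridge with the node's pod CIDR.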
func ( kl * Kubelet ) syncNetworkStatus ( ) {
kl . networkConfigMutex . Lock ( )
defer kl . networkConfigMutex . Unlock ( )
networkConfigured := true
if kl . configureCBR0 {
2015-06-24 19:56:36 +00:00
if err := ensureIPTablesMasqRule ( ) ; err != nil {
networkConfigured = false
glog . Errorf ( "Error on adding ip table rules: %v" , err )
}
2015-09-21 18:06:38 +00:00
if len ( kl . podCIDR ) == 0 {
2015-07-08 21:58:14 +00:00
glog . Warningf ( "ConfigureCBR0 requested, but PodCIDR not set. Will not configure CBR0 right now" )
2015-06-24 18:10:10 +00:00
networkConfigured = false
2015-09-21 18:06:38 +00:00
} else if err := kl . reconcileCBR0 ( kl . podCIDR ) ; err != nil {
2015-06-24 18:10:10 +00:00
networkConfigured = false
glog . Errorf ( "Error configuring cbr0: %v" , err )
}
}
kl . networkConfigured = networkConfigured
}
2015-09-21 18:06:38 +00:00
// setNodeStatus fills in the Status fields of the given Node, overwriting
// any fields that are currently set.
2015-10-22 19:14:56 +00:00
// TODO(madhusudancs): Simplify the logic for setting node conditions and
// refactor the node status condition code out to a different file.
2015-09-21 18:06:38 +00:00
func ( kl * Kubelet ) setNodeStatus ( node * api . Node ) error {
// Set addresses for the node.
if kl . cloud != nil {
instances , ok := kl . cloud . Instances ( )
if ! ok {
return fmt . Errorf ( "failed to get instances from cloud provider" )
}
// TODO(roberthbailey): Can we do this without having credentials to talk
// to the cloud provider?
// TODO(justinsb): We can if CurrentNodeName() was actually CurrentNode() and returned an interface
nodeAddresses , err := instances . NodeAddresses ( kl . nodeName )
if err != nil {
return fmt . Errorf ( "failed to get node address from cloud provider: %v" , err )
}
node . Status . Addresses = nodeAddresses
} else {
addr := net . ParseIP ( kl . hostname )
if addr != nil {
node . Status . Addresses = [ ] api . NodeAddress {
{ Type : api . NodeLegacyHostIP , Address : addr . String ( ) } ,
{ Type : api . NodeInternalIP , Address : addr . String ( ) } ,
}
} else {
addrs , err := net . LookupIP ( node . Name )
if err != nil {
return fmt . Errorf ( "can't get ip address of node %s: %v" , node . Name , err )
} else if len ( addrs ) == 0 {
return fmt . Errorf ( "no ip address for node %v" , node . Name )
} else {
// check all ip addresses for this node.Name and try to find the first non-loopback IPv4 address.
// If no match is found, it uses the IP of the interface with a gateway on it.
for _ , ip := range addrs {
if ip . IsLoopback ( ) {
continue
}
if ip . To4 ( ) != nil {
node . Status . Addresses = [ ] api . NodeAddress {
{ Type : api . NodeLegacyHostIP , Address : ip . String ( ) } ,
{ Type : api . NodeInternalIP , Address : ip . String ( ) } ,
}
break
}
}
if len ( node . Status . Addresses ) == 0 {
ip , err := util . ChooseHostInterface ( )
if err != nil {
return err
}
node . Status . Addresses = [ ] api . NodeAddress {
{ Type : api . NodeLegacyHostIP , Address : ip . String ( ) } ,
{ Type : api . NodeInternalIP , Address : ip . String ( ) } ,
}
}
}
}
}

	// TODO: Post NotReady if we cannot get MachineInfo from cAdvisor. This needs to start
	// cAdvisor locally, e.g. for test-cmd.sh, and in integration tests.
	info, err := kl.GetCachedMachineInfo()
	if err != nil {
		// TODO(roberthbailey): This is required for test-cmd.sh to pass.
		// See if the test should be updated instead.
		node.Status.Capacity = api.ResourceList{
			api.ResourceCPU:    *resource.NewMilliQuantity(0, resource.DecimalSI),
			api.ResourceMemory: resource.MustParse("0Gi"),
			api.ResourcePods:   *resource.NewQuantity(int64(kl.pods), resource.DecimalSI),
		}
		glog.Errorf("Error getting machine info: %v", err)
	} else {
		node.Status.NodeInfo.MachineID = info.MachineID
		node.Status.NodeInfo.SystemUUID = info.SystemUUID
		node.Status.Capacity = CapacityFromMachineInfo(info)
		node.Status.Capacity[api.ResourcePods] = *resource.NewQuantity(
			int64(kl.pods), resource.DecimalSI)
		if node.Status.NodeInfo.BootID != "" &&
			node.Status.NodeInfo.BootID != info.BootID {
			// TODO: This requires a transaction: either both the status update and the
			// event should happen, or neither should; see issue #6055.
			kl.recorder.Eventf(kl.nodeRef, "Rebooted",
				"Node %s has been rebooted, boot id: %s", kl.nodeName, info.BootID)
		}
		node.Status.NodeInfo.BootID = info.BootID
	}

	verinfo, err := kl.cadvisor.VersionInfo()
	if err != nil {
		glog.Errorf("Error getting version info: %v", err)
	} else {
		node.Status.NodeInfo.KernelVersion = verinfo.KernelVersion
		node.Status.NodeInfo.OsImage = verinfo.ContainerOsVersion
		// TODO: Determine whether the runtime is docker or rkt.
		node.Status.NodeInfo.ContainerRuntimeVersion = "docker://" + verinfo.DockerVersion
		node.Status.NodeInfo.KubeletVersion = version.Get().String()
		// TODO: kube-proxy might be a different version from kubelet in the future.
		node.Status.NodeInfo.KubeProxyVersion = version.Get().String()
	}

	node.Status.DaemonEndpoints = *kl.daemonEndpoints

	// Check whether the container runtime can be reported as up.
	containerRuntimeUp := kl.containerRuntimeUp()
	// Check whether the network is configured properly.
	networkConfigured := kl.doneNetworkConfigure()

	// Check whether the runtime version meets the minimal requirements.
	containerRuntimeVersionRequirementMet := kl.containerRuntimeVersionRequirementMet()

	currentTime := unversioned.Now()
	var newNodeReadyCondition api.NodeCondition
	var oldNodeReadyConditionStatus api.ConditionStatus
	if containerRuntimeUp && networkConfigured && containerRuntimeVersionRequirementMet {
		newNodeReadyCondition = api.NodeCondition{
			Type:              api.NodeReady,
			Status:            api.ConditionTrue,
			Reason:            "KubeletReady",
			Message:           "kubelet is posting ready status",
			LastHeartbeatTime: currentTime,
		}
	} else {
		var messages []string
		if !containerRuntimeUp {
			messages = append(messages, "container runtime is down")
		}
		if !networkConfigured {
			messages = append(messages, "network not configured correctly")
		}
		if !containerRuntimeVersionRequirementMet {
			messages = append(messages, fmt.Sprintf("container runtime version is older than %s", dockertools.MinimumDockerAPIVersion))
		}
		newNodeReadyCondition = api.NodeCondition{
			Type:              api.NodeReady,
			Status:            api.ConditionFalse,
			Reason:            "KubeletNotReady",
			Message:           strings.Join(messages, ","),
			LastHeartbeatTime: currentTime,
		}
	}

	updated := false
	for i := range node.Status.Conditions {
		if node.Status.Conditions[i].Type == api.NodeReady {
			oldNodeReadyConditionStatus = node.Status.Conditions[i].Status
			if oldNodeReadyConditionStatus == newNodeReadyCondition.Status {
				newNodeReadyCondition.LastTransitionTime = node.Status.Conditions[i].LastTransitionTime
			} else {
				newNodeReadyCondition.LastTransitionTime = currentTime
			}
			node.Status.Conditions[i] = newNodeReadyCondition
			updated = true
		}
	}
	if !updated {
		newNodeReadyCondition.LastTransitionTime = currentTime
		node.Status.Conditions = append(node.Status.Conditions, newNodeReadyCondition)
	}
	if !updated || oldNodeReadyConditionStatus != newNodeReadyCondition.Status {
		if newNodeReadyCondition.Status == api.ConditionTrue {
			kl.recordNodeStatusEvent("NodeReady")
		} else {
			kl.recordNodeStatusEvent("NodeNotReady")
		}
	}

	var nodeOODCondition *api.NodeCondition
	// Check if a NodeOutOfDisk condition already exists and, if it does, just pick it up for update.
	for i := range node.Status.Conditions {
		if node.Status.Conditions[i].Type == api.NodeOutOfDisk {
			nodeOODCondition = &node.Status.Conditions[i]
		}
	}

	newOODCondition := false
	// If the NodeOutOfDisk condition doesn't exist, create one.
	if nodeOODCondition == nil {
		nodeOODCondition = &api.NodeCondition{
			Type:               api.NodeOutOfDisk,
			Status:             api.ConditionUnknown,
			LastTransitionTime: currentTime,
		}
		// nodeOODCondition cannot be appended to node.Status.Conditions here because it gets
		// copied into the slice. If we appended nodeOODCondition to the slice here, none of the
		// updates we make to nodeOODCondition below would be reflected in the slice.
		newOODCondition = true
	}

	// Update the heartbeat time irrespective of all the conditions.
	nodeOODCondition.LastHeartbeatTime = currentTime

	// Note: The conditions below handle both the case when a new NodeOutOfDisk condition is
	// created and the case when the condition already exists. When a new condition is created,
	// its status is set to api.ConditionUnknown, which matches either
	// nodeOODCondition.Status != api.ConditionTrue or
	// nodeOODCondition.Status != api.ConditionFalse below, depending on whether the kubelet
	// is out of disk or not.
	if kl.isOutOfDisk() {
		if nodeOODCondition.Status != api.ConditionTrue {
			nodeOODCondition.Status = api.ConditionTrue
			nodeOODCondition.Reason = "KubeletOutOfDisk"
			nodeOODCondition.Message = "out of disk space"
			nodeOODCondition.LastTransitionTime = currentTime
			kl.recordNodeStatusEvent("NodeOutOfDisk")
		}
	} else {
		if nodeOODCondition.Status != api.ConditionFalse {
			nodeOODCondition.Status = api.ConditionFalse
			nodeOODCondition.Reason = "KubeletHasSufficientDisk"
			nodeOODCondition.Message = "kubelet has sufficient disk space available"
			nodeOODCondition.LastTransitionTime = currentTime
			kl.recordNodeStatusEvent("NodeHasSufficientDisk")
		}
	}

	if newOODCondition {
		node.Status.Conditions = append(node.Status.Conditions, *nodeOODCondition)
	}

	if oldNodeUnschedulable != node.Spec.Unschedulable {
		if node.Spec.Unschedulable {
			kl.recordNodeStatusEvent("NodeNotSchedulable")
		} else {
			kl.recordNodeStatusEvent("NodeSchedulable")
		}
		oldNodeUnschedulable = node.Spec.Unschedulable
	}
	return nil
}
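
// Example (editor's illustration, not part of the original source): on a healthy
// node, the Ready condition posted by setNodeStatus shows up in the node's
// status roughly as follows, with the transition time carried over from the
// previous condition whenever the status has not flipped:
//
//	conditions:
//	- type: Ready
//	  status: "True"
//	  reason: KubeletReady
//	  message: kubelet is posting ready status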

func (kl *Kubelet) containerRuntimeUp() bool {
	kl.runtimeMutex.Lock()
	defer kl.runtimeMutex.Unlock()
	return kl.lastTimestampRuntimeUp.Add(kl.runtimeUpThreshold).After(time.Now())
}

func (kl *Kubelet) doneNetworkConfigure() bool {
	kl.networkConfigMutex.Lock()
	defer kl.networkConfigMutex.Unlock()
	return kl.networkConfigured
}

func (kl *Kubelet) containerRuntimeVersionRequirementMet() bool {
	switch kl.GetRuntime().Type() {
	case "docker":
		version, err := kl.GetContainerRuntimeVersion()
		if err != nil {
			return true
		}
		// Verify the docker version.
		result, err := version.Compare(dockertools.MinimumDockerAPIVersion)
		if err != nil {
			glog.Errorf("Cannot compare current docker version %v with minimum supported Docker version %q", version, dockertools.MinimumDockerAPIVersion)
			return false
		}
		return result >= 0
	case "rkt":
		// TODO(dawnchen): Add rkt support here.
		return true
	default:
		glog.Errorf("unsupported container runtime %s specified", kl.GetRuntime().Type())
		return true
	}
}

// tryUpdateNodeStatus tries to update node status to master. If ReconcileCBR0
// is set, this function will also confirm that cbr0 is configured correctly.
func (kl *Kubelet) tryUpdateNodeStatus() error {
	node, err := kl.kubeClient.Nodes().Get(kl.nodeName)
	if err != nil {
		return fmt.Errorf("error getting node %q: %v", kl.nodeName, err)
	}
	if node == nil {
		return fmt.Errorf("no node instance returned for %q", kl.nodeName)
	}

	kl.networkConfigMutex.Lock()
	if kl.reconcileCIDR {
		kl.podCIDR = node.Spec.PodCIDR
	}
	kl.networkConfigMutex.Unlock()

	if err := kl.setNodeStatus(node); err != nil {
		return err
	}
	// Update the current status on the API server.
	_, err = kl.kubeClient.Nodes().UpdateStatus(node)
	return err
}

// GetPhase returns the phase of a pod given its container info.
// This func is exported to simplify integration with 3rd party kubelet
// integrations like kubernetes-mesos.
func GetPhase(spec *api.PodSpec, info []api.ContainerStatus) api.PodPhase {
	running := 0
	waiting := 0
	stopped := 0
	failed := 0
	succeeded := 0
	unknown := 0
	for _, container := range spec.Containers {
		if containerStatus, ok := api.GetContainerStatus(info, container.Name); ok {
			if containerStatus.State.Running != nil {
				running++
			} else if containerStatus.State.Terminated != nil {
				stopped++
				if containerStatus.State.Terminated.ExitCode == 0 {
					succeeded++
				} else {
					failed++
				}
			} else if containerStatus.State.Waiting != nil {
				if containerStatus.LastTerminationState.Terminated != nil {
					stopped++
				} else {
					waiting++
				}
			} else {
				unknown++
			}
		} else {
			unknown++
		}
	}
	switch {
	case waiting > 0:
		glog.V(5).Infof("pod waiting > 0, pending")
		// One or more containers have not been started.
		return api.PodPending
	case running > 0 && unknown == 0:
		// All containers have been started, and at least
		// one container is running.
		return api.PodRunning
	case running == 0 && stopped > 0 && unknown == 0:
		// All containers are terminated.
		if spec.RestartPolicy == api.RestartPolicyAlways {
			// All containers are in the process of restarting.
			return api.PodRunning
		}
		if stopped == succeeded {
			// RestartPolicy is not Always, and all
			// containers terminated successfully.
			return api.PodSucceeded
		}
		if spec.RestartPolicy == api.RestartPolicyNever {
			// RestartPolicy is Never, and all containers are
			// terminated with at least one in failure.
			return api.PodFailed
		}
		// RestartPolicy is OnFailure, and at least one container failed
		// and is in the process of restarting.
		return api.PodRunning
	default:
		glog.V(5).Infof("pod default case, pending")
		return api.PodPending
	}
}
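
// Example (editor's illustration, not taken from the original source): with
// restartPolicy Never, a pod whose only container terminated with a non-zero
// exit code is reported as Failed:
//
//	spec := &api.PodSpec{
//		RestartPolicy: api.RestartPolicyNever,
//		Containers:    []api.Container{{Name: "app"}},
//	}
//	statuses := []api.ContainerStatus{{
//		Name:  "app",
//		State: api.ContainerState{Terminated: &api.ContainerStateTerminated{ExitCode: 1}},
//	}}
//	phase := GetPhase(spec, statuses) // api.PodFailed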

func readyPodCondition(isPodReady bool, reason, message string) []api.PodCondition {
	condition := api.PodCondition{
		Type: api.PodReady,
	}
	if isPodReady {
		condition.Status = api.ConditionTrue
	} else {
		condition.Status = api.ConditionFalse
	}
	condition.Reason = reason
	condition.Message = message
	return []api.PodCondition{condition}
}

// getPodReadyCondition returns a ready condition if all containers in a pod are
// ready; otherwise it returns an unready condition.
func getPodReadyCondition(spec *api.PodSpec, containerStatuses []api.ContainerStatus) []api.PodCondition {
	// Find whether all containers are ready or not.
	if containerStatuses == nil {
		return readyPodCondition(false, "UnknownContainerStatuses", "")
	}
	unknownContainers := []string{}
	unreadyContainers := []string{}
	for _, container := range spec.Containers {
		if containerStatus, ok := api.GetContainerStatus(containerStatuses, container.Name); ok {
			if !containerStatus.Ready {
				unreadyContainers = append(unreadyContainers, container.Name)
			}
		} else {
			unknownContainers = append(unknownContainers, container.Name)
		}
	}
	unreadyMessages := []string{}
	if len(unknownContainers) > 0 {
		unreadyMessages = append(unreadyMessages, fmt.Sprintf("containers with unknown status: %s", unknownContainers))
	}
	if len(unreadyContainers) > 0 {
		unreadyMessages = append(unreadyMessages, fmt.Sprintf("containers with unready status: %s", unreadyContainers))
	}
	unreadyMessage := strings.Join(unreadyMessages, ", ")
	if unreadyMessage != "" {
		// Return an unready status.
		return readyPodCondition(false, "ContainersNotReady", unreadyMessage)
	}
	// Return a ready status.
	return readyPodCondition(true, "", "")
}
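
// Example (editor's illustration, names are hypothetical): given a spec with
// containers "app" and "sidecar" where only "app" reports Ready, the pod gets a
// single PodReady condition that is false and names the unready container:
//
//	statuses := []api.ContainerStatus{
//		{Name: "app", Ready: true},
//		{Name: "sidecar", Ready: false},
//	}
//	conds := getPodReadyCondition(spec, statuses)
//	// conds[0].Status == api.ConditionFalse
//	// conds[0].Reason == "ContainersNotReady"
//	// conds[0].Message mentions "sidecar"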

// generatePodStatus builds the status for the given pod. By passing the pod
// directly, this method avoids pod lookup, which requires grabbing a lock.
func (kl *Kubelet) generatePodStatus(pod *api.Pod) (api.PodStatus, error) {
	start := time.Now()
	defer func() {
		metrics.PodStatusLatency.Observe(metrics.SinceInMicroseconds(start))
	}()

	podFullName := kubecontainer.GetPodFullName(pod)
	glog.V(3).Infof("Generating status for %q", podFullName)

	// TODO: Consider including the container information.
	if kl.pastActiveDeadline(pod) {
		reason := "DeadlineExceeded"
		kl.recorder.Eventf(pod, reason, "Pod was active on the node longer than specified deadline")
		return api.PodStatus{
			Phase:   api.PodFailed,
			Reason:  reason,
			Message: "Pod was active on the node longer than specified deadline",
		}, nil
	}

	spec := &pod.Spec
	podStatus, err := kl.containerRuntime.GetPodStatus(pod)

	if err != nil {
		// Error handling
		glog.Infof("Query container info for pod %q failed with error (%v)", podFullName, err)
		if strings.Contains(err.Error(), "resource temporarily unavailable") {
			// Leave it to the upstream layer to decide what to do.
			return api.PodStatus{}, err
		}

		pendingStatus := api.PodStatus{
			Phase:   api.PodPending,
			Reason:  "GeneralError",
			Message: fmt.Sprintf("Query container info failed with error (%v)", err),
		}
		return pendingStatus, nil
	}

	// Assume the info is ready to process.
	podStatus.Phase = GetPhase(spec, podStatus.ContainerStatuses)
	kl.probeManager.UpdatePodStatus(pod.UID, podStatus)
	podStatus.Conditions = append(podStatus.Conditions, getPodReadyCondition(spec, podStatus.ContainerStatuses)...)

	if !kl.standaloneMode {
		hostIP, err := kl.GetHostIP()
		if err != nil {
			glog.V(4).Infof("Cannot get host IP: %v", err)
		} else {
			podStatus.HostIP = hostIP.String()
			if podUsesHostNetwork(pod) && podStatus.PodIP == "" {
				podStatus.PodIP = hostIP.String()
			}
		}
	}

	return *podStatus, nil
}
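
// Example (editor's illustration): when kl.pastActiveDeadline reports that the
// pod has outlived its active deadline, the status generated above is terminal
// and the container runtime is not consulted:
//
//	api.PodStatus{
//		Phase:   api.PodFailed,
//		Reason:  "DeadlineExceeded",
//		Message: "Pod was active on the node longer than specified deadline",
//	}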

// ServeLogs serves logs from the current machine.
func (kl *Kubelet) ServeLogs(w http.ResponseWriter, req *http.Request) {
	// TODO: whitelist logs we are willing to serve
	kl.logServer.ServeHTTP(w, req)
}
2014-08-07 18:15:11 +00:00
2015-04-06 23:58:34 +00:00
// findContainer finds and returns the container with the given pod ID, full name, and container name.
// It returns nil if not found.
func ( kl * Kubelet ) findContainer ( podFullName string , podUID types . UID , containerName string ) ( * kubecontainer . Container , error ) {
2015-05-01 22:25:11 +00:00
pods , err := kl . containerRuntime . GetPods ( false )
2015-04-06 23:58:34 +00:00
if err != nil {
return nil , err
}
pod := kubecontainer . Pods ( pods ) . FindPod ( podFullName , podUID )
return pod . FindContainerByName ( containerName ) , nil
}
2014-08-07 18:15:11 +00:00
// Run a command in a container, returns the combined stdout, stderr as an array of bytes
2015-04-06 23:58:34 +00:00
func ( kl * Kubelet ) RunInContainer ( podFullName string , podUID types . UID , containerName string , cmd [ ] string ) ( [ ] byte , error ) {
podUID = kl . podManager . TranslatePodUID ( podUID )
2015-03-20 20:55:26 +00:00
2015-04-06 23:58:34 +00:00
container , err := kl . findContainer ( podFullName , podUID , containerName )
2014-08-07 18:15:11 +00:00
if err != nil {
return nil , err
}
2015-04-06 23:58:34 +00:00
if container == nil {
return nil , fmt . Errorf ( "container not found (%q)" , containerName )
2014-08-07 18:15:11 +00:00
}
2015-10-07 17:58:05 +00:00
return kl . runner . RunInContainer ( container . ID , cmd )
2014-08-07 18:15:11 +00:00
}
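
// Example (editor's illustration, assuming a pod named "nginx-abc12" in the
// "default" namespace with a container called "nginx"):
//
//	out, err := kl.RunInContainer("nginx-abc12_default", "", "nginx", []string{"cat", "/etc/resolv.conf"})
//
// The pod full name follows the "<name>_<namespace>" convention used by
// kubecontainer.GetPodFullName, and a missing container produces a
// "container not found" error rather than a panic.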

// ExecInContainer executes a command in a container, connecting the supplied
// stdin/stdout/stderr to the command's IO streams.
func (kl *Kubelet) ExecInContainer(podFullName string, podUID types.UID, containerName string, cmd []string, stdin io.Reader, stdout, stderr io.WriteCloser, tty bool) error {
	podUID = kl.podManager.TranslatePodUID(podUID)

	container, err := kl.findContainer(podFullName, podUID, containerName)
	if err != nil {
		return err
	}
	if container == nil {
		return fmt.Errorf("container not found (%q)", containerName)
	}
	return kl.runner.ExecInContainer(container.ID, cmd, stdin, stdout, stderr, tty)
}

// AttachContainer attaches to a running container, connecting the supplied
// stdin/stdout/stderr to the container's IO streams.
func (kl *Kubelet) AttachContainer(podFullName string, podUID types.UID, containerName string, stdin io.Reader, stdout, stderr io.WriteCloser, tty bool) error {
	podUID = kl.podManager.TranslatePodUID(podUID)

	container, err := kl.findContainer(podFullName, podUID, containerName)
	if err != nil {
		return err
	}
	if container == nil {
		return fmt.Errorf("container not found (%q)", containerName)
	}
	return kl.containerRuntime.AttachContainer(container.ID, stdin, stdout, stderr, tty)
}

// PortForward connects to the pod's port and copies data between the port
// and the stream.
func (kl *Kubelet) PortForward(podFullName string, podUID types.UID, port uint16, stream io.ReadWriteCloser) error {
	podUID = kl.podManager.TranslatePodUID(podUID)

	pods, err := kl.containerRuntime.GetPods(false)
	if err != nil {
		return err
	}
	pod := kubecontainer.Pods(pods).FindPod(podFullName, podUID)
	if pod.IsEmpty() {
		return fmt.Errorf("pod not found (%q)", podFullName)
	}
	return kl.runner.PortForward(&pod, port, stream)
}

// BirthCry sends an event that the kubelet has started up.
func (kl *Kubelet) BirthCry() {
	// Make an event that kubelet restarted.
	kl.recorder.Eventf(kl.nodeRef, "Starting", "Starting kubelet.")
}

// StreamingConnectionIdleTimeout returns the timeout for idle streaming connections.
func (kl *Kubelet) StreamingConnectionIdleTimeout() time.Duration {
	return kl.streamingConnectionIdleTimeout
}

// ResyncInterval returns the interval used for periodic syncs.
func (kl *Kubelet) ResyncInterval() time.Duration {
	return kl.resyncInterval
}

// GetContainerInfo returns stats (from cAdvisor) for a container.
func (kl *Kubelet) GetContainerInfo(podFullName string, podUID types.UID, containerName string, req *cadvisorapi.ContainerInfoRequest) (*cadvisorapi.ContainerInfo, error) {
	podUID = kl.podManager.TranslatePodUID(podUID)

	pods, err := kl.runtimeCache.GetPods()
	if err != nil {
		return nil, err
	}
	pod := kubecontainer.Pods(pods).FindPod(podFullName, podUID)
	container := pod.FindContainerByName(containerName)
	if container == nil {
		return nil, ErrContainerNotFound
	}

	ci, err := kl.cadvisor.DockerContainer(container.ID.ID, req)
	if err != nil {
		return nil, err
	}
	return &ci, nil
}

// GetRawContainerInfo returns stats (from cAdvisor) for a non-Kubernetes container.
func (kl *Kubelet) GetRawContainerInfo(containerName string, req *cadvisorapi.ContainerInfoRequest, subcontainers bool) (map[string]*cadvisorapi.ContainerInfo, error) {
	if subcontainers {
		return kl.cadvisor.SubcontainerInfo(containerName, req)
	}
	containerInfo, err := kl.cadvisor.ContainerInfo(containerName, req)
	if err != nil {
		return nil, err
	}
	return map[string]*cadvisorapi.ContainerInfo{
		containerInfo.Name: containerInfo,
	}, nil
}
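
// Example (editor's illustration): raw container names are cAdvisor cgroup
// paths rather than Kubernetes names, so querying the root cgroup with
// subcontainers enabled returns machine-wide stats keyed by cgroup path:
//
//	infos, err := kl.GetRawContainerInfo("/", &cadvisorapi.ContainerInfoRequest{NumStats: 1}, true)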

// GetCachedMachineInfo assumes that the machine info can't change without a reboot.
func (kl *Kubelet) GetCachedMachineInfo() (*cadvisorapi.MachineInfo, error) {
	if kl.machineInfo == nil {
		info, err := kl.cadvisor.MachineInfo()
		if err != nil {
			return nil, err
		}
		kl.machineInfo = info
	}
	return kl.machineInfo, nil
}

// ListenAndServe runs the kubelet HTTP server.
func (kl *Kubelet) ListenAndServe(address net.IP, port uint, tlsOptions *TLSOptions, auth AuthInterface, enableDebuggingHandlers bool) {
	ListenAndServeKubeletServer(kl, address, port, tlsOptions, auth, enableDebuggingHandlers)
}

// ListenAndServeReadOnly runs the read-only kubelet HTTP server.
func (kl *Kubelet) ListenAndServeReadOnly(address net.IP, port uint) {
	ListenAndServeKubeletReadOnlyServer(kl, address, port)
}

// GetRuntime returns the current Runtime implementation in use by the kubelet. This func
// is exported to simplify integration with third party kubelet extensions (e.g. kubernetes-mesos).
func (kl *Kubelet) GetRuntime() kubecontainer.Runtime {
	return kl.containerRuntime
}

var minRsrc = resource.MustParse("1k")
var maxRsrc = resource.MustParse("1P")

func validateBandwidthIsReasonable(rsrc *resource.Quantity) error {
	if rsrc.Value() < minRsrc.Value() {
		return fmt.Errorf("resource is unreasonably small (< 1kbit)")
	}
	if rsrc.Value() > maxRsrc.Value() {
		return fmt.Errorf("resource is unreasonably large (> 1Pbit)")
	}
	return nil
}
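
// Example (editor's note): the bounds above are decimal SI quantities, so "1k"
// parses to 1000 and "1P" to 10^15. A value such as
// resource.NewQuantity(500, resource.DecimalSI) would be rejected as
// unreasonably small, while resource.MustParse("10M") passes.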

func extractBandwidthResources(pod *api.Pod) (ingress, egress *resource.Quantity, err error) {
	str, found := pod.Annotations["net.alpha.kubernetes.io/ingress-bandwidth"]
	if found {
		if ingress, err = resource.ParseQuantity(str); err != nil {
			return nil, nil, err
		}
		if err := validateBandwidthIsReasonable(ingress); err != nil {
			return nil, nil, err
		}
	}
	str, found = pod.Annotations["net.alpha.kubernetes.io/egress-bandwidth"]
	if found {
		if egress, err = resource.ParseQuantity(str); err != nil {
			return nil, nil, err
		}
		if err := validateBandwidthIsReasonable(egress); err != nil {
			return nil, nil, err
		}
	}
	return ingress, egress, nil
}
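
// Example (editor's illustration): a pod that asks for shaped traffic carries
// the two annotations read above, e.g. in its manifest:
//
//	metadata:
//	  annotations:
//	    net.alpha.kubernetes.io/ingress-bandwidth: 10M
//	    net.alpha.kubernetes.io/egress-bandwidth: 10M
//
// extractBandwidthResources would then return two quantities of 10^7 (bits per
// second) and a nil error.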