/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package kubelet

import (
	"bytes"
	"fmt"
	"io"
	"io/ioutil"
	"math"
	"net"
	"net/http"
	"os"
	"path"
	"path/filepath"
	goRuntime "runtime"
	"sort"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/golang/glog"
	cadvisorapi "github.com/google/cadvisor/info/v1"
	"k8s.io/kubernetes/pkg/api"
	apierrors "k8s.io/kubernetes/pkg/api/errors"
	utilpod "k8s.io/kubernetes/pkg/api/pod"
	"k8s.io/kubernetes/pkg/api/resource"
	"k8s.io/kubernetes/pkg/api/unversioned"
	"k8s.io/kubernetes/pkg/api/validation"
	"k8s.io/kubernetes/pkg/apis/componentconfig"
	"k8s.io/kubernetes/pkg/client/cache"
	clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
	"k8s.io/kubernetes/pkg/client/record"
	"k8s.io/kubernetes/pkg/cloudprovider"
	"k8s.io/kubernetes/pkg/fieldpath"
	"k8s.io/kubernetes/pkg/fields"
	"k8s.io/kubernetes/pkg/kubelet/cadvisor"
	"k8s.io/kubernetes/pkg/kubelet/cm"
	"k8s.io/kubernetes/pkg/kubelet/config"
	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
	"k8s.io/kubernetes/pkg/kubelet/dockertools"
	"k8s.io/kubernetes/pkg/kubelet/envvars"
	"k8s.io/kubernetes/pkg/kubelet/eviction"
	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
	"k8s.io/kubernetes/pkg/kubelet/metrics"
	"k8s.io/kubernetes/pkg/kubelet/network"
	"k8s.io/kubernetes/pkg/kubelet/pleg"
	kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
	"k8s.io/kubernetes/pkg/kubelet/prober"
	proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results"
	"k8s.io/kubernetes/pkg/kubelet/rkt"
	"k8s.io/kubernetes/pkg/kubelet/server"
	"k8s.io/kubernetes/pkg/kubelet/server/stats"
	"k8s.io/kubernetes/pkg/kubelet/status"
	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
	"k8s.io/kubernetes/pkg/kubelet/util/format"
	"k8s.io/kubernetes/pkg/kubelet/util/ioutils"
	"k8s.io/kubernetes/pkg/kubelet/util/queue"
	"k8s.io/kubernetes/pkg/kubelet/volumemanager"
	"k8s.io/kubernetes/pkg/runtime"
	"k8s.io/kubernetes/pkg/securitycontext"
	"k8s.io/kubernetes/pkg/types"
	"k8s.io/kubernetes/pkg/util"
	"k8s.io/kubernetes/pkg/util/bandwidth"
	utilerrors "k8s.io/kubernetes/pkg/util/errors"
	utilexec "k8s.io/kubernetes/pkg/util/exec"
	"k8s.io/kubernetes/pkg/util/flowcontrol"
	kubeio "k8s.io/kubernetes/pkg/util/io"
	"k8s.io/kubernetes/pkg/util/mount"
	utilnet "k8s.io/kubernetes/pkg/util/net"
	"k8s.io/kubernetes/pkg/util/oom"
	"k8s.io/kubernetes/pkg/util/procfs"
	utilruntime "k8s.io/kubernetes/pkg/util/runtime"
	"k8s.io/kubernetes/pkg/util/selinux"
	"k8s.io/kubernetes/pkg/util/sets"
	utilvalidation "k8s.io/kubernetes/pkg/util/validation"
	"k8s.io/kubernetes/pkg/util/validation/field"
	"k8s.io/kubernetes/pkg/util/wait"
	"k8s.io/kubernetes/pkg/version"
	"k8s.io/kubernetes/pkg/volume"
	"k8s.io/kubernetes/pkg/volume/util/volumehelper"
	"k8s.io/kubernetes/pkg/watch"
	"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
	"k8s.io/kubernetes/third_party/forked/golang/expansion"
)

const (
	// Max amount of time to wait for the container runtime to come up.
	maxWaitForContainerRuntime = 5 * time.Minute

	// nodeStatusUpdateRetry specifies how many times kubelet retries when posting node status failed.
	nodeStatusUpdateRetry = 5

	// Location of container logs.
	containerLogsDir = "/var/log/containers"

	// max backoff period, exported for the e2e test
	MaxContainerBackOff = 300 * time.Second

	// Capacity of the channel for storing pods to kill. A small number should
	// suffice because a goroutine is dedicated to check the channel and does
	// not block on anything else.
	podKillingChannelCapacity = 50

	// Period for performing global cleanup tasks.
	housekeepingPeriod = time.Second * 2

	// Period for performing eviction monitoring.
	// TODO: ensure this is in sync with internal cadvisor housekeeping.
	evictionMonitoringPeriod = time.Second * 10

	// The path in containers' filesystems where the hosts file is mounted.
	etcHostsPath = "/etc/hosts"

	// Capacity of the channel for receiving pod lifecycle events. This number
	// is a bit arbitrary and may be adjusted in the future.
	plegChannelCapacity = 1000

	// Generic PLEG relies on relisting for discovering container events.
	// A longer period means that kubelet will take longer to detect container
	// changes and to update pod status. On the other hand, a shorter period
	// will cause more frequent relisting (e.g., container runtime operations),
	// leading to higher cpu usage.
	// Note that even though we set the period to 1s, the relisting itself can
	// take more than 1s to finish if the container runtime responds slowly
	// and/or when there are many container changes in one cycle.
	plegRelistPeriod = time.Second * 1

	// backOffPeriod is the period to back off when pod syncing results in an
	// error. It is also used as the base period for the exponential backoff
	// of container restarts and image pulls.
	backOffPeriod = time.Second * 10

	// Period for performing container garbage collection.
	ContainerGCPeriod = time.Minute
	// Period for performing image garbage collection.
	ImageGCPeriod = 5 * time.Minute

	// maxImagesInNodeStatus is the number of max images we store in image status.
	maxImagesInNodeStatus = 50
)

// SyncHandler is an interface implemented by Kubelet, for testability
type SyncHandler interface {
	HandlePodAdditions(pods []*api.Pod)
	HandlePodUpdates(pods []*api.Pod)
	HandlePodRemoves(pods []*api.Pod)
	HandlePodReconcile(pods []*api.Pod)
	HandlePodSyncs(pods []*api.Pod)
	HandlePodCleanups() error
}

// Option is a functional option type for Kubelet
type Option func(*Kubelet)
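
// For illustration only (the option name below is hypothetical, not part of this
// file): a caller could define a functional option such as
//
//	func withResyncInterval(d time.Duration) Option {
//		return func(k *Kubelet) { k.resyncInterval = d }
//	}
//
// and pass it via the kubeOptions argument of NewMainKubelet; each option is
// applied to the Kubelet after construction.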

// NewMainKubelet instantiates a new Kubelet object along with all the required internal modules.
// No initialization of Kubelet and its modules should happen here.
func NewMainKubelet(
	hostname string,
	nodeName string,
	dockerClient dockertools.DockerInterface,
	kubeClient clientset.Interface,
	rootDirectory string,
	seccompProfileRoot string,
	podInfraContainerImage string,
	resyncInterval time.Duration,
	pullQPS float32,
	pullBurst int,
	eventQPS float32,
	eventBurst int,
	containerGCPolicy kubecontainer.ContainerGCPolicy,
	sourcesReadyFn config.SourcesReadyFn,
	registerNode bool,
	registerSchedulable bool,
	standaloneMode bool,
	clusterDomain string,
	clusterDNS net.IP,
	masterServiceNamespace string,
	volumePlugins []volume.VolumePlugin,
	networkPlugins []network.NetworkPlugin,
	networkPluginName string,
	streamingConnectionIdleTimeout time.Duration,
	recorder record.EventRecorder,
	cadvisorInterface cadvisor.Interface,
	imageGCPolicy ImageGCPolicy,
	diskSpacePolicy DiskSpacePolicy,
	cloud cloudprovider.Interface,
	autoDetectCloudProvider bool,
	nodeLabels map[string]string,
	nodeStatusUpdateFrequency time.Duration,
	osInterface kubecontainer.OSInterface,
	cgroupRoot string,
	containerRuntime string,
	runtimeRequestTimeout time.Duration,
	rktPath string,
	rktAPIEndpoint string,
	rktStage1Image string,
	mounter mount.Interface,
	writer kubeio.Writer,
	configureCBR0 bool,
	nonMasqueradeCIDR string,
	podCIDR string,
	reconcileCIDR bool,
	maxPods int,
	podsPerCore int,
	nvidiaGPUs int,
	dockerExecHandler dockertools.ExecHandler,
	resolverConfig string,
	cpuCFSQuota bool,
	daemonEndpoints *api.NodeDaemonEndpoints,
	oomAdjuster *oom.OOMAdjuster,
	serializeImagePulls bool,
	containerManager cm.ContainerManager,
	outOfDiskTransitionFrequency time.Duration,
	flannelExperimentalOverlay bool,
	nodeIP net.IP,
	reservation kubetypes.Reservation,
	enableCustomMetrics bool,
	volumeStatsAggPeriod time.Duration,
	containerRuntimeOptions []kubecontainer.Option,
	hairpinMode string,
	babysitDaemons bool,
	evictionConfig eviction.Config,
	kubeOptions []Option,
	enableControllerAttachDetach bool,
) (*Kubelet, error) {
	if rootDirectory == "" {
		return nil, fmt.Errorf("invalid root directory %q", rootDirectory)
	}
	if resyncInterval <= 0 {
		return nil, fmt.Errorf("invalid sync frequency %d", resyncInterval)
	}

	serviceStore := cache.NewStore(cache.MetaNamespaceKeyFunc)
	if kubeClient != nil {
		// TODO: cache.NewListWatchFromClient is limited as it takes a client implementation rather
		// than an interface. There is no way to construct a list+watcher using resource name.
		listWatch := &cache.ListWatch{
			ListFunc: func(options api.ListOptions) (runtime.Object, error) {
				return kubeClient.Core().Services(api.NamespaceAll).List(options)
			},
			WatchFunc: func(options api.ListOptions) (watch.Interface, error) {
				return kubeClient.Core().Services(api.NamespaceAll).Watch(options)
			},
		}
		cache.NewReflector(listWatch, &api.Service{}, serviceStore, 0).Run()
	}
	serviceLister := &cache.StoreToServiceLister{Store: serviceStore}

	nodeStore := cache.NewStore(cache.MetaNamespaceKeyFunc)
	if kubeClient != nil {
		// TODO: cache.NewListWatchFromClient is limited as it takes a client implementation rather
		// than an interface. There is no way to construct a list+watcher using resource name.
		fieldSelector := fields.Set{api.ObjectNameField: nodeName}.AsSelector()
		listWatch := &cache.ListWatch{
			ListFunc: func(options api.ListOptions) (runtime.Object, error) {
				options.FieldSelector = fieldSelector
				return kubeClient.Core().Nodes().List(options)
			},
			WatchFunc: func(options api.ListOptions) (watch.Interface, error) {
				options.FieldSelector = fieldSelector
				return kubeClient.Core().Nodes().Watch(options)
			},
		}
		cache.NewReflector(listWatch, &api.Node{}, nodeStore, 0).Run()
	}
	nodeLister := &cache.StoreToNodeLister{Store: nodeStore}
	nodeInfo := &predicates.CachedNodeInfo{StoreToNodeLister: nodeLister}

	// TODO: get the real node object of ourselves,
	// and use the real node name and UID.
	// TODO: what is the namespace for node?
	nodeRef := &api.ObjectReference{
		Kind:      "Node",
		Name:      nodeName,
		UID:       types.UID(nodeName),
		Namespace: "",
	}

	diskSpaceManager, err := newDiskSpaceManager(cadvisorInterface, diskSpacePolicy)
	if err != nil {
		return nil, fmt.Errorf("failed to initialize disk manager: %v", err)
	}
	containerRefManager := kubecontainer.NewRefManager()

	oomWatcher := NewOOMWatcher(cadvisorInterface, recorder)

	// TODO: remove when the internal cbr0 implementation gets removed in favor
	// of the kubenet network plugin
	if networkPluginName == "kubenet" {
		configureCBR0 = false
		flannelExperimentalOverlay = false
	}

	klet := &Kubelet{
		hostname:                       hostname,
		nodeName:                       nodeName,
		dockerClient:                   dockerClient,
		kubeClient:                     kubeClient,
		rootDirectory:                  rootDirectory,
		resyncInterval:                 resyncInterval,
		containerRefManager:            containerRefManager,
		httpClient:                     &http.Client{},
		sourcesReady:                   config.NewSourcesReady(sourcesReadyFn),
		registerNode:                   registerNode,
		registerSchedulable:            registerSchedulable,
		standaloneMode:                 standaloneMode,
		clusterDomain:                  clusterDomain,
		clusterDNS:                     clusterDNS,
		serviceLister:                  serviceLister,
		nodeLister:                     nodeLister,
		nodeInfo:                       nodeInfo,
		masterServiceNamespace:         masterServiceNamespace,
		streamingConnectionIdleTimeout: streamingConnectionIdleTimeout,
		recorder:                       recorder,
		cadvisor:                       cadvisorInterface,
		diskSpaceManager:               diskSpaceManager,
		cloud:                          cloud,
		autoDetectCloudProvider:        autoDetectCloudProvider,
		nodeRef:                        nodeRef,
		nodeLabels:                     nodeLabels,
		nodeStatusUpdateFrequency:      nodeStatusUpdateFrequency,
		os:                             osInterface,
		oomWatcher:                     oomWatcher,
		cgroupRoot:                     cgroupRoot,
		mounter:                        mounter,
		writer:                         writer,
		configureCBR0:                  configureCBR0,
		nonMasqueradeCIDR:              nonMasqueradeCIDR,
		reconcileCIDR:                  reconcileCIDR,
		maxPods:                        maxPods,
		podsPerCore:                    podsPerCore,
		nvidiaGPUs:                     nvidiaGPUs,
		syncLoopMonitor:                atomic.Value{},
		resolverConfig:                 resolverConfig,
		cpuCFSQuota:                    cpuCFSQuota,
		daemonEndpoints:                daemonEndpoints,
		containerManager:               containerManager,
		flannelExperimentalOverlay:     flannelExperimentalOverlay,
		flannelHelper:                  nil,
		nodeIP:                         nodeIP,
		clock:                          util.RealClock{},
		outOfDiskTransitionFrequency:   outOfDiskTransitionFrequency,
		reservation:                    reservation,
		enableCustomMetrics:            enableCustomMetrics,
		babysitDaemons:                 babysitDaemons,
		enableControllerAttachDetach:   enableControllerAttachDetach,
	}

	if klet.flannelExperimentalOverlay {
		klet.flannelHelper = NewFlannelHelper()
		glog.Infof("Flannel is in charge of podCIDR and overlay networking.")
	}
	if klet.nodeIP != nil {
		if err := klet.validateNodeIP(); err != nil {
			return nil, err
		}
		glog.Infof("Using node IP: %q", klet.nodeIP.String())
	}

	if mode, err := effectiveHairpinMode(componentconfig.HairpinMode(hairpinMode), containerRuntime, configureCBR0, networkPluginName); err != nil {
		// This is a non-recoverable error. Returning it up the callstack will just
		// lead to retries of the same failure, so just fail hard.
		glog.Fatalf("Invalid hairpin mode: %v", err)
	} else {
		klet.hairpinMode = mode
	}
	glog.Infof("Hairpin mode set to %q", klet.hairpinMode)

	if plug, err := network.InitNetworkPlugin(networkPlugins, networkPluginName, &networkHost{klet}, klet.hairpinMode, klet.nonMasqueradeCIDR); err != nil {
		return nil, err
	} else {
		klet.networkPlugin = plug
	}

	machineInfo, err := klet.GetCachedMachineInfo()
	if err != nil {
		return nil, err
	}

	procFs := procfs.NewProcFS()
	imageBackOff := flowcontrol.NewBackOff(backOffPeriod, MaxContainerBackOff)

	klet.livenessManager = proberesults.NewManager()

	klet.podCache = kubecontainer.NewCache()
	klet.podManager = kubepod.NewBasicPodManager(kubepod.NewBasicMirrorClient(klet.kubeClient))

	// Initialize the runtime.
	switch containerRuntime {
	case "docker":
		// Only supported one for now, continue.
		klet.containerRuntime = dockertools.NewDockerManager(
			dockerClient,
			kubecontainer.FilterEventRecorder(recorder),
			klet.livenessManager,
			containerRefManager,
			klet.podManager,
			machineInfo,
			podInfraContainerImage,
			pullQPS,
			pullBurst,
			containerLogsDir,
			osInterface,
			klet.networkPlugin,
			klet,
			klet.httpClient,
			dockerExecHandler,
			oomAdjuster,
			procFs,
			klet.cpuCFSQuota,
			imageBackOff,
			serializeImagePulls,
			enableCustomMetrics,
			klet.hairpinMode == componentconfig.HairpinVeth,
			seccompProfileRoot,
			containerRuntimeOptions...,
		)
	case "rkt":
		// TODO: Include hairpin mode settings in rkt?
		conf := &rkt.Config{
			Path:            rktPath,
			Stage1Image:     rktStage1Image,
			InsecureOptions: "image,ondisk",
		}
		rktRuntime, err := rkt.New(
			rktAPIEndpoint,
			conf,
			klet,
			recorder,
			containerRefManager,
			klet.podManager,
			klet.livenessManager,
			klet.httpClient,
			klet.networkPlugin,
			klet.hairpinMode == componentconfig.HairpinVeth,
			utilexec.New(),
			kubecontainer.RealOS{},
			imageBackOff,
			serializeImagePulls,
			runtimeRequestTimeout,
		)
		if err != nil {
			return nil, err
		}
		klet.containerRuntime = rktRuntime
	default:
		return nil, fmt.Errorf("unsupported container runtime %q specified", containerRuntime)
	}

	// TODO: Factor out "StatsProvider" from Kubelet so we don't have a cyclic dependency
	klet.resourceAnalyzer = stats.NewResourceAnalyzer(klet, volumeStatsAggPeriod, klet.containerRuntime)

	klet.pleg = pleg.NewGenericPLEG(klet.containerRuntime, plegChannelCapacity, plegRelistPeriod, klet.podCache, util.RealClock{})
	klet.runtimeState = newRuntimeState(maxWaitForContainerRuntime)
	klet.updatePodCIDR(podCIDR)

	// setup containerGC
	containerGC, err := kubecontainer.NewContainerGC(klet.containerRuntime, containerGCPolicy)
	if err != nil {
		return nil, err
	}
	klet.containerGC = containerGC

	// setup imageManager
	imageManager, err := newImageManager(klet.containerRuntime, cadvisorInterface, recorder, nodeRef, imageGCPolicy)
	if err != nil {
		return nil, fmt.Errorf("failed to initialize image manager: %v", err)
	}
	klet.imageManager = imageManager

	klet.runner = klet.containerRuntime
	klet.statusManager = status.NewManager(kubeClient, klet.podManager)

	klet.probeManager = prober.NewManager(
		klet.statusManager,
		klet.livenessManager,
		klet.runner,
		containerRefManager,
		recorder)

	klet.volumePluginMgr, err =
		NewInitializedVolumePluginMgr(klet, volumePlugins)
	if err != nil {
		return nil, err
	}

	klet.volumeManager, err = volumemanager.NewVolumeManager(
		enableControllerAttachDetach,
		hostname,
		klet.podManager,
		klet.kubeClient,
		klet.volumePluginMgr,
		klet.containerRuntime)

	runtimeCache, err := kubecontainer.NewRuntimeCache(klet.containerRuntime)
	if err != nil {
		return nil, err
	}
	klet.runtimeCache = runtimeCache
	klet.reasonCache = NewReasonCache()
	klet.workQueue = queue.NewBasicWorkQueue(klet.clock)
	klet.podWorkers = newPodWorkers(klet.syncPod, recorder, klet.workQueue, klet.resyncInterval, backOffPeriod, klet.podCache)

	klet.backOff = flowcontrol.NewBackOff(backOffPeriod, MaxContainerBackOff)
	klet.podKillingCh = make(chan *kubecontainer.PodPair, podKillingChannelCapacity)
	klet.setNodeStatusFuncs = klet.defaultNodeStatusFuncs()

	// setup eviction manager
	evictionManager, evictionAdmitHandler, err := eviction.NewManager(klet.resourceAnalyzer, evictionConfig, killPodNow(klet.podWorkers), recorder, nodeRef, klet.clock)
	if err != nil {
		return nil, fmt.Errorf("failed to initialize eviction manager: %v", err)
	}
	klet.evictionManager = evictionManager
	klet.AddPodAdmitHandler(evictionAdmitHandler)

	// enable active deadline handler
	activeDeadlineHandler, err := newActiveDeadlineHandler(klet.statusManager, klet.recorder, klet.clock)
	if err != nil {
		return nil, err
	}
	klet.AddPodSyncLoopHandler(activeDeadlineHandler)
	klet.AddPodSyncHandler(activeDeadlineHandler)

	// apply functional Options
	for _, opt := range kubeOptions {
		opt(klet)
	}
	return klet, nil
}

type serviceLister interface {
	List() (api.ServiceList, error)
}

type nodeLister interface {
	List() (machines api.NodeList, err error)
}

// Kubelet is the main kubelet implementation.
type Kubelet struct {
	hostname      string
	nodeName      string
	dockerClient  dockertools.DockerInterface
	runtimeCache  kubecontainer.RuntimeCache
	kubeClient    clientset.Interface
	rootDirectory string

	// podWorkers handle syncing Pods in response to events.
	podWorkers PodWorkers

	// resyncInterval is the interval between periodic full reconciliations of
	// pods on this node.
	resyncInterval time.Duration

	// sourcesReady records the sources seen by the kubelet, it is thread-safe.
	sourcesReady config.SourcesReady

	// podManager is a facade that abstracts away the various sources of pods
	// this Kubelet services.
	podManager kubepod.Manager

	// Needed to observe and respond to situations that could impact node stability
	evictionManager eviction.Manager

	// Needed to report events for containers belonging to deleted/modified pods.
	// Tracks references for reporting events
	containerRefManager *kubecontainer.RefManager

	// Optional, defaults to /logs/ from /var/log
	logServer http.Handler
	// Optional, defaults to simple Docker implementation
	runner kubecontainer.ContainerCommandRunner
	// Optional, client for http requests, defaults to empty client
	httpClient kubetypes.HttpGetter

	// cAdvisor used for container information.
	cadvisor cadvisor.Interface

	// Set to true to have the node register itself with the apiserver.
	registerNode bool
	// Set to true to have the node register itself as schedulable.
	registerSchedulable bool
	// for internal book keeping; access only from within registerWithApiserver
	registrationCompleted bool

	// Set to true if the kubelet is in standalone mode (i.e. set up without an apiserver)
	standaloneMode bool

	// If non-empty, use this for container DNS search.
	clusterDomain string
	// If non-nil, use this for container DNS server.
	clusterDNS net.IP

	// masterServiceNamespace is the namespace that the master service is exposed in.
	masterServiceNamespace string
	// serviceLister knows how to list services
	serviceLister serviceLister
	// nodeLister knows how to list nodes
	nodeLister nodeLister
	// nodeInfo knows how to get information about the node for this kubelet.
	nodeInfo predicates.NodeInfo

	// a list of node labels to register
	nodeLabels map[string]string

	// Last timestamp when runtime responded on ping.
	// Mutex is used to protect this value.
	runtimeState *runtimeState

	// Volume plugins.
	volumePluginMgr *volume.VolumePluginMgr

	// Network plugin.
	networkPlugin network.NetworkPlugin

	// Handles container probing.
	probeManager prober.Manager
	// Manages container health check results.
	livenessManager proberesults.Manager

	// How long to keep idle streaming command execution/port forwarding
	// connections open before terminating them
	streamingConnectionIdleTimeout time.Duration

	// The EventRecorder to use
	recorder record.EventRecorder

	// Policy for handling garbage collection of dead containers.
	containerGC kubecontainer.ContainerGC

	// Manager for images.
	imageManager imageManager

	// Diskspace manager.
	diskSpaceManager diskSpaceManager

	// Cached MachineInfo returned by cadvisor.
	machineInfo *cadvisorapi.MachineInfo

	// Syncs pods statuses with apiserver; also used as a cache of statuses.
	statusManager status.Manager

	// VolumeManager runs a set of asynchronous loops that figure out which
	// volumes need to be attached/mounted/unmounted/detached based on the pods
	// scheduled on this node and makes it so.
	volumeManager volumemanager.VolumeManager

	// Cloud provider interface.
	cloud                   cloudprovider.Interface
	autoDetectCloudProvider bool

	// Reference to this node.
	nodeRef *api.ObjectReference

	// Container runtime.
	containerRuntime kubecontainer.Runtime

	// reasonCache caches the failure reason of the last creation of all containers, which is
	// used for generating ContainerStatus.
	reasonCache *ReasonCache

	// nodeStatusUpdateFrequency specifies how often kubelet posts node status to master.
	// Note: be cautious when changing the constant, it must work with nodeMonitorGracePeriod
	// in nodecontroller. There are several constraints:
	// 1. nodeMonitorGracePeriod must be N times more than nodeStatusUpdateFrequency, where
	//    N means number of retries allowed for kubelet to post node status. It is pointless
	//    to make nodeMonitorGracePeriod be less than nodeStatusUpdateFrequency, since there
	//    will only be fresh values from Kubelet at an interval of nodeStatusUpdateFrequency.
	//    The constant must be less than podEvictionTimeout.
	// 2. nodeStatusUpdateFrequency needs to be large enough for kubelet to generate node
	//    status. Kubelet may fail to update node status reliably if the value is too small,
	//    as it takes time to gather all necessary node information.
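	//
	// For example (illustrative numbers only): with a nodeStatusUpdateFrequency of 10s and
	// nodeStatusUpdateRetry = 5 retries, nodeMonitorGracePeriod should be at least 5 * 10s = 50s.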
	nodeStatusUpdateFrequency time.Duration

	// Generates pod events.
	pleg pleg.PodLifecycleEventGenerator

	// Store kubecontainer.PodStatus for all pods.
	podCache kubecontainer.Cache

	// os is a facade for various syscalls that need to be mocked during testing.
	os kubecontainer.OSInterface

	// Watcher of out of memory events.
	oomWatcher OOMWatcher

	// Monitor resource usage
	resourceAnalyzer stats.ResourceAnalyzer

	// If non-empty, pass this to the container runtime as the root cgroup.
	cgroupRoot string

	// Mounter to use for volumes.
	mounter mount.Interface

	// Writer interface to use for volumes.
	writer kubeio.Writer

	// Manager of non-Runtime containers.
	containerManager cm.ContainerManager
	nodeConfig       cm.NodeConfig

	// Whether or not kubelet should take responsibility for keeping cbr0 in
	// the correct state.
	configureCBR0 bool
	reconcileCIDR bool

	// Traffic to IPs outside this range will use IP masquerade.
	nonMasqueradeCIDR string

	// Maximum number of pods which can be run by this Kubelet
	maxPods int

	// Number of NVIDIA GPUs on this node
	nvidiaGPUs int

	// Monitor Kubelet's sync loop
	syncLoopMonitor atomic.Value

	// Container restart backoff
	backOff *flowcontrol.Backoff

	// Channel for sending pods to kill.
	podKillingCh chan *kubecontainer.PodPair

	// The configuration file used as the base to generate the container's
	// DNS resolver configuration file. This can be used in conjunction with
	// clusterDomain and clusterDNS.
	resolverConfig string

	// Optionally shape the bandwidth of a pod
	// TODO: remove when kubenet plugin is ready
	shaper bandwidth.BandwidthShaper

	// True if container cpu limits should be enforced via cgroup CFS quota
	cpuCFSQuota bool

	// Information about the ports which are opened by daemons on the node running this Kubelet server.
	daemonEndpoints *api.NodeDaemonEndpoints

	// A queue used to trigger pod workers.
	workQueue queue.WorkQueue

	// oneTimeInitializer is used to initialize modules that are dependent on the runtime to be up.
	oneTimeInitializer sync.Once

	flannelExperimentalOverlay bool
	// TODO: FlannelHelper doesn't store any state, we can instantiate it
	// on the fly if we're confident the dbus connections it opens don't
	// put the system under duress.
	flannelHelper *FlannelHelper

	// If non-nil, use this IP address for the node
	nodeIP net.IP

	// clock is an interface that provides time related functionality in a way that makes it
	// easy to test the code.
	clock util.Clock

	// outOfDiskTransitionFrequency specifies the amount of time the kubelet has to be actually
	// not out of disk before it can transition the node condition status from out-of-disk to
	// not-out-of-disk. This prevents a pod that causes out-of-disk condition from repeatedly
	// getting rescheduled onto the node.
	outOfDiskTransitionFrequency time.Duration

	// reservation specifies resources which are reserved for non-pod usage, including kubernetes and
	// non-kubernetes system processes.
	reservation kubetypes.Reservation

	// support gathering custom metrics.
	enableCustomMetrics bool

	// How the Kubelet should setup hairpin NAT. Can take the values: "promiscuous-bridge"
	// (make cbr0 promiscuous), "hairpin-veth" (set the hairpin flag on veth interfaces)
	// or "none" (do nothing).
	hairpinMode componentconfig.HairpinMode

	// The node has a babysitter process monitoring docker and kubelet
	babysitDaemons bool

	// handlers called during the tryUpdateNodeStatus cycle
	setNodeStatusFuncs []func(*api.Node) error

	// TODO: think about moving this to be centralized in PodWorkers in follow-on.
	// the list of handlers to call during pod admission.
	lifecycle.PodAdmitHandlers
	// the list of handlers to call during pod sync loop.
	lifecycle.PodSyncLoopHandlers
	// the list of handlers to call during pod sync.
	lifecycle.PodSyncHandlers

	// the number of allowed pods per core
	podsPerCore int

	// enableControllerAttachDetach indicates the Attach/Detach controller
	// should manage attachment/detachment of volumes scheduled to this node,
	// and disable kubelet from executing any attach/detach operations
	enableControllerAttachDetach bool
}

// dirExists returns true if the path exists and represents a directory.
func dirExists(path string) bool {
	s, err := os.Stat(path)
	if err != nil {
		return false
	}
	return s.IsDir()
}

// setupDataDirs creates:
// 1.  the root directory
// 2.  the pods directory
// 3.  the plugins directory
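// (For reference: with the kubelet's default root directory of /var/lib/kubelet,
// these would typically be /var/lib/kubelet, /var/lib/kubelet/pods and
// /var/lib/kubelet/plugins; the root is configurable, so actual paths may differ.)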
func (kl *Kubelet) setupDataDirs() error {
	kl.rootDirectory = path.Clean(kl.rootDirectory)
	if err := os.MkdirAll(kl.getRootDir(), 0750); err != nil {
		return fmt.Errorf("error creating root directory: %v", err)
	}
	if err := os.MkdirAll(kl.getPodsDir(), 0750); err != nil {
		return fmt.Errorf("error creating pods directory: %v", err)
	}
	if err := os.MkdirAll(kl.getPluginsDir(), 0750); err != nil {
		return fmt.Errorf("error creating plugins directory: %v", err)
	}
	return nil
}

// Get a list of pods that have data directories.
func (kl *Kubelet) listPodsFromDisk() ([]types.UID, error) {
	podInfos, err := ioutil.ReadDir(kl.getPodsDir())
	if err != nil {
		return nil, err
	}
	pods := []types.UID{}
	for i := range podInfos {
		if podInfos[i].IsDir() {
			pods = append(pods, types.UID(podInfos[i].Name()))
		}
	}
	return pods, nil
}

// Starts garbage collection threads.
func (kl *Kubelet) StartGarbageCollection() {
	go wait.Until(func() {
		if err := kl.containerGC.GarbageCollect(kl.sourcesReady.AllReady()); err != nil {
			glog.Errorf("Container garbage collection failed: %v", err)
		}
	}, ContainerGCPeriod, wait.NeverStop)

	go wait.Until(func() {
		if err := kl.imageManager.GarbageCollect(); err != nil {
			glog.Errorf("Image garbage collection failed: %v", err)
		}
	}, ImageGCPeriod, wait.NeverStop)
}

// initializeModules will initialize internal modules that do not require the container runtime to be up.
// Note that the modules here must not depend on modules that are not initialized here.
func (kl *Kubelet) initializeModules() error {
	// Step 1: Prometheus metrics.
	metrics.Register(kl.runtimeCache)

	// Step 2: Setup filesystem directories.
	if err := kl.setupDataDirs(); err != nil {
		return err
	}

	// Step 3: If the container logs directory does not exist, create it.
	if _, err := os.Stat(containerLogsDir); err != nil {
		if err := kl.os.MkdirAll(containerLogsDir, 0755); err != nil {
			glog.Errorf("Failed to create directory %q: %v", containerLogsDir, err)
		}
	}

	// Step 4: Start the image manager.
	if err := kl.imageManager.Start(); err != nil {
		return fmt.Errorf("Failed to start ImageManager, images may not be garbage collected: %v", err)
	}

	// Step 5: Start container manager.
	if err := kl.containerManager.Start(); err != nil {
		return fmt.Errorf("Failed to start ContainerManager %v", err)
	}

	// Step 6: Start out of memory watcher.
	if err := kl.oomWatcher.Start(kl.nodeRef); err != nil {
		return fmt.Errorf("Failed to start OOM watcher %v", err)
	}

	// Step 7: Start resource analyzer.
	kl.resourceAnalyzer.Start()

	return nil
}

// initializeRuntimeDependentModules will initialize internal modules that require the container runtime to be up.
func (kl *Kubelet) initializeRuntimeDependentModules() {
	if err := kl.cadvisor.Start(); err != nil {
		kl.runtimeState.setInternalError(fmt.Errorf("Failed to start cAdvisor %v", err))
	}
}

// Run starts the kubelet reacting to config updates
func (kl *Kubelet) Run(updates <-chan kubetypes.PodUpdate) {
	if kl.logServer == nil {
		kl.logServer = http.StripPrefix("/logs/", http.FileServer(http.Dir("/var/log/")))
	}
	if kl.kubeClient == nil {
		glog.Warning("No api server defined - no node status update will be sent.")
	}
	if err := kl.initializeModules(); err != nil {
		kl.recorder.Eventf(kl.nodeRef, api.EventTypeWarning, kubecontainer.KubeletSetupFailed, err.Error())
		glog.Error(err)
		kl.runtimeState.setInitError(err)
	}

	// Start volume manager
	go kl.volumeManager.Run(wait.NeverStop)

	if kl.kubeClient != nil {
		// Start syncing node status immediately, this may set up things the runtime needs to run.
		go wait.Until(kl.syncNodeStatus, kl.nodeStatusUpdateFrequency, wait.NeverStop)
	}
	go wait.Until(kl.syncNetworkStatus, 30*time.Second, wait.NeverStop)
	go wait.Until(kl.updateRuntimeUp, 5*time.Second, wait.NeverStop)

	// Start a goroutine responsible for killing pods (that are not properly
	// handled by pod workers).
	go wait.Until(kl.podKiller, 1*time.Second, wait.NeverStop)

	// Start component sync loops.
	kl.statusManager.Start()
	kl.probeManager.Start()
	kl.evictionManager.Start(kl.getActivePods, evictionMonitoringPeriod)

	// Start the pod lifecycle event generator.
	kl.pleg.Start()
	kl.syncLoop(updates, kl)
}

// getActivePods returns non-terminal pods
func (kl *Kubelet) getActivePods() []*api.Pod {
	allPods := kl.podManager.GetPods()
	activePods := kl.filterOutTerminatedPods(allPods)
	return activePods
}

// initialNodeStatus determines the initial node status, incorporating node
// labels and information from the cloud provider.
func (kl *Kubelet) initialNodeStatus() (*api.Node, error) {
	node := &api.Node{
		ObjectMeta: api.ObjectMeta{
			Name: kl.nodeName,
			Labels: map[string]string{
				unversioned.LabelHostname: kl.hostname,
				unversioned.LabelOS:       goRuntime.GOOS,
				unversioned.LabelArch:     goRuntime.GOARCH,
			},
		},
		Spec: api.NodeSpec{
			Unschedulable: !kl.registerSchedulable,
		},
	}
	// Initially, set NodeNetworkUnavailable to true.
	if kl.providerRequiresNetworkingConfiguration() {
		node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{
			Type:               api.NodeNetworkUnavailable,
			Status:             api.ConditionTrue,
			Reason:             "NoRouteCreated",
			Message:            "Node created without a route",
			LastTransitionTime: unversioned.NewTime(kl.clock.Now()),
		})
	}

	if kl.enableControllerAttachDetach {
		if node.Annotations == nil {
			node.Annotations = make(map[string]string)
		}
		node.Annotations[volumehelper.ControllerManagedAttachAnnotation] = "true"
	}

	// @question: should this be placed after the call to the cloud provider? which also applies labels
	for k, v := range kl.nodeLabels {
		if cv, found := node.ObjectMeta.Labels[k]; found {
			glog.Warningf("the node label %s=%s will overwrite default setting %s", k, v, cv)
		}
		node.ObjectMeta.Labels[k] = v
	}

	if kl.cloud != nil {
		instances, ok := kl.cloud.Instances()
		if !ok {
			return nil, fmt.Errorf("failed to get instances from cloud provider")
		}
		// TODO(roberthbailey): Can we do this without having credentials to talk
		// to the cloud provider?
		// TODO: ExternalID is deprecated, we'll have to drop this code
		externalID, err := instances.ExternalID(kl.nodeName)
		if err != nil {
			return nil, fmt.Errorf("failed to get external ID from cloud provider: %v", err)
		}
		node.Spec.ExternalID = externalID

		// TODO: We can't assume that the node has credentials to talk to the
		// cloudprovider from arbitrary nodes. At most, we should talk to a
		// local metadata server here.
		node.Spec.ProviderID, err = cloudprovider.GetInstanceProviderID(kl.cloud, kl.nodeName)
		if err != nil {
			return nil, err
		}

		instanceType, err := instances.InstanceType(kl.nodeName)
		if err != nil {
			return nil, err
		}
		if instanceType != "" {
			glog.Infof("Adding node label from cloud provider: %s=%s", unversioned.LabelInstanceType, instanceType)
			node.ObjectMeta.Labels[unversioned.LabelInstanceType] = instanceType
		}

		// If the cloud has zone information, label the node with the zone information
		zones, ok := kl.cloud.Zones()
		if ok {
			zone, err := zones.GetZone()
			if err != nil {
				return nil, fmt.Errorf("failed to get zone from cloud provider: %v", err)
			}
			if zone.FailureDomain != "" {
				glog.Infof("Adding node label from cloud provider: %s=%s", unversioned.LabelZoneFailureDomain, zone.FailureDomain)
				node.ObjectMeta.Labels[unversioned.LabelZoneFailureDomain] = zone.FailureDomain
			}
			if zone.Region != "" {
				glog.Infof("Adding node label from cloud provider: %s=%s", unversioned.LabelZoneRegion, zone.Region)
				node.ObjectMeta.Labels[unversioned.LabelZoneRegion] = zone.Region
			}
		}
	} else {
		node.Spec.ExternalID = kl.hostname
		if kl.autoDetectCloudProvider {
			// If no cloud provider is defined - use the one detected by cadvisor
			info, err := kl.GetCachedMachineInfo()
			if err == nil {
				kl.updateCloudProviderFromMachineInfo(node, info)
			}
		}
	}
	if err := kl.setNodeStatus(node); err != nil {
		return nil, err
	}

	return node, nil
}

// registerWithApiserver registers the node with the cluster master. It is safe
// to call multiple times, but not concurrently (kl.registrationCompleted is
// not locked).
func (kl *Kubelet) registerWithApiserver() {
	if kl.registrationCompleted {
		return
	}
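	// Retry with exponential backoff: the delay between attempts starts at
	// 100ms and doubles on each iteration (100ms, 200ms, 400ms, ...), capped at 7s.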
	step := 100 * time.Millisecond
	for {
		time.Sleep(step)
		step = step * 2
		if step >= 7*time.Second {
			step = 7 * time.Second
		}
		node, err := kl.initialNodeStatus()
		if err != nil {
			glog.Errorf("Unable to construct api.Node object for kubelet: %v", err)
			continue
		}

		glog.V(2).Infof("Attempting to register node %s", node.Name)
		if _, err := kl.kubeClient.Core().Nodes().Create(node); err != nil {
			if !apierrors.IsAlreadyExists(err) {
				glog.V(2).Infof("Unable to register %s with the apiserver: %v", node.Name, err)
				continue
			}
			currentNode, err := kl.kubeClient.Core().Nodes().Get(kl.nodeName)
			if err != nil {
				glog.Errorf("error getting node %q: %v", kl.nodeName, err)
				continue
			}
			if currentNode == nil {
				glog.Errorf("no node instance returned for %q", kl.nodeName)
				continue
			}
			if currentNode.Spec.ExternalID == node.Spec.ExternalID {
				glog.Infof("Node %s was previously registered", node.Name)
				kl.registrationCompleted = true
				return
			}
			glog.Errorf(
				"Previously %q had externalID %q; now it is %q; will delete and recreate.",
				kl.nodeName, node.Spec.ExternalID, currentNode.Spec.ExternalID,
			)
			if err := kl.kubeClient.Core().Nodes().Delete(node.Name, nil); err != nil {
				glog.Errorf("Unable to delete old node: %v", err)
			} else {
				glog.Errorf("Deleted old node object %q", kl.nodeName)
			}
			continue
		}
		glog.Infof("Successfully registered node %s", node.Name)
		kl.registrationCompleted = true
		return
	}
}

// syncNodeStatus should be called periodically from a goroutine.
// It synchronizes node status to master, registering the kubelet first if
// necessary.
func (kl *Kubelet) syncNodeStatus() {
	if kl.kubeClient == nil {
		return
	}
	if kl.registerNode {
		// This will exit immediately if it doesn't need to do anything.
		kl.registerWithApiserver()
	}
	if err := kl.updateNodeStatus(); err != nil {
		glog.Errorf("Unable to update node status: %v", err)
	}
}

// relabelVolumes relabels SELinux volumes to match the pod's
// SELinuxOptions specification. This is only needed if the pod uses
// hostPID or hostIPC. Otherwise relabeling is delegated to docker.
func (kl *Kubelet) relabelVolumes(pod *api.Pod, volumes kubecontainer.VolumeMap) error {
	if pod.Spec.SecurityContext.SELinuxOptions == nil {
		return nil
	}
	rootDirContext, err := kl.getRootDirContext()
	if err != nil {
		return err
	}

	selinuxRunner := selinux.NewSelinuxContextRunner()
	// Apply the pod's Level to the rootDirContext
	rootDirSELinuxOptions, err := securitycontext.ParseSELinuxOptions(rootDirContext)
	if err != nil {
		return err
	}
	rootDirSELinuxOptions.Level = pod.Spec.SecurityContext.SELinuxOptions.Level
	volumeContext := fmt.Sprintf("%s:%s:%s:%s", rootDirSELinuxOptions.User, rootDirSELinuxOptions.Role, rootDirSELinuxOptions.Type, rootDirSELinuxOptions.Level)

	for _, vol := range volumes {
		if vol.Mounter.GetAttributes().Managed && vol.Mounter.GetAttributes().SupportsSELinux {
			// Relabel the volume and its content to match the 'Level' of the pod
			err := filepath.Walk(vol.Mounter.GetPath(), func(path string, info os.FileInfo, err error) error {
				if err != nil {
					return err
				}
				return selinuxRunner.SetContext(path, volumeContext)
			})
			if err != nil {
				return err
			}
			vol.SELinuxLabeled = true
		}
	}
	return nil
}
2016-04-28 04:26:36 +00:00
// makeMounts determines the mount points for the given container.
2016-03-07 20:24:08 +00:00
func makeMounts ( pod * api . Pod , podDir string , container * api . Container , hostName , hostDomain , podIP string , podVolumes kubecontainer . VolumeMap ) ( [ ] kubecontainer . Mount , error ) {
2015-10-21 17:17:27 +00:00
// Kubernetes only mounts on /etc/hosts if :
// - container does not use hostNetwork and
// - container is not a infrastructure(pause) container
// - container is not already mounting on /etc/hosts
// When the pause container is being created, its IP is still unknown. Hence, PodIP will not have been set.
2016-03-07 20:24:08 +00:00
mountEtcHostsFile := ( pod . Spec . SecurityContext == nil || ! pod . Spec . SecurityContext . HostNetwork ) && len ( podIP ) > 0
glog . V ( 3 ) . Infof ( "container: %v/%v/%v podIP: %q creating hosts mount: %v" , pod . Namespace , pod . Name , container . Name , podIP , mountEtcHostsFile )
2015-10-21 17:17:27 +00:00
mounts := [ ] kubecontainer . Mount { }
2014-08-27 05:08:06 +00:00
for _ , mount := range container . VolumeMounts {
2015-10-21 17:17:27 +00:00
mountEtcHostsFile = mountEtcHostsFile && ( mount . MountPath != etcHostsPath )
2014-08-27 05:08:06 +00:00
vol , ok := podVolumes [ mount . Name ]
if ! ok {
2015-04-16 00:40:07 +00:00
glog . Warningf ( "Mount cannot be satisified for container %q, because the volume is missing: %q" , container . Name , mount )
2014-08-27 05:08:06 +00:00
continue
2014-06-19 23:59:48 +00:00
}
2015-10-07 19:19:06 +00:00
relabelVolume := false
// If the volume is managed, supports SELinux, and has not
// already been relabeled, relabel it and mark it as labeled.
2016-03-23 05:12:21 +00:00
if vol . Mounter . GetAttributes ( ) . Managed && vol . Mounter . GetAttributes ( ) . SupportsSELinux && ! vol . SELinuxLabeled {
2015-10-07 19:19:06 +00:00
vol . SELinuxLabeled = true
relabelVolume = true
}
2016-03-05 01:40:15 +00:00
hostPath := vol . Mounter . GetPath ( )
if mount . SubPath != "" {
hostPath = filepath . Join ( hostPath , mount . SubPath )
}
2015-05-12 21:49:35 +00:00
mounts = append ( mounts , kubecontainer . Mount {
2015-10-07 19:19:06 +00:00
Name : mount . Name ,
ContainerPath : mount . MountPath ,
2016-03-05 01:40:15 +00:00
HostPath : hostPath ,
2015-10-07 19:19:06 +00:00
ReadOnly : mount . ReadOnly ,
SELinuxRelabel : relabelVolume ,
2015-05-12 21:49:35 +00:00
} )
}
2015-10-21 17:17:27 +00:00
if mountEtcHostsFile {
2016-03-07 20:24:08 +00:00
hostsMount , err := makeHostsMount ( podDir , podIP , hostName , hostDomain )
2015-10-21 17:17:27 +00:00
if err != nil {
return nil , err
}
mounts = append ( mounts , * hostsMount )
}
return mounts , nil
}
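// Example (hypothetical values, for illustration only): a volumeMount named
// "data" with MountPath "/var/lib/app" and SubPath "shard-0", whose volume
// is mounted somewhere under the kubelet's pod directory on the host, would
// produce roughly:
//
//	kubecontainer.Mount{
//		Name:           "data",
//		ContainerPath:  "/var/lib/app",
//		HostPath:       "<volume host path>/shard-0",
//		ReadOnly:       false,
//		SELinuxRelabel: false,
//	}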
2016-04-28 04:26:36 +00:00
// makeHostsMount makes the mountpoint for the hosts file that the containers
// in a pod are injected with.
2016-02-02 18:59:54 +00:00
func makeHostsMount ( podDir , podIP , hostName , hostDomainName string ) ( * kubecontainer . Mount , error ) {
2015-10-21 17:17:27 +00:00
hostsFilePath := path . Join ( podDir , "etc-hosts" )
2016-02-02 18:59:54 +00:00
if err := ensureHostsFile ( hostsFilePath , podIP , hostName , hostDomainName ) ; err != nil {
2015-10-21 17:17:27 +00:00
return nil , err
}
return & kubecontainer . Mount {
Name : "k8s-managed-etc-hosts" ,
ContainerPath : etcHostsPath ,
HostPath : hostsFilePath ,
ReadOnly : false ,
} , nil
}
2016-04-28 04:26:36 +00:00
// ensureHostsFile ensures that the given host file has an up-to-date ip, host
// name, and domain name.
2016-02-02 18:59:54 +00:00
func ensureHostsFile ( fileName , hostIP , hostName , hostDomainName string ) error {
2015-10-21 17:17:27 +00:00
if _ , err := os . Stat ( fileName ) ; os . IsExist ( err ) {
glog . V ( 4 ) . Infof ( "kubernetes-managed etc-hosts file exits. Will not be recreated: %q" , fileName )
return nil
}
var buffer bytes . Buffer
buffer . WriteString ( "# Kubernetes-managed hosts file.\n" )
buffer . WriteString ( "127.0.0.1\tlocalhost\n" ) // ipv4 localhost
buffer . WriteString ( "::1\tlocalhost ip6-localhost ip6-loopback\n" ) // ipv6 localhost
buffer . WriteString ( "fe00::0\tip6-localnet\n" )
buffer . WriteString ( "fe00::0\tip6-mcastprefix\n" )
buffer . WriteString ( "fe00::1\tip6-allnodes\n" )
buffer . WriteString ( "fe00::2\tip6-allrouters\n" )
2016-02-02 18:59:54 +00:00
if len ( hostDomainName ) > 0 {
buffer . WriteString ( fmt . Sprintf ( "%s\t%s.%s\t%s\n" , hostIP , hostName , hostDomainName , hostName ) )
} else {
buffer . WriteString ( fmt . Sprintf ( "%s\t%s\n" , hostIP , hostName ) )
}
2015-10-21 17:17:27 +00:00
return ioutil . WriteFile ( fileName , buffer . Bytes ( ) , 0644 )
2015-05-12 21:49:35 +00:00
}
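// For illustration, with hostIP "10.244.1.5", hostName "web-0" and
// hostDomainName "nginx.default.svc.cluster.local" (hypothetical values),
// the generated file would contain:
//
//	# Kubernetes-managed hosts file.
//	127.0.0.1	localhost
//	::1	localhost ip6-localhost ip6-loopback
//	fe00::0	ip6-localnet
//	fe00::0	ip6-mcastprefix
//	fe00::1	ip6-allnodes
//	fe00::2	ip6-allrouters
//	10.244.1.5	web-0.nginx.default.svc.cluster.local	web-0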
func makePortMappings ( container * api . Container ) ( ports [ ] kubecontainer . PortMapping ) {
names := make ( map [ string ] struct { } )
for _ , p := range container . Ports {
pm := kubecontainer . PortMapping {
2016-04-27 04:35:14 +00:00
HostPort : int ( p . HostPort ) ,
ContainerPort : int ( p . ContainerPort ) ,
2015-05-12 21:49:35 +00:00
Protocol : p . Protocol ,
HostIP : p . HostIP ,
2014-06-06 23:40:48 +00:00
}
2015-05-12 21:49:35 +00:00
// We need to create some default port name if it's not specified, since
// this is necessary for rkt.
2015-08-06 01:08:26 +00:00
// http://issue.k8s.io/7710
2015-05-12 21:49:35 +00:00
if p . Name == "" {
pm . Name = fmt . Sprintf ( "%s-%s:%d" , container . Name , p . Protocol , p . ContainerPort )
} else {
pm . Name = fmt . Sprintf ( "%s-%s" , container . Name , p . Name )
}
// Protect against exposing the same protocol-port more than once in a container.
if _ , ok := names [ pm . Name ] ; ok {
glog . Warningf ( "Port name conflicted, %q is defined more than once" , pm . Name )
continue
}
ports = append ( ports , pm )
names [ pm . Name ] = struct { } { }
2014-06-06 23:40:48 +00:00
}
2015-04-16 00:40:07 +00:00
return
2014-06-09 20:47:25 +00:00
}
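// A minimal usage sketch (hypothetical container, for illustration only),
// showing how default port names are derived when a port has no name:
//
//	makePortMappings ( & api . Container {
//		Name : "web" ,
//		Ports : [ ] api . ContainerPort {
//			{ ContainerPort : 80 , Protocol : api . ProtocolTCP } ,                  // -> name "web-TCP:80"
//			{ Name : "metrics" , ContainerPort : 9090 , Protocol : api . ProtocolTCP } , // -> name "web-metrics"
//		} ,
//	} )
//
// A second port that resolves to an already-used name is dropped with a warning.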
2015-02-23 21:04:45 +00:00
2016-04-14 17:45:29 +00:00
func ( kl * Kubelet ) GeneratePodHostNameAndDomain ( pod * api . Pod ) ( string , string , error ) {
2016-02-02 18:59:54 +00:00
// TODO(vmarmol): Handle better.
// Cap hostname at 63 chars (the specification is 64 bytes, which is 63 chars plus the null terminating char).
2016-03-21 21:28:57 +00:00
clusterDomain := kl . clusterDomain
2016-02-02 18:59:54 +00:00
const hostnameMaxLen = 63
podAnnotations := pod . Annotations
if podAnnotations == nil {
podAnnotations = make ( map [ string ] string )
}
hostname := pod . Name
2016-04-14 17:45:29 +00:00
if len ( pod . Spec . Hostname ) > 0 {
2015-12-16 07:49:58 +00:00
if msgs := utilvalidation . IsDNS1123Label ( pod . Spec . Hostname ) ; len ( msgs ) != 0 {
return "" , "" , fmt . Errorf ( "Pod Hostname %q is not a valid DNS label: %s" , pod . Spec . Hostname , strings . Join ( msgs , ";" ) )
2016-04-14 17:45:29 +00:00
}
2015-12-16 07:49:58 +00:00
hostname = pod . Spec . Hostname
2016-04-14 17:45:29 +00:00
} else {
hostnameCandidate := podAnnotations [ utilpod . PodHostnameAnnotation ]
2015-12-16 07:49:58 +00:00
if len ( utilvalidation . IsDNS1123Label ( hostnameCandidate ) ) == 0 {
2016-04-14 17:45:29 +00:00
// use hostname annotation, if specified.
hostname = hostnameCandidate
}
2016-02-02 18:59:54 +00:00
}
if len ( hostname ) > hostnameMaxLen {
hostname = hostname [ : hostnameMaxLen ]
glog . Errorf ( "hostname for pod:%q was longer than %d. Truncated hostname to :%q" , pod . Name , hostnameMaxLen , hostname )
}
hostDomain := ""
2016-04-14 17:45:29 +00:00
if len ( pod . Spec . Subdomain ) > 0 {
2015-12-16 07:49:58 +00:00
if msgs := utilvalidation . IsDNS1123Label ( pod . Spec . Subdomain ) ; len ( msgs ) != 0 {
return "" , "" , fmt . Errorf ( "Pod Subdomain %q is not a valid DNS label: %s" , pod . Spec . Subdomain , strings . Join ( msgs , ";" ) )
2016-04-14 17:45:29 +00:00
}
2015-12-16 07:49:58 +00:00
hostDomain = fmt . Sprintf ( "%s.%s.svc.%s" , pod . Spec . Subdomain , pod . Namespace , clusterDomain )
2016-04-14 17:45:29 +00:00
} else {
subdomainCandidate := pod . Annotations [ utilpod . PodSubdomainAnnotation ]
2015-12-16 07:49:58 +00:00
if len ( utilvalidation . IsDNS1123Label ( subdomainCandidate ) ) == 0 {
2016-04-14 17:45:29 +00:00
hostDomain = fmt . Sprintf ( "%s.%s.svc.%s" , subdomainCandidate , pod . Namespace , clusterDomain )
}
2016-02-02 18:59:54 +00:00
}
2016-04-14 17:45:29 +00:00
return hostname , hostDomain , nil
2016-02-02 18:59:54 +00:00
}
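// Sketch of the resulting values (hypothetical pod, clusterDomain assumed to
// be "cluster.local"): a pod named "web-0-abc123" in namespace "default" with
// Spec.Hostname "web-0" and Spec.Subdomain "nginx" yields
//
//	hostname   = "web-0"
//	hostDomain = "nginx.default.svc.cluster.local"
//
// If neither the spec fields nor the annotations are set, the hostname falls
// back to the pod name (truncated to 63 characters) and hostDomain stays empty.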
2015-04-23 20:55:50 +00:00
// GenerateRunContainerOptions generates the RunContainerOptions, which can be used by
2015-03-26 18:59:41 +00:00
// the container runtime to set parameters for launching a container.
2016-03-07 20:24:08 +00:00
func ( kl * Kubelet ) GenerateRunContainerOptions ( pod * api . Pod , container * api . Container , podIP string ) ( * kubecontainer . RunContainerOptions , error ) {
2015-03-26 18:59:41 +00:00
var err error
2015-05-12 21:18:00 +00:00
opts := & kubecontainer . RunContainerOptions { CgroupParent : kl . cgroupRoot }
2016-04-14 17:45:29 +00:00
hostname , hostDomainName , err := kl . GeneratePodHostNameAndDomain ( pod )
if err != nil {
return nil , err
}
2016-02-02 18:59:54 +00:00
opts . Hostname = hostname
2016-06-17 19:36:56 +00:00
podName := volumehelper . GetUniquePodName ( pod )
volumes := kl . volumeManager . GetMountedVolumesForPod ( podName )
2015-05-12 21:49:35 +00:00
opts . PortMappings = makePortMappings ( container )
2015-10-07 19:19:06 +00:00
// Docker does not relabel volumes if the container is running
// in the host pid or ipc namespaces so the kubelet must
// relabel the volumes
if pod . Spec . SecurityContext != nil && ( pod . Spec . SecurityContext . HostIPC || pod . Spec . SecurityContext . HostPID ) {
2016-05-30 02:22:22 +00:00
err = kl . relabelVolumes ( pod , volumes )
2015-10-07 19:19:06 +00:00
if err != nil {
return nil , err
}
}
2016-05-30 02:22:22 +00:00
opts . Mounts , err = makeMounts ( pod , kl . getPodDir ( pod . UID ) , container , hostname , hostDomainName , podIP , volumes )
2015-10-21 17:17:27 +00:00
if err != nil {
return nil , err
}
2016-03-07 20:24:08 +00:00
opts . Envs , err = kl . makeEnvironmentVariables ( pod , container , podIP )
2014-06-06 23:40:48 +00:00
if err != nil {
2015-03-26 18:59:41 +00:00
return nil , err
2014-11-14 19:34:41 +00:00
}
2014-11-07 06:41:16 +00:00
if len ( container . TerminationMessagePath ) != 0 {
2014-11-23 15:47:25 +00:00
p := kl . getPodContainerDir ( pod . UID , container . Name )
2014-11-07 06:41:16 +00:00
if err := os . MkdirAll ( p , 0750 ) ; err != nil {
2015-01-06 00:38:47 +00:00
glog . Errorf ( "Error on creating %q: %v" , p , err )
2014-11-07 06:41:16 +00:00
} else {
2015-03-26 18:59:41 +00:00
opts . PodContainerDir = p
2014-11-07 06:41:16 +00:00
}
}
2015-07-28 18:54:32 +00:00
2016-01-28 23:57:38 +00:00
opts . DNS , opts . DNSSearch , err = kl . GetClusterDNS ( pod )
2015-07-28 18:54:32 +00:00
if err != nil {
return nil , err
2014-11-12 05:21:40 +00:00
}
2015-07-28 18:54:32 +00:00
2015-03-26 18:59:41 +00:00
return opts , nil
}
2015-09-09 17:45:01 +00:00
var masterServices = sets . NewString ( "kubernetes" )
2015-01-08 15:25:14 +00:00
// getServiceEnvVarMap makes a map[string]string of env vars for services a pod in namespace ns should see
func ( kl * Kubelet ) getServiceEnvVarMap ( ns string ) ( map [ string ] string , error ) {
var (
serviceMap = make ( map [ string ] api . Service )
m = make ( map [ string ] string )
)
// Get all service resources from the master (via a cache),
2015-07-03 20:29:14 +00:00
// and populate them into service environment variables.
2015-01-08 15:25:14 +00:00
if kl . serviceLister == nil {
// Kubelets without masters (e.g. plain GCE ContainerVM) don't set env vars.
return m , nil
}
services , err := kl . serviceLister . List ( )
if err != nil {
2015-02-16 17:33:20 +00:00
return m , fmt . Errorf ( "failed to list services when setting up env vars." )
2015-01-08 15:25:14 +00:00
}
// project the services in namespace ns onto the master services
for _ , service := range services . Items {
2015-05-23 20:41:11 +00:00
// ignore services where ClusterIP is "None" or empty
2015-03-16 21:36:30 +00:00
if ! api . IsServiceIPSet ( & service ) {
continue
}
2015-01-08 15:25:14 +00:00
serviceName := service . Name
switch service . Namespace {
// for the case when the master service namespace is the namespace that the pod
2015-01-28 17:00:53 +00:00
// is in, the pod should receive all the services in the namespace.
2015-01-08 15:25:14 +00:00
//
// ordering of the case clauses below enforces this
case ns :
serviceMap [ serviceName ] = service
case kl . masterServiceNamespace :
if masterServices . Has ( serviceName ) {
2015-06-02 18:46:57 +00:00
if _ , exists := serviceMap [ serviceName ] ; ! exists {
2015-01-08 15:25:14 +00:00
serviceMap [ serviceName ] = service
}
}
}
}
services . Items = [ ] api . Service { }
for _ , service := range serviceMap {
services . Items = append ( services . Items , service )
}
for _ , e := range envvars . FromServices ( & services ) {
m [ e . Name ] = e . Value
}
return m , nil
}
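// As a rough illustration (naming assumed to follow pkg/kubelet/envvars;
// values are hypothetical): a service "redis-master" with ClusterIP 10.0.0.11
// and port 6379 visible to the pod's namespace would surface entries such as
//
//	REDIS_MASTER_SERVICE_HOST=10.0.0.11
//	REDIS_MASTER_SERVICE_PORT=6379
//
// in the returned map, alongside the docker-link style *_PORT_* variables.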
2016-04-28 04:26:36 +00:00
// Make the environment variables for a pod in the given namespace.
2016-03-07 20:24:08 +00:00
func ( kl * Kubelet ) makeEnvironmentVariables ( pod * api . Pod , container * api . Container , podIP string ) ( [ ] kubecontainer . EnvVar , error ) {
2015-05-12 21:49:35 +00:00
var result [ ] kubecontainer . EnvVar
2016-04-25 04:49:48 +00:00
// Note: These are added to the docker Config, but are not included in the checksum computed
2015-01-08 15:25:14 +00:00
// by dockertools.BuildDockerName(...). That way, we can still determine whether an
// api.Container is already running by its hash. (We don't want to restart a container just
// because some service changed.)
//
// Note that there is a race between Kubelet seeing the pod and kubelet seeing the service.
// To avoid this, users can: (1) wait between starting a service and starting the pods that use it; or (2) detect
// missing service env var and exit and be restarted; or (3) use DNS instead of env vars
// and keep trying to resolve the DNS name of the service (recommended).
2015-04-23 20:57:30 +00:00
serviceEnv , err := kl . getServiceEnvVarMap ( pod . Namespace )
2015-01-08 15:25:14 +00:00
if err != nil {
return result , err
}
2015-05-22 22:21:03 +00:00
// Determine the final values of variables:
//
// 1. Determine the final value of each variable:
// a. If the variable's Value is set, expand the `$(var)` references to other
// variables in the .Value field; the sources of variables are the declared
// variables of the container and the service environment variables
// b. If a source is defined for an environment variable, resolve the source
// 2. Create the container's environment in the order variables are declared
// 3. Add remaining service environment vars
2015-12-17 20:51:51 +00:00
var (
tmpEnv = make ( map [ string ] string )
2016-01-15 16:48:36 +00:00
configMaps = make ( map [ string ] * api . ConfigMap )
2016-01-18 17:20:51 +00:00
secrets = make ( map [ string ] * api . Secret )
2015-12-17 20:51:51 +00:00
mappingFunc = expansion . MappingFuncFor ( tmpEnv , serviceEnv )
)
2015-05-22 22:21:03 +00:00
for _ , envVar := range container . Env {
2015-03-12 14:39:22 +00:00
// Accesses apiserver+Pods.
2015-01-08 15:25:14 +00:00
// So, the master may set service env vars, or kubelet may. In case both are doing
// it, we delete the key from the kubelet-generated ones so we don't have duplicate
// env vars.
// TODO: remove this next line once all platforms use apiserver+Pods.
2015-05-22 22:21:03 +00:00
delete ( serviceEnv , envVar . Name )
runtimeVal := envVar . Value
if runtimeVal != "" {
// Step 1a: expand variable references
runtimeVal = expansion . Expand ( runtimeVal , mappingFunc )
2015-12-17 20:51:51 +00:00
} else if envVar . ValueFrom != nil {
2015-05-22 22:21:03 +00:00
// Step 1b: resolve alternate env var sources
2015-12-17 20:51:51 +00:00
switch {
case envVar . ValueFrom . FieldRef != nil :
2016-03-07 20:24:08 +00:00
runtimeVal , err = kl . podFieldSelectorRuntimeValue ( envVar . ValueFrom . FieldRef , pod , podIP )
2015-12-17 20:51:51 +00:00
if err != nil {
return result , err
}
2016-05-23 22:08:22 +00:00
case envVar . ValueFrom . ResourceFieldRef != nil :
2016-06-16 17:43:32 +00:00
defaultedPod , defaultedContainer , err := kl . defaultPodLimitsForDownwardApi ( pod , container )
if err != nil {
return result , err
}
runtimeVal , err = containerResourceRuntimeValue ( envVar . ValueFrom . ResourceFieldRef , defaultedPod , defaultedContainer )
2016-05-23 22:08:22 +00:00
if err != nil {
return result , err
}
2015-12-17 20:51:51 +00:00
case envVar . ValueFrom . ConfigMapKeyRef != nil :
name := envVar . ValueFrom . ConfigMapKeyRef . Name
key := envVar . ValueFrom . ConfigMapKeyRef . Key
configMap , ok := configMaps [ name ]
if ! ok {
2016-02-03 21:21:05 +00:00
configMap , err = kl . kubeClient . Core ( ) . ConfigMaps ( pod . Namespace ) . Get ( name )
2015-12-17 20:51:51 +00:00
if err != nil {
return result , err
}
}
runtimeVal , ok = configMap . Data [ key ]
if ! ok {
return result , fmt . Errorf ( "Couldn't find key %v in ConfigMap %v/%v" , key , pod . Namespace , name )
}
2016-01-18 17:20:51 +00:00
case envVar . ValueFrom . SecretKeyRef != nil :
name := envVar . ValueFrom . SecretKeyRef . Name
key := envVar . ValueFrom . SecretKeyRef . Key
secret , ok := secrets [ name ]
if ! ok {
2016-02-03 21:21:05 +00:00
secret , err = kl . kubeClient . Core ( ) . Secrets ( pod . Namespace ) . Get ( name )
2016-01-18 17:20:51 +00:00
if err != nil {
return result , err
}
}
runtimeValBytes , ok := secret . Data [ key ]
if ! ok {
return result , fmt . Errorf ( "Couldn't find key %v in Secret %v/%v" , key , pod . Namespace , name )
}
runtimeVal = string ( runtimeValBytes )
2015-05-22 22:21:03 +00:00
}
2015-04-23 20:57:30 +00:00
}
2015-05-22 22:21:03 +00:00
tmpEnv [ envVar . Name ] = runtimeVal
result = append ( result , kubecontainer . EnvVar { Name : envVar . Name , Value : tmpEnv [ envVar . Name ] } )
2015-01-08 15:25:14 +00:00
}
// Append remaining service env vars.
for k , v := range serviceEnv {
2015-05-12 21:49:35 +00:00
result = append ( result , kubecontainer . EnvVar { Name : k , Value : v } )
2015-01-08 15:25:14 +00:00
}
return result , nil
}
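// Expansion sketch (hypothetical container env, for illustration only):
// given the declarations
//
//	{Name: "POD_NAME", ValueFrom: &api.EnvVarSource{FieldRef: &api.ObjectFieldSelector{FieldPath: "metadata.name"}}}
//	{Name: "LOG_PATH", Value: "/var/log/$(POD_NAME)"}
//
// POD_NAME is resolved first via podFieldSelectorRuntimeValue and stored in
// tmpEnv, so LOG_PATH expands to "/var/log/<pod name>". A $(VAR) reference
// that is not found in tmpEnv or serviceEnv is left as literal text by the
// mapping function.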
2016-04-28 04:26:36 +00:00
// podFieldSelectorRuntimeValue returns the runtime value of the given
// selector for a pod.
2016-03-07 20:24:08 +00:00
func ( kl * Kubelet ) podFieldSelectorRuntimeValue ( fs * api . ObjectFieldSelector , pod * api . Pod , podIP string ) ( string , error ) {
2015-04-23 20:57:30 +00:00
internalFieldPath , _ , err := api . Scheme . ConvertFieldLabel ( fs . APIVersion , "Pod" , fs . FieldPath , "" )
if err != nil {
return "" , err
}
2015-08-12 18:14:49 +00:00
switch internalFieldPath {
case "status.podIP" :
2016-03-07 20:24:08 +00:00
return podIP , nil
2015-08-12 18:14:49 +00:00
}
2015-04-23 20:57:30 +00:00
return fieldpath . ExtractFieldPathAsString ( pod , internalFieldPath )
}
2016-05-23 22:08:22 +00:00
// containerResourceRuntimeValue returns the value of the provided container resource
func containerResourceRuntimeValue ( fs * api . ResourceFieldSelector , pod * api . Pod , container * api . Container ) ( string , error ) {
containerName := fs . ContainerName
if len ( containerName ) == 0 {
return fieldpath . ExtractContainerResourceValue ( fs , container )
} else {
return fieldpath . ExtractResourceValueByContainerName ( fs , pod , containerName )
}
}
2016-01-28 23:57:38 +00:00
// GetClusterDNS returns a list of the DNS servers and a list of the DNS search
2015-03-26 18:59:41 +00:00
// domains of the cluster.
2016-01-28 23:57:38 +00:00
func ( kl * Kubelet ) GetClusterDNS ( pod * api . Pod ) ( [ ] string , [ ] string , error ) {
2015-07-28 18:54:32 +00:00
var hostDNS , hostSearch [ ] string
2015-10-14 17:34:29 +00:00
// Get host DNS settings
2015-07-28 18:54:32 +00:00
if kl . resolverConfig != "" {
f , err := os . Open ( kl . resolverConfig )
if err != nil {
return nil , nil , err
}
defer f . Close ( )
2014-11-12 05:21:40 +00:00
2015-10-24 00:01:49 +00:00
hostDNS , hostSearch , err = kl . parseResolvConf ( f )
2015-07-28 18:54:32 +00:00
if err != nil {
return nil , nil , err
}
}
2015-10-14 17:34:29 +00:00
useClusterFirstPolicy := pod . Spec . DNSPolicy == api . DNSClusterFirst
if useClusterFirstPolicy && kl . clusterDNS == nil {
// clusterDNS is not known.
// pods with the ClusterFirst DNS policy cannot use it.
kl . recorder . Eventf ( pod , api . EventTypeWarning , "MissingClusterDNS" , "kubelet does not have ClusterDNS IP configured and cannot create Pod using %q policy. Falling back to DNSDefault policy." , pod . Spec . DNSPolicy )
2015-12-21 19:25:38 +00:00
log := fmt . Sprintf ( "kubelet does not have ClusterDNS IP configured and cannot create Pod using %q policy. pod: %q. Falling back to DNSDefault policy." , pod . Spec . DNSPolicy , format . Pod ( pod ) )
2015-10-14 17:34:29 +00:00
kl . recorder . Eventf ( kl . nodeRef , api . EventTypeWarning , "MissingClusterDNS" , log )
// fallback to DNSDefault
useClusterFirstPolicy = false
}
if ! useClusterFirstPolicy {
2015-07-28 18:54:32 +00:00
// When the kubelet --resolv-conf flag is set to the empty string, use
// DNS settings that override the docker default (which is to use
// /etc/resolv.conf) and effectively disable DNS lookups. According to
// the bind documentation, the behavior of the DNS client library when
// "nameservers" are not specified is to "use the nameserver on the
// local machine". A nameserver setting of localhost is equivalent to
// this documented behavior.
if kl . resolverConfig == "" {
hostDNS = [ ] string { "127.0.0.1" }
hostSearch = [ ] string { "." }
}
return hostDNS , hostSearch , nil
2014-11-12 05:21:40 +00:00
}
2015-03-26 18:59:41 +00:00
2015-10-14 17:34:29 +00:00
// for a pod with DNSClusterFirst policy, the cluster DNS server is the only nameserver configured for
// the pod. The cluster DNS server itself will forward queries to other nameservers that it is configured to use,
// in case the cluster DNS server cannot resolve the DNS query itself
dns := [ ] string { kl . clusterDNS . String ( ) }
var dnsSearch [ ] string
2014-11-12 05:21:40 +00:00
if kl . clusterDomain != "" {
2015-06-01 19:21:39 +00:00
nsSvcDomain := fmt . Sprintf ( "%s.svc.%s" , pod . Namespace , kl . clusterDomain )
svcDomain := fmt . Sprintf ( "svc.%s" , kl . clusterDomain )
dnsSearch = append ( [ ] string { nsSvcDomain , svcDomain , kl . clusterDomain } , hostSearch ... )
2015-07-28 18:54:32 +00:00
} else {
dnsSearch = hostSearch
2014-11-12 05:21:40 +00:00
}
2015-03-26 18:59:41 +00:00
return dns , dnsSearch , nil
2014-11-12 05:21:40 +00:00
}
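// For a pod with the ClusterFirst DNS policy, assuming clusterDNS is
// 10.0.0.10, clusterDomain is "cluster.local", the pod namespace is
// "frontend" and the host search path is "example.com" (all hypothetical),
// the returned values correspond to a resolv.conf of:
//
//	nameserver 10.0.0.10
//	search frontend.svc.cluster.local svc.cluster.local cluster.local example.com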
2016-01-20 21:26:02 +00:00
// One of the following arguments must be non-nil: runningPod, status.
2016-02-12 19:33:32 +00:00
// TODO: Modify containerRuntime.KillPod() to accept the right arguments.
2016-04-27 03:30:59 +00:00
func ( kl * Kubelet ) killPod ( pod * api . Pod , runningPod * kubecontainer . Pod , status * kubecontainer . PodStatus , gracePeriodOverride * int64 ) error {
2016-01-20 21:26:02 +00:00
var p kubecontainer . Pod
if runningPod != nil {
p = * runningPod
} else if status != nil {
p = kubecontainer . ConvertPodStatusToRunningPod ( status )
}
2016-04-27 03:30:59 +00:00
return kl . containerRuntime . KillPod ( pod , p , gracePeriodOverride )
2014-08-08 04:49:17 +00:00
}
2015-03-04 01:33:48 +00:00
// makePodDataDirs creates the directories for the pod's data.
2015-03-13 13:19:07 +00:00
func ( kl * Kubelet ) makePodDataDirs ( pod * api . Pod ) error {
2015-03-04 01:33:48 +00:00
uid := pod . UID
2016-05-21 21:23:18 +00:00
if err := os . MkdirAll ( kl . getPodDir ( uid ) , 0750 ) ; err != nil && ! os . IsExist ( err ) {
2015-03-04 01:33:48 +00:00
return err
}
2016-05-21 21:23:18 +00:00
if err := os . MkdirAll ( kl . getPodVolumesDir ( uid ) , 0750 ) ; err != nil && ! os . IsExist ( err ) {
2015-03-04 01:33:48 +00:00
return err
}
2016-05-21 21:23:18 +00:00
if err := os . MkdirAll ( kl . getPodPluginsDir ( uid ) , 0750 ) ; err != nil && ! os . IsExist ( err ) {
2015-03-04 01:33:48 +00:00
return err
}
return nil
}
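// Assuming the default kubelet root directory of /var/lib/kubelet (a
// hypothetical but typical value), this creates a layout of roughly:
//
//	/var/lib/kubelet/pods/<pod UID>/
//	/var/lib/kubelet/pods/<pod UID>/volumes/
//	/var/lib/kubelet/pods/<pod UID>/plugins/
//
// Pre-existing directories are tolerated so that syncPod can call this on
// every sync without error.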
2016-04-28 04:26:36 +00:00
// syncPod is the transaction script for the sync of a single pod.
//
// Arguments:
//
// pod - the pod to sync
// mirrorPod - the mirror pod for the pod to sync, if it is a static pod
// podStatus - the current status (TODO: always from the status manager?)
2016-06-14 09:29:18 +00:00
// updateType - the type of update (ADD, UPDATE, REMOVE, RECONCILE, DELETE)
2016-04-28 04:26:36 +00:00
//
// The workflow is:
// * If the pod is being created, record pod worker start latency
// * Call generateAPIPodStatus to prepare an api.PodStatus for the pod
// * If the pod is being seen as running for the first time, record pod
// start latency
// * Update the status of the pod in the status manager
// * Kill the pod if it should not be running
// * Create a mirror pod if the pod is a static pod, and does not
// already have a mirror pod
// * Create the data directories for the pod if they do not exist
2016-05-30 02:22:22 +00:00
// * Wait for volumes to attach/mount
2016-04-28 04:26:36 +00:00
// * Fetch the pull secrets for the pod
// * Call the container runtime's SyncPod callback
// * Update the traffic shaping for the pod's ingress and egress limits
//
// If any step of this workflow errors, the error is returned, and is repeated
// on the next syncPod call.
2016-05-06 18:07:24 +00:00
func ( kl * Kubelet ) syncPod ( o syncPodOptions ) error {
// pull out the required options
pod := o . pod
mirrorPod := o . mirrorPod
podStatus := o . podStatus
updateType := o . updateType
// if we want to kill a pod, do it now!
if updateType == kubetypes . SyncPodKill {
killPodOptions := o . killPodOptions
if killPodOptions == nil || killPodOptions . PodStatusFunc == nil {
return fmt . Errorf ( "kill pod options are required if update type is kill" )
}
apiPodStatus := killPodOptions . PodStatusFunc ( pod , podStatus )
kl . statusManager . SetPodStatus ( pod , apiPodStatus )
// we kill the pod with the specified grace period since this is a termination
if err := kl . killPod ( pod , nil , podStatus , killPodOptions . PodTerminationGracePeriodSecondsOverride ) ; err != nil {
// there was an error killing the pod, so we return that error directly
utilruntime . HandleError ( err )
return err
}
return nil
}
2016-04-28 04:26:36 +00:00
// Latency measurements for the main workflow are relative to the
// first time the pod was seen by the API server.
2015-06-05 19:42:23 +00:00
var firstSeenTime time . Time
2016-01-20 02:15:10 +00:00
if firstSeenTimeStr , ok := pod . Annotations [ kubetypes . ConfigFirstSeenAnnotationKey ] ; ok {
2015-10-09 17:24:31 +00:00
firstSeenTime = kubetypes . ConvertToTimestamp ( firstSeenTimeStr ) . Get ( )
2015-06-05 19:42:23 +00:00
}
2015-03-09 14:23:52 +00:00
2016-04-28 04:26:36 +00:00
// Record pod worker start latency if being created
// TODO: make pod workers record their own latencies
2016-01-20 02:15:10 +00:00
if updateType == kubetypes . SyncPodCreate {
if ! firstSeenTime . IsZero ( ) {
// This is the first time we are syncing the pod. Record the latency
// since kubelet first saw the pod if firstSeenTime is set.
metrics . PodWorkerStartLatency . Observe ( metrics . SinceInMicroseconds ( firstSeenTime ) )
2015-03-09 14:23:52 +00:00
} else {
2016-01-20 02:15:10 +00:00
glog . V ( 3 ) . Infof ( "First seen time not recorded for pod %q" , pod . UID )
2015-03-09 14:23:52 +00:00
}
2016-01-20 02:15:10 +00:00
}
2016-04-28 04:26:36 +00:00
// Generate final API pod status with pod and status manager status
2016-02-13 05:56:12 +00:00
apiPodStatus := kl . generateAPIPodStatus ( pod , podStatus )
2016-04-21 22:32:11 +00:00
// The pod IP may be changed in generateAPIPodStatus if the pod is using host network. (See #24576)
// TODO(random-liu): After writing pod spec into container labels, check whether pod is using host network, and
// set pod IP to hostIP directly in runtime.GetPodStatus
podStatus . IP = apiPodStatus . PodIP
2016-04-28 04:26:36 +00:00
2016-01-20 02:15:10 +00:00
// Record the time it takes for the pod to become running.
existingStatus , ok := kl . statusManager . GetPodStatus ( pod . UID )
2016-01-20 21:26:02 +00:00
if ! ok || existingStatus . Phase == api . PodPending && apiPodStatus . Phase == api . PodRunning &&
2016-01-20 02:15:10 +00:00
! firstSeenTime . IsZero ( ) {
metrics . PodStartLatency . Observe ( metrics . SinceInMicroseconds ( firstSeenTime ) )
}
2016-03-29 03:08:54 +00:00
2016-04-28 04:26:36 +00:00
// Update status in the status manager
2016-01-20 02:15:10 +00:00
kl . statusManager . SetPodStatus ( pod , apiPodStatus )
2015-03-09 14:23:52 +00:00
2016-04-28 04:26:36 +00:00
// Kill pod if it should not be running
2016-05-10 13:32:03 +00:00
if errOuter := canRunPod ( pod ) ; errOuter != nil || pod . DeletionTimestamp != nil || apiPodStatus . Phase == api . PodFailed {
if errInner := kl . killPod ( pod , nil , podStatus , nil ) ; errInner != nil {
errOuter = fmt . Errorf ( "error killing pod: %v" , errInner )
utilruntime . HandleError ( errOuter )
2015-08-20 01:57:58 +00:00
}
2016-05-06 18:07:24 +00:00
// there was no error killing the pod, but the pod cannot be run, so we return that err (if any)
2016-05-10 13:32:03 +00:00
return errOuter
2015-03-26 18:25:48 +00:00
}
2015-11-04 18:50:43 +00:00
// Create Mirror Pod for Static Pod if it doesn't already exist
if kubepod . IsStaticPod ( pod ) {
2015-12-07 21:31:02 +00:00
podFullName := kubecontainer . GetPodFullName ( pod )
2016-01-21 19:55:37 +00:00
deleted := false
2016-03-07 05:42:41 +00:00
if mirrorPod != nil {
if mirrorPod . DeletionTimestamp != nil || ! kl . podManager . IsMirrorPodOf ( mirrorPod , pod ) {
// The mirror pod is semantically different from the static pod. Remove
// it. The mirror pod will get recreated later.
glog . Errorf ( "Deleting mirror pod %q because it is outdated" , format . Pod ( mirrorPod ) )
if err := kl . podManager . DeleteMirrorPod ( podFullName ) ; err != nil {
glog . Errorf ( "Failed deleting mirror pod %q: %v" , format . Pod ( mirrorPod ) , err )
} else {
deleted = true
}
2015-11-04 18:50:43 +00:00
}
}
2016-01-21 19:55:37 +00:00
if mirrorPod == nil || deleted {
2015-12-07 21:31:02 +00:00
glog . V ( 3 ) . Infof ( "Creating a mirror pod for static pod %q" , format . Pod ( pod ) )
2015-11-04 18:50:43 +00:00
if err := kl . podManager . CreateMirrorPod ( pod ) ; err != nil {
2015-12-07 21:31:02 +00:00
glog . Errorf ( "Failed creating a mirror pod for %q: %v" , format . Pod ( pod ) , err )
2015-11-04 18:50:43 +00:00
}
}
}
2016-04-28 04:26:36 +00:00
// Make data directories for the pod
2015-04-17 23:12:08 +00:00
if err := kl . makePodDataDirs ( pod ) ; err != nil {
2015-12-07 21:31:02 +00:00
glog . Errorf ( "Unable to make pod data directories for pod %q: %v" , format . Pod ( pod ) , err )
2015-04-17 23:12:08 +00:00
return err
}
2016-05-30 02:22:22 +00:00
// Wait for volumes to attach/mount
2016-06-16 17:43:32 +00:00
defaultedPod , _ , err := kl . defaultPodLimitsForDownwardApi ( pod , nil )
if err != nil {
return err
}
if err := kl . volumeManager . WaitForAttachAndMount ( defaultedPod ) ; err != nil {
2016-07-08 20:53:33 +00:00
kl . recorder . Eventf ( pod , api . EventTypeWarning , kubecontainer . FailedMountVolume , "Unable to mount volumes for pod %q: %v" , format . Pod ( pod ) , err )
glog . Errorf ( "Unable to mount volumes for pod %q: %v; skipping pod" , format . Pod ( pod ) , err )
return err
2015-04-16 00:40:07 +00:00
}
2016-04-28 04:26:36 +00:00
// Fetch the pull secrets for the pod
2015-05-08 17:53:00 +00:00
pullSecrets , err := kl . getPullSecretsForPod ( pod )
if err != nil {
2015-12-07 21:31:02 +00:00
glog . Errorf ( "Unable to get pull secrets for pod %q: %v" , format . Pod ( pod ) , err )
2015-05-08 17:53:00 +00:00
return err
}
2016-04-28 04:26:36 +00:00
// Call the container runtime's SyncPod callback
2016-01-12 10:19:13 +00:00
result := kl . containerRuntime . SyncPod ( pod , apiPodStatus , podStatus , pullSecrets , kl . backOff )
2016-01-12 21:28:00 +00:00
kl . reasonCache . Update ( pod . UID , result )
if err = result . Error ( ) ; err != nil {
2015-05-01 01:37:15 +00:00
return err
2015-03-10 14:09:55 +00:00
}
2016-04-28 04:26:36 +00:00
// early successful exit if traffic shaping is not enabled on this kubelet
2016-03-22 16:43:13 +00:00
if ! kl . shapingEnabled ( ) {
return nil
}
2016-04-28 04:26:36 +00:00
// Update the traffic shaping for the pod's ingress and egress limits
2016-04-01 17:00:05 +00:00
ingress , egress , err := bandwidth . ExtractPodBandwidthResources ( pod . Annotations )
2015-08-10 22:08:31 +00:00
if err != nil {
return err
}
if egress != nil || ingress != nil {
2015-09-14 21:56:51 +00:00
if podUsesHostNetwork ( pod ) {
2015-11-13 22:30:01 +00:00
kl . recorder . Event ( pod , api . EventTypeWarning , kubecontainer . HostNetworkNotSupported , "Bandwidth shaping is not currently supported on the host network" )
2015-08-10 22:08:31 +00:00
} else if kl . shaper != nil {
2016-01-18 08:23:48 +00:00
if len ( apiPodStatus . PodIP ) > 0 {
err = kl . shaper . ReconcileCIDR ( fmt . Sprintf ( "%s/32" , apiPodStatus . PodIP ) , egress , ingress )
2015-08-10 22:08:31 +00:00
}
} else {
2015-11-13 22:30:01 +00:00
kl . recorder . Event ( pod , api . EventTypeWarning , kubecontainer . UndefinedShaper , "Pod requests bandwidth shaping, but the shaper is undefined" )
2015-08-10 22:08:31 +00:00
}
}
2014-07-01 05:27:56 +00:00
return nil
}
2016-04-28 04:26:36 +00:00
// returns whether the pod uses the host network namespace.
2015-09-14 21:56:51 +00:00
func podUsesHostNetwork ( pod * api . Pod ) bool {
return pod . Spec . SecurityContext != nil && pod . Spec . SecurityContext . HostNetwork
}
2015-05-08 17:53:00 +00:00
// getPullSecretsForPod inspects the Pod and retrieves the referenced pull secrets
// TODO duplicate secrets are being retrieved multiple times and there is no cache. Creating and using a secret manager interface will make this easier to address.
func ( kl * Kubelet ) getPullSecretsForPod ( pod * api . Pod ) ( [ ] api . Secret , error ) {
pullSecrets := [ ] api . Secret { }
for _ , secretRef := range pod . Spec . ImagePullSecrets {
2016-02-03 21:21:05 +00:00
secret , err := kl . kubeClient . Core ( ) . Secrets ( pod . Namespace ) . Get ( secretRef . Name )
2015-05-08 17:53:00 +00:00
if err != nil {
2015-08-14 16:51:28 +00:00
glog . Warningf ( "Unable to retrieve pull secret %s/%s for %s/%s due to %v. The image pull may not succeed." , pod . Namespace , secretRef . Name , pod . Namespace , pod . Name , err )
continue
2015-05-08 17:53:00 +00:00
}
pullSecrets = append ( pullSecrets , * secret )
}
return pullSecrets , nil
}
2016-04-28 04:26:36 +00:00
// cleanupOrphanedPodDirs removes the directories of pods that should not be
// running and whose volumes have already been cleaned up.
2016-05-30 02:22:22 +00:00
func ( kl * Kubelet ) cleanupOrphanedPodDirs (
pods [ ] * api . Pod , runningPods [ ] * kubecontainer . Pod ) error {
allPods := sets . NewString ( )
2015-04-03 22:51:50 +00:00
for _ , pod := range pods {
2016-05-30 02:22:22 +00:00
allPods . Insert ( string ( pod . UID ) )
2015-01-12 00:42:11 +00:00
}
2015-07-01 22:25:41 +00:00
for _ , pod := range runningPods {
2016-05-30 02:22:22 +00:00
allPods . Insert ( string ( pod . ID ) )
2015-07-01 22:25:41 +00:00
}
2015-01-12 00:42:11 +00:00
found , err := kl . listPodsFromDisk ( )
if err != nil {
return err
}
errlist := [ ] error { }
2015-08-28 05:17:57 +00:00
for _ , uid := range found {
2016-05-30 02:22:22 +00:00
if allPods . Has ( string ( uid ) ) {
2015-08-28 05:17:57 +00:00
continue
}
2016-05-30 02:22:22 +00:00
if podVolumesExist := kl . podVolumesExist ( uid ) ; podVolumesExist {
// If volumes have not been unmounted/detached, do not delete directory.
// Doing so may result in corruption of data.
glog . V ( 3 ) . Infof ( "Orphaned pod %q found, but volumes are not cleaned up; err: %v" , uid , err )
2015-08-28 05:17:57 +00:00
continue
}
2015-09-01 22:32:03 +00:00
2015-08-28 05:17:57 +00:00
glog . V ( 3 ) . Infof ( "Orphaned pod %q found, removing" , uid )
if err := os . RemoveAll ( kl . getPodDir ( uid ) ) ; err != nil {
2016-05-30 02:22:22 +00:00
glog . Infof ( "Failed to remove orphaned pod %q dir; err: %v" , uid , err )
2015-08-28 05:17:57 +00:00
errlist = append ( errlist , err )
2015-01-12 00:42:11 +00:00
}
}
2015-10-14 05:18:37 +00:00
return utilerrors . NewAggregate ( errlist )
2015-01-12 00:42:11 +00:00
}
2015-11-05 05:59:15 +00:00
// Get pods which should be resynchronized. Currently, the following pods should be resynchronized:
// * pods whose work is ready.
2016-04-15 18:17:17 +00:00
// * pods for which internal modules have requested a sync.
2015-11-05 05:59:15 +00:00
func ( kl * Kubelet ) getPodsToSync ( ) [ ] * api . Pod {
allPods := kl . podManager . GetPods ( )
podUIDs := kl . workQueue . GetWork ( )
podUIDSet := sets . NewString ( )
for _ , podUID := range podUIDs {
podUIDSet . Insert ( string ( podUID ) )
}
var podsToSync [ ] * api . Pod
for _ , pod := range allPods {
if podUIDSet . Has ( string ( pod . UID ) ) {
// The work of the pod is ready
podsToSync = append ( podsToSync , pod )
2016-04-15 18:17:17 +00:00
continue
}
for _ , podSyncLoopHandler := range kl . PodSyncLoopHandlers {
if podSyncLoopHandler . ShouldSync ( pod ) {
podsToSync = append ( podsToSync , pod )
break
}
2015-11-05 05:59:15 +00:00
}
}
return podsToSync
}
2015-08-19 00:52:26 +00:00
// Returns true if pod is in the terminated state ("Failed" or "Succeeded").
func ( kl * Kubelet ) podIsTerminated ( pod * api . Pod ) bool {
var status api . PodStatus
// Check the cached pod status which was set after the last sync.
status , ok := kl . statusManager . GetPodStatus ( pod . UID )
if ! ok {
// If there is no cached status, use the status from the
// apiserver. This is useful if kubelet has recently been
// restarted.
status = pod . Status
}
2015-05-16 00:01:56 +00:00
if status . Phase == api . PodFailed || status . Phase == api . PodSucceeded {
return true
}
2015-08-19 00:52:26 +00:00
2015-05-16 00:01:56 +00:00
return false
2015-05-09 05:01:43 +00:00
}
2016-04-28 04:26:36 +00:00
// filterOutTerminatedPods returns the given pods which the status manager
// does not consider failed or succeeded.
2015-08-19 00:52:26 +00:00
func ( kl * Kubelet ) filterOutTerminatedPods ( pods [ ] * api . Pod ) [ ] * api . Pod {
var filteredPods [ ] * api . Pod
for _ , p := range pods {
if kl . podIsTerminated ( p ) {
2015-04-24 18:20:23 +00:00
continue
}
2015-08-19 00:52:26 +00:00
filteredPods = append ( filteredPods , p )
2015-04-24 18:20:23 +00:00
}
2015-08-19 00:52:26 +00:00
return filteredPods
2015-08-11 23:25:17 +00:00
}
2014-07-15 17:26:56 +00:00
2015-08-11 23:25:17 +00:00
// removeOrphanedPodStatuses removes obsolete entries in podStatus where
// the pod is no longer considered bound to this node.
2015-08-19 00:52:26 +00:00
func ( kl * Kubelet ) removeOrphanedPodStatuses ( pods [ ] * api . Pod , mirrorPods [ ] * api . Pod ) {
2015-08-18 20:26:56 +00:00
podUIDs := make ( map [ types . UID ] bool )
2015-08-11 23:25:17 +00:00
for _ , pod := range pods {
2015-08-18 20:26:56 +00:00
podUIDs [ pod . UID ] = true
2015-08-11 23:25:17 +00:00
}
2015-08-18 20:26:56 +00:00
for _ , pod := range mirrorPods {
podUIDs [ pod . UID ] = true
}
kl . statusManager . RemoveOrphanedStatuses ( podUIDs )
2015-08-11 23:25:17 +00:00
}
2016-04-28 04:26:36 +00:00
// deletePod deletes the pod from the internal state of the kubelet by:
// 1. stopping the associated pod worker asynchronously
// 2. signaling to kill the pod by sending on the podKillingCh channel
//
// deletePod returns an error if not all sources are ready or the pod is not
// found in the runtime cache.
2016-01-31 23:56:55 +00:00
func ( kl * Kubelet ) deletePod ( pod * api . Pod ) error {
if pod == nil {
return fmt . Errorf ( "deletePod does not allow nil pod" )
}
2016-04-26 17:58:12 +00:00
if ! kl . sourcesReady . AllReady ( ) {
2015-08-19 00:52:26 +00:00
// If the sources aren't ready, skip deletion, as we may accidentally delete pods
// for sources that haven't reported yet.
return fmt . Errorf ( "skipping delete because sources aren't ready yet" )
2014-07-01 16:37:45 +00:00
}
2016-01-31 23:56:55 +00:00
kl . podWorkers . ForgetWorker ( pod . UID )
2015-08-11 23:25:17 +00:00
2015-08-19 00:52:26 +00:00
// Runtime cache may not have been updated with the pod, but it's okay
// because the periodic cleanup routine will attempt to delete again later.
runningPods , err := kl . runtimeCache . GetPods ( )
if err != nil {
return fmt . Errorf ( "error listing containers: %v" , err )
2015-08-11 23:25:17 +00:00
}
2016-01-31 23:56:55 +00:00
runningPod := kubecontainer . Pods ( runningPods ) . FindPod ( "" , pod . UID )
if runningPod . IsEmpty ( ) {
2015-08-19 00:52:26 +00:00
return fmt . Errorf ( "pod not found" )
}
2016-03-23 23:45:24 +00:00
podPair := kubecontainer . PodPair { APIPod : pod , RunningPod : & runningPod }
2015-08-19 00:52:26 +00:00
2016-01-31 23:56:55 +00:00
kl . podKillingCh <- & podPair
2015-08-19 00:52:26 +00:00
// TODO: delete the mirror pod here?
2015-02-27 09:19:41 +00:00
2015-08-19 00:52:26 +00:00
// We leave the volume/directory cleanup to the periodic cleanup routine.
return nil
}
2016-04-28 04:26:36 +00:00
// empty is a placeholder type used to implement a set
type empty struct { }
2015-08-19 00:52:26 +00:00
// HandlePodCleanups performs a series of cleanup work, including terminating
// pod workers, killing unwanted pods, and removing orphaned volumes/pod
// directories.
2016-07-11 05:40:45 +00:00
// NOTE: This function is executed by the main sync loop, so it
// should not contain any blocking calls.
2015-08-19 00:52:26 +00:00
func ( kl * Kubelet ) HandlePodCleanups ( ) error {
allPods , mirrorPods := kl . podManager . GetPodsAndMirrorPods ( )
// Pod phase progresses monotonically. Once a pod has reached a final state,
// it should never leave regardless of the restart policy. The statuses
// of such pods should not be changed, and there is no need to sync them.
// TODO: the logic here does not handle two cases:
// 1. If the containers were removed immediately after they died, kubelet
// may fail to generate correct statuses, let alone filtering correctly.
// 2. If kubelet restarted before writing the terminated status for a pod
// to the apiserver, it could still restart the terminated pod (even
// though the pod was not considered terminated by the apiserver).
// These two conditions could be alleviated by checkpointing kubelet.
activePods := kl . filterOutTerminatedPods ( allPods )
desiredPods := make ( map [ types . UID ] empty )
for _ , pod := range activePods {
desiredPods [ pod . UID ] = empty { }
}
// Stop the workers for no-longer existing pods.
// TODO: is here the best place to forget pod workers?
kl . podWorkers . ForgetNonExistingPodWorkers ( desiredPods )
2015-08-25 17:39:41 +00:00
kl . probeManager . CleanupPods ( activePods )
2015-08-19 00:52:26 +00:00
2015-08-11 23:25:17 +00:00
runningPods , err := kl . runtimeCache . GetPods ( )
if err != nil {
glog . Errorf ( "Error listing containers: %#v" , err )
return err
}
2015-08-19 00:52:26 +00:00
for _ , pod := range runningPods {
if _ , found := desiredPods [ pod . ID ] ; ! found {
2016-03-23 23:45:24 +00:00
kl . podKillingCh <- & kubecontainer . PodPair { APIPod : nil , RunningPod : pod }
2015-08-19 00:52:26 +00:00
}
2015-04-29 17:47:25 +00:00
}
2015-08-19 00:52:26 +00:00
kl . removeOrphanedPodStatuses ( allPods , mirrorPods )
2015-04-29 17:47:25 +00:00
// Note that we just killed the unwanted pods. This may not have reflected
2015-05-11 17:50:14 +00:00
// in the cache. We need to bypass the cache to get the latest set of
2015-04-29 17:47:25 +00:00
// running pods to clean up the volumes.
// TODO: Evaluate the performance impact of bypassing the runtime cache.
2015-05-01 22:25:11 +00:00
runningPods , err = kl . containerRuntime . GetPods ( false )
2015-04-29 17:47:25 +00:00
if err != nil {
glog . Errorf ( "Error listing containers: %#v" , err )
2015-02-03 20:14:16 +00:00
return err
}
2015-02-04 01:46:28 +00:00
// Remove any orphaned volumes.
2015-05-18 20:12:35 +00:00
// Note that we pass all pods (including terminated pods) to the function,
// so that we don't remove volumes associated with terminated but not yet
// deleted pods.
2015-07-01 22:25:41 +00:00
err = kl . cleanupOrphanedPodDirs ( allPods , runningPods )
2015-01-12 00:42:11 +00:00
if err != nil {
2015-04-14 22:26:50 +00:00
glog . Errorf ( "Failed cleaning up orphaned pod directories: %v" , err )
2015-01-12 00:42:11 +00:00
return err
}
2014-07-30 21:04:19 +00:00
2015-03-09 22:46:47 +00:00
// Remove any orphaned mirror pods.
2015-03-23 19:17:12 +00:00
kl . podManager . DeleteOrphanedMirrorPods ( )
2015-03-09 22:46:47 +00:00
2016-02-12 19:33:32 +00:00
// Clear out any old bandwidth rules
2015-08-10 22:08:31 +00:00
if err = kl . cleanupBandwidthLimits ( allPods ) ; err != nil {
return err
}
2015-08-13 12:59:15 +00:00
kl . backOff . GC ( )
2014-06-06 23:40:48 +00:00
return err
}
2015-08-19 00:52:26 +00:00
// podKiller launches a goroutine to kill a pod received from the channel if
// another goroutine isn't already in action.
func ( kl * Kubelet ) podKiller ( ) {
2015-09-09 17:45:01 +00:00
killing := sets . NewString ( )
2015-08-19 00:52:26 +00:00
resultCh := make ( chan types . UID )
defer close ( resultCh )
for {
select {
2016-01-31 23:56:55 +00:00
case podPair , ok := <- kl . podKillingCh :
runningPod := podPair . RunningPod
apiPod := podPair . APIPod
2015-08-19 00:52:26 +00:00
if ! ok {
2015-04-14 22:26:50 +00:00
return
}
2016-01-31 23:56:55 +00:00
if killing . Has ( string ( runningPod . ID ) ) {
2015-08-19 00:52:26 +00:00
// The pod is already being killed.
break
}
2016-01-31 23:56:55 +00:00
killing . Insert ( string ( runningPod . ID ) )
go func ( apiPod * api . Pod , runningPod * kubecontainer . Pod , ch chan types . UID ) {
2015-08-19 00:52:26 +00:00
defer func ( ) {
2016-01-31 23:56:55 +00:00
ch <- runningPod . ID
2015-08-19 00:52:26 +00:00
} ( )
2016-01-31 23:56:55 +00:00
glog . V ( 2 ) . Infof ( "Killing unwanted pod %q" , runningPod . Name )
2016-04-27 03:30:59 +00:00
err := kl . killPod ( apiPod , runningPod , nil , nil )
2015-08-19 00:52:26 +00:00
if err != nil {
2016-01-31 23:56:55 +00:00
glog . Errorf ( "Failed killing the pod %q: %v" , runningPod . Name , err )
2015-08-19 00:52:26 +00:00
}
2016-01-31 23:56:55 +00:00
} ( apiPod , runningPod , resultCh )
2015-04-14 22:26:50 +00:00
2015-08-19 00:52:26 +00:00
case podID := <- resultCh :
killing . Delete ( string ( podID ) )
2015-04-14 22:26:50 +00:00
}
}
}
2016-04-28 04:26:36 +00:00
// podsByCreationTime makes an array of pods sortable by their creation
// timestamps.
// TODO: move into util package
2015-04-03 22:51:50 +00:00
type podsByCreationTime [ ] * api . Pod
2015-02-27 21:43:21 +00:00
func ( s podsByCreationTime ) Len ( ) int {
return len ( s )
}
func ( s podsByCreationTime ) Swap ( i , j int ) {
s [ i ] , s [ j ] = s [ j ] , s [ i ]
}
func ( s podsByCreationTime ) Less ( i , j int ) bool {
return s [ i ] . CreationTimestamp . Before ( s [ j ] . CreationTimestamp )
}
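// A minimal usage sketch (not from this file): sorting a slice of pods so
// that the oldest pod comes first.
//
//	sort.Sort(podsByCreationTime(pods))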
2015-03-20 16:52:32 +00:00
// hasHostPortConflicts detects pods with conflicting host ports.
2015-08-19 00:52:26 +00:00
func hasHostPortConflicts ( pods [ ] * api . Pod ) bool {
2015-09-09 17:45:01 +00:00
ports := sets . String { }
2015-04-03 22:51:50 +00:00
for _ , pod := range pods {
2015-11-06 23:30:52 +00:00
if errs := validation . AccumulateUniqueHostPorts ( pod . Spec . Containers , & ports , field . NewPath ( "spec" , "containers" ) ) ; len ( errs ) > 0 {
2015-12-21 19:25:38 +00:00
glog . Errorf ( "Pod %q: HostPort is already allocated, ignoring: %v" , format . Pod ( pod ) , errs )
2015-08-19 00:52:26 +00:00
return true
2014-07-08 04:48:47 +00:00
}
2016-03-29 03:08:54 +00:00
if errs := validation . AccumulateUniqueHostPorts ( pod . Spec . InitContainers , & ports , field . NewPath ( "spec" , "initContainers" ) ) ; len ( errs ) > 0 {
glog . Errorf ( "Pod %q: HostPort is already allocated, ignoring: %v" , format . Pod ( pod ) , errs )
return true
}
2014-07-08 04:48:47 +00:00
}
2015-08-19 00:52:26 +00:00
return false
2015-03-03 18:33:25 +00:00
}
2015-05-12 08:24:08 +00:00
// isOutOfDisk detects if pods can't fit due to lack of disk space.
2015-08-19 00:52:26 +00:00
func ( kl * Kubelet ) isOutOfDisk ( ) bool {
2015-05-12 08:24:08 +00:00
// Check disk space once globally and reject or accept all new pods.
2016-05-18 05:05:55 +00:00
withinBounds , err := kl . diskSpaceManager . IsRuntimeDiskSpaceAvailable ( )
2015-05-12 08:24:08 +00:00
// Assume enough space in case of errors.
2016-06-10 22:22:39 +00:00
if err != nil {
glog . Errorf ( "Failed to check if disk space is available for the runtime: %v" , err )
} else if ! withinBounds {
return true
2015-05-12 08:24:08 +00:00
}
withinBounds , err = kl . diskSpaceManager . IsRootDiskSpaceAvailable ( )
// Assume enough space in case of errors.
2016-06-10 22:22:39 +00:00
if err != nil {
glog . Errorf ( "Failed to check if disk space is available on the root partition: %v" , err )
} else if ! withinBounds {
return true
2015-05-12 08:24:08 +00:00
}
2016-06-10 22:22:39 +00:00
return false
2015-05-12 08:24:08 +00:00
}
2016-04-28 04:26:36 +00:00
// rejectPod records an event about the pod with the given reason and message,
// and updates the pod to the failed phase in the status manager.
2015-08-19 00:52:26 +00:00
func ( kl * Kubelet ) rejectPod ( pod * api . Pod , reason , message string ) {
2015-11-13 22:30:01 +00:00
kl . recorder . Eventf ( pod , api . EventTypeWarning , reason , message )
2015-08-19 00:52:26 +00:00
kl . statusManager . SetPodStatus ( pod , api . PodStatus {
Phase : api . PodFailed ,
Reason : reason ,
Message : "Pod " + message } )
}
2015-05-14 20:02:36 +00:00
2015-08-19 00:52:26 +00:00
// canAdmitPod determines if a pod can be admitted, and gives a reason if it
2016-07-07 02:44:52 +00:00
// cannot. "pod" is new pod, while "pods" are all admitted pods
// The function returns a boolean value indicating whether the pod
2015-08-19 00:52:26 +00:00
// can be admitted, a brief single-word reason and a message explaining why
// the pod cannot be admitted.
func ( kl * Kubelet ) canAdmitPod ( pods [ ] * api . Pod , pod * api . Pod ) ( bool , string , string ) {
2016-01-06 01:10:59 +00:00
node , err := kl . getNodeAnyWay ( )
if err != nil {
glog . Errorf ( "Cannot get Node info: %v" , err )
return false , "InvalidNodeInfo" , "Kubelet cannot get node info."
2015-08-19 00:52:26 +00:00
}
2016-04-15 18:17:17 +00:00
// the kubelet will invoke each pod admit handler in sequence
// if any handler rejects, the pod is rejected.
// TODO: move predicate check into a pod admitter
// TODO: move out of disk check into a pod admitter
// TODO: out of resource eviction should have a pod admitter call-out
2016-07-07 02:44:52 +00:00
attrs := & lifecycle . PodAdmitAttributes { Pod : pod , OtherPods : pods }
2016-04-15 18:17:17 +00:00
for _ , podAdmitHandler := range kl . PodAdmitHandlers {
if result := podAdmitHandler . Admit ( attrs ) ; ! result . Admit {
return false , result . Reason , result . Message
}
}
2016-07-07 02:44:52 +00:00
nodeInfo := schedulercache . NewNodeInfo ( pods ... )
2016-04-21 08:24:12 +00:00
nodeInfo . SetNode ( node )
2016-07-08 06:12:44 +00:00
fit , err := predicates . GeneralPredicates ( pod , nil , nodeInfo )
2016-01-06 01:10:59 +00:00
if ! fit {
if re , ok := err . ( * predicates . PredicateFailureError ) ; ok {
reason := re . PredicateName
message := re . Error ( )
glog . V ( 2 ) . Infof ( "Predicate failed on Pod: %v, for reason: %v" , format . Pod ( pod ) , message )
return fit , reason , message
}
if re , ok := err . ( * predicates . InsufficientResourceError ) ; ok {
reason := fmt . Sprintf ( "OutOf%s" , re . ResourceName )
message := re . Error ( )
glog . V ( 2 ) . Infof ( "Predicate failed on Pod: %v, for reason: %v" , format . Pod ( pod ) , message )
return fit , reason , message
}
reason := "UnexpectedPredicateFailureType"
message := fmt . Sprintf ( "GeneralPredicates failed due to %v, which is unexpected." , err )
glog . Warningf ( "Failed to admit pod %v - %s" , format . Pod ( pod ) , message )
return fit , reason , message
}
// TODO: When disk space scheduling is implemented (#11976), remove the out-of-disk check here and
// add the disk space predicate to predicates.GeneralPredicates.
2015-08-19 00:52:26 +00:00
if kl . isOutOfDisk ( ) {
2016-01-06 01:10:59 +00:00
glog . Warningf ( "Failed to admit pod %v - %s" , format . Pod ( pod ) , "predicate fails due to isOutOfDisk" )
2015-08-19 00:52:26 +00:00
return false , "OutOfDisk" , "cannot be started due to lack of disk space."
}
2016-04-15 18:17:17 +00:00
2015-08-19 00:52:26 +00:00
return true , "" , ""
2014-07-08 04:48:47 +00:00
}
2014-07-01 20:01:39 +00:00
// syncLoop is the main loop for processing changes. It watches for changes from
2015-03-11 23:40:20 +00:00
// three channels (file, apiserver, and http) and creates a union of them. For
2014-06-06 23:40:48 +00:00
// any new change seen, will run a sync against desired state and running state. If
// no changes are seen to the configuration, will synchronize the last known desired
2015-08-11 20:29:50 +00:00
// state every sync-frequency seconds. Never returns.
2015-10-09 17:24:31 +00:00
func ( kl * Kubelet ) syncLoop ( updates <- chan kubetypes . PodUpdate , handler SyncHandler ) {
2015-04-08 20:57:19 +00:00
glog . Info ( "Starting kubelet main sync loop." )
2015-09-02 17:18:11 +00:00
// The syncTicker wakes up the kubelet to check if there are any pod workers
// that need to be synced. A one-second period is sufficient because the
// sync interval is defaulted to 10s.
2015-11-03 18:03:39 +00:00
syncTicker := time . NewTicker ( time . Second )
housekeepingTicker := time . NewTicker ( housekeepingPeriod )
2015-08-07 21:42:21 +00:00
plegCh := kl . pleg . Watch ( )
2014-06-06 23:40:48 +00:00
for {
2015-09-24 22:26:25 +00:00
if rs := kl . runtimeState . errors ( ) ; len ( rs ) != 0 {
glog . Infof ( "skipping pod synchronization - %v" , rs )
2015-08-28 01:07:57 +00:00
time . Sleep ( 5 * time . Second )
continue
}
2015-08-07 21:42:21 +00:00
if ! kl . syncLoopIteration ( updates , handler , syncTicker . C , housekeepingTicker . C , plegCh ) {
2015-09-15 19:29:34 +00:00
break
}
2015-06-17 22:31:46 +00:00
}
}
2016-04-28 04:26:36 +00:00
// syncLoopIteration reads from various channels and dispatches pods to the
// given handler.
//
// Arguments:
// 1. configCh: a channel to read config events from
// 2. handler: the SyncHandler to dispatch pods to
// 3. syncCh: a channel to read periodic sync events from
// 4. houseKeepingCh: a channel to read housekeeping events from
// 5. plegCh: a channel to read PLEG updates from
//
// Events are also read from the kubelet liveness manager's update channel.
//
// The workflow is to read from one of the channels, handle that event, and
// update the timestamp in the sync loop monitor.
//
// It is worth noting here that, despite the syntactic similarity to a
// switch statement, the case statements in a select are evaluated in a
// pseudorandom order if multiple channels are ready to read from when the
// select is evaluated. In other words, you cannot assume that the case
// statements are evaluated in order if multiple channels have events.
//
// With that in mind, in truly no particular order, the different channels
// are handled as follows:
//
// * configCh: dispatch the pods for the config change to the appropriate
// handler callback for the event type
// * plegCh: update the runtime cache; sync pod
// * syncCh: sync all pods waiting for sync
// * houseKeepingCh: trigger cleanup of pods
// * liveness manager: sync pods that have failed or in which one or more
// containers have failed liveness checks
func ( kl * Kubelet ) syncLoopIteration ( configCh <- chan kubetypes . PodUpdate , handler SyncHandler ,
2015-08-07 21:42:21 +00:00
syncCh <- chan time . Time , housekeepingCh <- chan time . Time , plegCh <- chan * pleg . PodLifecycleEvent ) bool {
2016-01-04 20:03:28 +00:00
kl . syncLoopMonitor . Store ( kl . clock . Now ( ) )
2015-06-17 22:31:46 +00:00
select {
2016-04-28 04:26:36 +00:00
case u , open := <- configCh :
// Update from a config source; dispatch it to the right handler
// callback.
2015-08-30 19:47:24 +00:00
if ! open {
2015-06-17 22:31:46 +00:00
glog . Errorf ( "Update channel is closed. Exiting the sync loop." )
2015-08-30 19:47:24 +00:00
return false
2015-06-18 05:34:11 +00:00
}
2016-04-26 17:58:12 +00:00
kl . sourcesReady . AddSource ( u . Source )
2015-08-07 21:42:21 +00:00
2015-08-19 00:52:26 +00:00
switch u . Op {
2015-10-09 17:24:31 +00:00
case kubetypes . ADD :
2015-11-20 17:54:37 +00:00
glog . V ( 2 ) . Infof ( "SyncLoop (ADD, %q): %q" , u . Source , format . Pods ( u . Pods ) )
2016-03-17 18:36:18 +00:00
// After restarting, kubelet will get all existing pods through
// ADD as if they are new pods. These pods will then go through the
// admission process and *may* be rejected. This can be resolved
// once we have checkpointing.
2015-08-19 00:52:26 +00:00
handler . HandlePodAdditions ( u . Pods )
2015-10-09 17:24:31 +00:00
case kubetypes . UPDATE :
2016-06-08 23:32:30 +00:00
glog . V ( 2 ) . Infof ( "SyncLoop (UPDATE, %q): %q" , u . Source , format . PodsWithDeletiontimestamps ( u . Pods ) )
2015-08-19 00:52:26 +00:00
handler . HandlePodUpdates ( u . Pods )
2015-10-09 17:24:31 +00:00
case kubetypes . REMOVE :
2015-11-20 17:54:37 +00:00
glog . V ( 2 ) . Infof ( "SyncLoop (REMOVE, %q): %q" , u . Source , format . Pods ( u . Pods ) )
2016-06-14 09:29:18 +00:00
handler . HandlePodRemoves ( u . Pods )
2015-12-09 03:13:09 +00:00
case kubetypes . RECONCILE :
glog . V ( 4 ) . Infof ( "SyncLoop (RECONCILE, %q): %q" , u . Source , format . Pods ( u . Pods ) )
handler . HandlePodReconcile ( u . Pods )
2016-06-14 09:29:18 +00:00
case kubetypes . DELETE :
glog . V ( 2 ) . Infof ( "SyncLoop (DELETE, %q): %q" , u . Source , format . Pods ( u . Pods ) )
// DELETE is treated as a UPDATE because of graceful deletion.
handler . HandlePodUpdates ( u . Pods )
2015-10-09 17:24:31 +00:00
case kubetypes . SET :
2015-08-19 00:52:26 +00:00
// TODO: Do we want to support this?
glog . Errorf ( "Kubelet does not support snapshot update" )
2016-06-14 09:29:18 +00:00
2015-08-19 00:52:26 +00:00
}
case e := <- plegCh :
// PLEG event for a pod; sync it.
pod , ok := kl . podManager . GetPodByUID ( e . ID )
if ! ok {
// If the pod no longer exists, ignore the event.
glog . V ( 4 ) . Infof ( "SyncLoop (PLEG): ignore irrelevant event: %#v" , e )
break
}
glog . V ( 2 ) . Infof ( "SyncLoop (PLEG): %q, event: %#v" , format . Pod ( pod ) , e )
handler . HandlePodSyncs ( [ ] * api . Pod { pod } )
case <- syncCh :
// Sync pods waiting for sync
podsToSync := kl . getPodsToSync ( )
if len ( podsToSync ) == 0 {
break
}
glog . V ( 4 ) . Infof ( "SyncLoop (SYNC): %d pods; %s" , len ( podsToSync ) , format . Pods ( podsToSync ) )
kl . HandlePodSyncs ( podsToSync )
case update := <- kl . livenessManager . Updates ( ) :
if update . Result == proberesults . Failure {
// The liveness manager detected a failure; sync the pod.
// We should not use the pod from livenessManager, because it is never updated after
// initialization.
pod , ok := kl . podManager . GetPodByUID ( update . PodUID )
if ! ok {
// If the pod no longer exists, ignore the update.
glog . V ( 4 ) . Infof ( "SyncLoop (container unhealthy): ignore irrelevant update: %#v" , update )
break
}
glog . V ( 1 ) . Infof ( "SyncLoop (container unhealthy): %q" , format . Pod ( pod ) )
handler . HandlePodSyncs ( [ ] * api . Pod { pod } )
}
case <- housekeepingCh :
if ! kl . sourcesReady . AllReady ( ) {
// If the sources aren't ready, skip housekeeping, as we may
// accidentally delete pods from unready sources.
glog . V ( 4 ) . Infof ( "SyncLoop (housekeeping, skipped): sources aren't ready yet." )
} else {
glog . V ( 4 ) . Infof ( "SyncLoop (housekeeping)" )
if err := handler . HandlePodCleanups ( ) ; err != nil {
glog . Errorf ( "Failed cleaning pods: %v" , err )
}
}
}
kl . syncLoopMonitor . Store ( kl . clock . Now ( ) )
return true
}
// dispatchWork starts the asynchronous sync of the pod in a pod worker.
// If the pod is terminated, dispatchWork does not start a pod worker; it only
// forces a final status update when the pod's DeletionTimestamp is set.
func ( kl * Kubelet ) dispatchWork ( pod * api . Pod , syncType kubetypes . SyncPodType , mirrorPod * api . Pod , start time . Time ) {
2015-08-19 00:52:26 +00:00
if kl . podIsTerminated ( pod ) {
2016-02-17 23:56:54 +00:00
if pod . DeletionTimestamp != nil {
2016-04-28 04:26:36 +00:00
// If the pod is in a terminated state, there is no pod worker to
2016-02-17 23:56:54 +00:00
// handle the work item. Check if the DeletionTimestamp has been
// set, and force a status update to trigger a pod deletion request
// to the apiserver.
kl . statusManager . TerminatePod ( pod )
}
2015-08-19 00:52:26 +00:00
return
}
// Run the sync in an async worker.
2016-05-06 18:07:24 +00:00
kl . podWorkers . UpdatePod ( & UpdatePodOptions {
Pod : pod ,
MirrorPod : mirrorPod ,
UpdateType : syncType ,
OnCompleteFunc : func ( err error ) {
if err != nil {
metrics . PodWorkerLatency . WithLabelValues ( syncType . String ( ) ) . Observe ( metrics . SinceInMicroseconds ( start ) )
}
} ,
2015-08-19 00:52:26 +00:00
} )
// Note the number of containers for new pods.
2015-10-09 17:24:31 +00:00
if syncType == kubetypes . SyncPodCreate {
2015-08-19 00:52:26 +00:00
metrics . ContainersPerPodCount . Observe ( float64 ( len ( pod . Spec . Containers ) ) )
}
}
// TODO: Consider handling all mirror pods updates in a separate component.
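// handleMirrorPod dispatches a mirror pod update to the pod worker for the
// static pod it mirrors, if that static pod is known to the pod manager.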
func ( kl * Kubelet ) handleMirrorPod ( mirrorPod * api . Pod , start time . Time ) {
// Mirror pod ADD/UPDATE/DELETE operations are considered an UPDATE to the
// corresponding static pod. Send update to the pod worker if the static
// pod exists.
if pod , ok := kl . podManager . GetPodByMirrorPod ( mirrorPod ) ; ok {
2015-10-09 17:24:31 +00:00
kl . dispatchWork ( pod , kubetypes . SyncPodUpdate , mirrorPod , start )
2015-08-19 00:52:26 +00:00
}
}
2016-04-28 04:26:36 +00:00
// HandlePodAdditions is the callback in SyncHandler for pods being added from
// a config source.
2015-08-19 00:52:26 +00:00
func ( kl * Kubelet ) HandlePodAdditions ( pods [ ] * api . Pod ) {
2016-01-04 20:03:28 +00:00
start := kl . clock . Now ( )
2015-08-19 00:52:26 +00:00
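// Process the pods in creation-time order, so that pods created earlier
// are admitted (or rejected) first.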
sort . Sort ( podsByCreationTime ( pods ) )
for _ , pod := range pods {
2015-10-12 23:28:23 +00:00
if kubepod . IsMirrorPod ( pod ) {
2016-07-07 02:44:52 +00:00
kl . podManager . AddPod ( pod )
2015-08-19 00:52:26 +00:00
kl . handleMirrorPod ( pod , start )
continue
}
// Note that allPods excludes the new pod.
allPods := kl . podManager . GetPods ( )
// Pods that were rejected earlier have already been marked failed, so
// activePods includes only the admitted pods that are still alive.
2015-08-19 00:52:26 +00:00
activePods := kl . filterOutTerminatedPods ( allPods )
// Check if we can admit the pod; if not, reject it.
if ok , reason , message := kl . canAdmitPod ( activePods , pod ) ; ! ok {
kl . rejectPod ( pod , reason , message )
continue
2014-06-06 23:40:48 +00:00
}
2016-07-07 02:44:52 +00:00
kl . podManager . AddPod ( pod )
2015-08-19 00:52:26 +00:00
mirrorPod , _ := kl . podManager . GetMirrorPodByPod ( pod )
2015-10-09 17:24:31 +00:00
kl . dispatchWork ( pod , kubetypes . SyncPodCreate , mirrorPod , start )
2015-08-25 17:39:41 +00:00
kl . probeManager . AddPod ( pod )
2014-06-06 23:40:48 +00:00
}
2015-08-19 00:52:26 +00:00
}
2016-04-28 04:26:36 +00:00
// HandlePodUpdates is the callback in the SyncHandler interface for pods
// being updated from a config source.
2015-08-19 00:52:26 +00:00
func ( kl * Kubelet ) HandlePodUpdates ( pods [ ] * api . Pod ) {
2016-01-04 20:03:28 +00:00
start := kl . clock . Now ( )
2015-08-19 00:52:26 +00:00
for _ , pod := range pods {
kl . podManager . UpdatePod ( pod )
2015-10-12 23:28:23 +00:00
if kubepod . IsMirrorPod ( pod ) {
2015-08-19 00:52:26 +00:00
kl . handleMirrorPod ( pod , start )
continue
}
// TODO: Evaluate if we need to validate and reject updates.
mirrorPod , _ := kl . podManager . GetMirrorPodByPod ( pod )
2015-10-09 17:24:31 +00:00
kl . dispatchWork ( pod , kubetypes . SyncPodUpdate , mirrorPod , start )
2015-08-19 00:52:26 +00:00
}
}
2016-06-14 09:29:18 +00:00
// HandlePodRemoves is the callback in the SyncHandler interface for pods
// being removed from a config source.
func ( kl * Kubelet ) HandlePodRemoves ( pods [ ] * api . Pod ) {
2016-01-04 20:03:28 +00:00
start := kl . clock . Now ( )
2015-08-19 00:52:26 +00:00
for _ , pod := range pods {
kl . podManager . DeletePod ( pod )
2015-10-12 23:28:23 +00:00
if kubepod . IsMirrorPod ( pod ) {
2015-08-19 00:52:26 +00:00
kl . handleMirrorPod ( pod , start )
continue
}
// Deletion is allowed to fail because the periodic cleanup routine
// will trigger deletion again.
2016-01-31 23:56:55 +00:00
if err := kl . deletePod ( pod ) ; err != nil {
2015-11-20 17:54:37 +00:00
glog . V ( 2 ) . Infof ( "Failed to delete pod %q, err: %v" , format . Pod ( pod ) , err )
2015-08-19 00:52:26 +00:00
}
2015-08-25 17:39:41 +00:00
kl . probeManager . RemovePod ( pod )
2015-08-19 00:52:26 +00:00
}
}
2016-04-28 04:26:36 +00:00
// HandlePodReconcile is the callback in the SyncHandler interface for pods
// that should be reconciled.
2015-12-09 03:13:09 +00:00
func ( kl * Kubelet ) HandlePodReconcile ( pods [ ] * api . Pod ) {
for _ , pod := range pods {
// Update the pod in the pod manager; the status manager will
// periodically reconcile against the pod manager.
kl . podManager . UpdatePod ( pod )
}
}
2016-04-28 04:26:36 +00:00
// HandlePodSyncs is the callback in the syncHandler interface for pods
// that should be dispatched to pod workers for sync.
2015-08-19 00:52:26 +00:00
func ( kl * Kubelet ) HandlePodSyncs ( pods [ ] * api . Pod ) {
2016-01-04 20:03:28 +00:00
start := kl . clock . Now ( )
2015-08-19 00:52:26 +00:00
for _ , pod := range pods {
mirrorPod , _ := kl . podManager . GetMirrorPodByPod ( pod )
2015-10-09 17:24:31 +00:00
kl . dispatchWork ( pod , kubetypes . SyncPodSync , mirrorPod , start )
2015-06-17 22:31:46 +00:00
}
}
2016-04-28 04:26:36 +00:00
// LatestLoopEntryTime returns the last time in the sync loop monitor.
2015-06-17 22:31:46 +00:00
func ( kl * Kubelet ) LatestLoopEntryTime ( ) time . Time {
val := kl . syncLoopMonitor . Load ( )
if val == nil {
return time . Time { }
}
return val . ( time . Time )
2014-06-06 23:40:48 +00:00
}
// PLEGHealthCheck returns whether the PLEG is healthy.
2016-03-01 00:11:48 +00:00
func ( kl * Kubelet ) PLEGHealthCheck ( ) ( bool , error ) {
return kl . pleg . Healthy ( )
}
2016-01-29 03:27:56 +00:00
// validateContainerLogStatus returns the container ID of the container whose logs should be retrieved, based on the state
// of that container. If the previous flag is set, only the logs of the last terminated container are returned; otherwise,
// the currently running container is preferred over a previous termination. If no information about the container is
// available, a specific error is returned to the end user.
func ( kl * Kubelet ) validateContainerLogStatus ( podName string , podStatus * api . PodStatus , containerName string , previous bool ) ( containerID kubecontainer . ContainerID , err error ) {
2015-05-07 18:34:16 +00:00
var cID string
2015-03-25 11:09:35 +00:00
cStatus , found := api . GetContainerStatus ( podStatus . ContainerStatuses , containerName )
2016-06-08 12:03:51 +00:00
// if not found, check the init containers
if ! found {
cStatus , found = api . GetContainerStatus ( podStatus . InitContainerStatuses , containerName )
}
2015-03-25 11:09:35 +00:00
if ! found {
2016-01-29 03:27:56 +00:00
return kubecontainer . ContainerID { } , fmt . Errorf ( "container %q in pod %q is not available" , containerName , podName )
2015-02-24 00:33:43 +00:00
}
2016-01-29 03:27:56 +00:00
lastState := cStatus . LastTerminationState
waiting , running , terminated := cStatus . State . Waiting , cStatus . State . Running , cStatus . State . Terminated
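// Choose which container instance to read logs from, in order of preference:
// the previous termination (if requested), the running container, the most
// recent termination, or report why the container is still waiting.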
switch {
case previous :
if lastState . Terminated == nil {
return kubecontainer . ContainerID { } , fmt . Errorf ( "previous terminated container %q in pod %q not found" , containerName , podName )
2015-05-07 18:34:16 +00:00
}
2016-01-29 03:27:56 +00:00
cID = lastState . Terminated . ContainerID
case running != nil :
2015-05-07 18:34:16 +00:00
cID = cStatus . ContainerID
2016-01-29 03:27:56 +00:00
case terminated != nil :
cID = terminated . ContainerID
case lastState . Terminated != nil :
cID = lastState . Terminated . ContainerID
case waiting != nil :
// output some info for the most common pending failures
switch reason := waiting . Reason ; reason {
case kubecontainer . ErrImagePull . Error ( ) :
return kubecontainer . ContainerID { } , fmt . Errorf ( "container %q in pod %q is waiting to start: image can't be pulled" , containerName , podName )
case kubecontainer . ErrImagePullBackOff . Error ( ) :
return kubecontainer . ContainerID { } , fmt . Errorf ( "container %q in pod %q is waiting to start: trying and failing to pull image" , containerName , podName )
default :
return kubecontainer . ContainerID { } , fmt . Errorf ( "container %q in pod %q is waiting to start: %v" , containerName , podName , reason )
}
default :
// unrecognized state
return kubecontainer . ContainerID { } , fmt . Errorf ( "container %q in pod %q is waiting to start - no logs yet" , containerName , podName )
2015-03-25 11:09:35 +00:00
}
return kubecontainer . ParseContainerID ( cID ) , nil
2015-02-24 00:33:43 +00:00
}
2014-08-27 19:41:32 +00:00
// GetKubeletContainerLogs returns logs from the container
2015-01-07 15:18:56 +00:00
// TODO: this method is returning logs of random container attempts, when it should be returning the most recent attempt
// or all of them.
2015-09-10 03:46:11 +00:00
func ( kl * Kubelet ) GetKubeletContainerLogs ( podFullName , containerName string , logOptions * api . PodLogOptions , stdout , stderr io . Writer ) error {
2015-05-01 23:12:14 +00:00
// TODO(vmarmol): Refactor to not need the pod status and verification.
2015-05-15 22:30:28 +00:00
// Pod workers periodically write status to statusManager. If status is not
// cached there, something is wrong (or kubelet just restarted and hasn't
// caught up yet). Just assume the pod is not ready yet.
2015-08-18 20:26:56 +00:00
name , namespace , err := kubecontainer . ParsePodFullName ( podFullName )
if err != nil {
return fmt . Errorf ( "unable to parse pod full name %q: %v" , podFullName , err )
}
pod , ok := kl . GetPodByName ( namespace , name )
if ! ok {
2016-01-29 03:27:56 +00:00
return fmt . Errorf ( "pod %q cannot be found - no logs available" , name )
2015-08-18 20:26:56 +00:00
}
2015-10-23 21:31:40 +00:00
podUID := pod . UID
if mirrorPod , ok := kl . podManager . GetMirrorPodByPod ( pod ) ; ok {
podUID = mirrorPod . UID
}
podStatus , found := kl . statusManager . GetPodStatus ( podUID )
2015-05-15 22:30:28 +00:00
if ! found {
2015-11-06 20:15:40 +00:00
// If there is no cached status, use the status from the
// apiserver. This is useful if kubelet has recently been
// restarted.
podStatus = pod . Status
2014-09-17 19:00:09 +00:00
}
containerID , err := kl . validateContainerLogStatus ( pod . Name , & podStatus , containerName , logOptions . Previous )
2015-02-24 00:33:43 +00:00
if err != nil {
2016-01-29 03:27:56 +00:00
return err
2015-02-12 01:03:59 +00:00
}
2015-09-10 03:46:11 +00:00
return kl . containerRuntime . GetContainerLogs ( pod , containerID , logOptions , stdout , stderr )
2014-08-27 19:41:32 +00:00
}
// updateRuntimeUp calls the container runtime status callback, initializing
// the runtime dependent modules when the container runtime first comes up,
// and logs an error if the status check fails. If the status check is OK,
// it updates the container runtime uptime in the kubelet runtimeState.
func ( kl * Kubelet ) updateRuntimeUp ( ) {
2016-03-03 10:01:15 +00:00
if err := kl . containerRuntime . Status ( ) ; err != nil {
2015-12-07 19:12:20 +00:00
glog . Errorf ( "Container runtime sanity check failed: %v" , err )
return
2015-05-05 10:19:54 +00:00
}
2015-12-07 19:12:20 +00:00
kl . oneTimeInitializer . Do ( kl . initializeRuntimeDependentModules )
2016-01-04 20:03:28 +00:00
kl . runtimeState . setRuntimeSync ( kl . clock . Now ( ) )
2015-05-05 10:19:54 +00:00
}
2015-09-21 18:06:38 +00:00
// updateNodeStatus updates node status to master with retries.
func ( kl * Kubelet ) updateNodeStatus ( ) error {
for i := 0 ; i < nodeStatusUpdateRetry ; i ++ {
if err := kl . tryUpdateNodeStatus ( ) ; err != nil {
glog . Errorf ( "Error updating node status, will retry: %v" , err )
} else {
return nil
}
}
return fmt . Errorf ( "update node status exceeds retry count" )
}
2016-04-28 04:26:36 +00:00
// recordNodeStatusEvent records an event of the given type with the given
// message for the node.
2015-11-13 22:30:01 +00:00
func ( kl * Kubelet ) recordNodeStatusEvent ( eventtype , event string ) {
2015-09-21 18:06:38 +00:00
glog . V ( 2 ) . Infof ( "Recording %s event message for node %s" , event , kl . nodeName )
// TODO: This requires a transaction, either both node status is updated
// and event is recorded or neither should happen, see issue #6055.
2015-11-13 22:30:01 +00:00
kl . recorder . Eventf ( kl . nodeRef , eventtype , event , "Node %s status is now: %s" , kl . nodeName , event )
2015-09-21 18:06:38 +00:00
}
// setNodeAddress sets the addresses for the node.
func ( kl * Kubelet ) setNodeAddress ( node * api . Node ) error {
if kl . cloud != nil {
instances , ok := kl . cloud . Instances ( )
if ! ok {
return fmt . Errorf ( "failed to get instances from cloud provider" )
}
// TODO(roberthbailey): Can we do this without having credentials to talk
// to the cloud provider?
// TODO(justinsb): We can if CurrentNodeName() was actually CurrentNode() and returned an interface
nodeAddresses , err := instances . NodeAddresses ( kl . nodeName )
if err != nil {
return fmt . Errorf ( "failed to get node address from cloud provider: %v" , err )
}
node . Status . Addresses = nodeAddresses
} else {
2015-12-10 02:05:35 +00:00
if kl . nodeIP != nil {
node . Status . Addresses = [ ] api . NodeAddress {
{ Type : api . NodeLegacyHostIP , Address : kl . nodeIP . String ( ) } ,
{ Type : api . NodeInternalIP , Address : kl . nodeIP . String ( ) } ,
}
} else if addr := net . ParseIP ( kl . hostname ) ; addr != nil {
2015-09-21 18:06:38 +00:00
node . Status . Addresses = [ ] api . NodeAddress {
{ Type : api . NodeLegacyHostIP , Address : addr . String ( ) } ,
{ Type : api . NodeInternalIP , Address : addr . String ( ) } ,
}
} else {
addrs , err := net . LookupIP ( node . Name )
if err != nil {
return fmt . Errorf ( "can't get ip address of node %s: %v" , node . Name , err )
} else if len ( addrs ) == 0 {
return fmt . Errorf ( "no ip address for node %v" , node . Name )
} else {
// check all ip addresses for this node.Name and try to find the first non-loopback IPv4 address.
// If no match is found, fall back to the IP of the interface that has the default gateway.
for _ , ip := range addrs {
if ip . IsLoopback ( ) {
continue
}
if ip . To4 ( ) != nil {
node . Status . Addresses = [ ] api . NodeAddress {
{ Type : api . NodeLegacyHostIP , Address : ip . String ( ) } ,
{ Type : api . NodeInternalIP , Address : ip . String ( ) } ,
}
break
}
}
if len ( node . Status . Addresses ) == 0 {
2016-01-06 15:56:41 +00:00
ip , err := utilnet . ChooseHostInterface ( )
2015-09-21 18:06:38 +00:00
if err != nil {
return err
}
node . Status . Addresses = [ ] api . NodeAddress {
{ Type : api . NodeLegacyHostIP , Address : ip . String ( ) } ,
{ Type : api . NodeInternalIP , Address : ip . String ( ) } ,
}
}
}
}
}
2015-11-12 06:25:59 +00:00
return nil
}
2015-09-21 18:06:38 +00:00
2016-02-14 08:33:14 +00:00
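// updateCloudProviderFromMachineInfo sets node.Spec.ProviderID from the cloud
// provider and instance ID reported by cAdvisor. For example, a hypothetical
// instance with ID "node-1" on a provider reported as "gce" would get the
// ProviderID "gce:////node-1".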
func ( kl * Kubelet ) updateCloudProviderFromMachineInfo ( node * api . Node , info * cadvisorapi . MachineInfo ) {
if info . CloudProvider != cadvisorapi . UnknownProvider &&
info . CloudProvider != cadvisorapi . Baremetal {
// The cloud providers from pkg/cloudprovider/providers/* that update ProviderID
// will use the format of cloudprovider://project/availability_zone/instance_name
// here we only have the cloudprovider and the instance name so we leave project
// and availability zone empty for compatibility.
node . Spec . ProviderID = strings . ToLower ( string ( info . CloudProvider ) ) +
":////" + string ( info . InstanceID )
}
}
2015-11-12 06:25:59 +00:00
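// setNodeStatusMachineInfo sets the node's capacity and allocatable
// resources based on the cached cAdvisor machine info.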
func ( kl * Kubelet ) setNodeStatusMachineInfo ( node * api . Node ) {
2015-09-21 18:06:38 +00:00
// TODO: Post NotReady if we cannot get MachineInfo from cAdvisor. This needs to start
// cAdvisor locally, e.g. for test-cmd.sh, and in integration test.
info , err := kl . GetCachedMachineInfo ( )
if err != nil {
// TODO(roberthbailey): This is required for test-cmd.sh to pass.
// See if the test should be updated instead.
node . Status . Capacity = api . ResourceList {
2016-04-27 00:54:19 +00:00
api . ResourceCPU : * resource . NewMilliQuantity ( 0 , resource . DecimalSI ) ,
api . ResourceMemory : resource . MustParse ( "0Gi" ) ,
api . ResourcePods : * resource . NewQuantity ( int64 ( kl . maxPods ) , resource . DecimalSI ) ,
api . ResourceNvidiaGPU : * resource . NewQuantity ( int64 ( kl . nvidiaGPUs ) , resource . DecimalSI ) ,
2015-09-21 18:06:38 +00:00
}
glog . Errorf ( "Error getting machine info: %v" , err )
} else {
node . Status . NodeInfo . MachineID = info . MachineID
node . Status . NodeInfo . SystemUUID = info . SystemUUID
2015-10-10 00:09:53 +00:00
node . Status . Capacity = cadvisor . CapacityFromMachineInfo ( info )
2016-05-18 15:18:10 +00:00
if kl . podsPerCore > 0 {
node . Status . Capacity [ api . ResourcePods ] = * resource . NewQuantity (
int64 ( math . Min ( float64 ( info . NumCores * kl . podsPerCore ) , float64 ( kl . maxPods ) ) ) , resource . DecimalSI )
} else {
node . Status . Capacity [ api . ResourcePods ] = * resource . NewQuantity (
int64 ( kl . maxPods ) , resource . DecimalSI )
}
2016-04-27 00:54:19 +00:00
node . Status . Capacity [ api . ResourceNvidiaGPU ] = * resource . NewQuantity (
int64 ( kl . nvidiaGPUs ) , resource . DecimalSI )
2015-09-21 18:06:38 +00:00
if node . Status . NodeInfo . BootID != "" &&
node . Status . NodeInfo . BootID != info . BootID {
// TODO: This requires a transaction, either both node status is updated
// and event is recorded or neither should happen, see issue #6055.
2015-11-13 22:30:01 +00:00
kl . recorder . Eventf ( kl . nodeRef , api . EventTypeWarning , kubecontainer . NodeRebooted ,
2015-09-21 18:06:38 +00:00
"Node %s has been rebooted, boot id: %s" , kl . nodeName , info . BootID )
}
node . Status . NodeInfo . BootID = info . BootID
}
2015-12-12 01:51:39 +00:00
// Set Allocatable.
node . Status . Allocatable = make ( api . ResourceList )
for k , v := range node . Status . Capacity {
value := * ( v . Copy ( ) )
if kl . reservation . System != nil {
value . Sub ( kl . reservation . System [ k ] )
}
if kl . reservation . Kubernetes != nil {
value . Sub ( kl . reservation . Kubernetes [ k ] )
}
2016-05-17 04:36:56 +00:00
if value . Sign ( ) < 0 {
2015-12-12 01:51:39 +00:00
// Negative Allocatable resources don't make sense.
value . Set ( 0 )
}
node . Status . Allocatable [ k ] = value
}
2015-11-12 06:25:59 +00:00
}
2015-09-21 18:06:38 +00:00
2015-11-12 06:25:59 +00:00
// Set version info for the node.
func ( kl * Kubelet ) setNodeStatusVersionInfo ( node * api . Node ) {
2015-09-21 18:06:38 +00:00
verinfo , err := kl . cadvisor . VersionInfo ( )
if err != nil {
glog . Errorf ( "Error getting version info: %v" , err )
} else {
node . Status . NodeInfo . KernelVersion = verinfo . KernelVersion
2015-12-12 07:53:34 +00:00
node . Status . NodeInfo . OSImage = verinfo . ContainerOsVersion
2016-01-14 23:16:07 +00:00
runtimeVersion := "Unknown"
if runtimeVer , err := kl . containerRuntime . Version ( ) ; err == nil {
runtimeVersion = runtimeVer . String ( )
}
node . Status . NodeInfo . ContainerRuntimeVersion = fmt . Sprintf ( "%s://%s" , kl . containerRuntime . Type ( ) , runtimeVersion )
2015-09-21 18:06:38 +00:00
node . Status . NodeInfo . KubeletVersion = version . Get ( ) . String ( )
// TODO: kube-proxy might be different version from kubelet in the future
node . Status . NodeInfo . KubeProxyVersion = version . Get ( ) . String ( )
}
2015-11-12 06:25:59 +00:00
}
// Set daemonEndpoints for the node.
func ( kl * Kubelet ) setNodeStatusDaemonEndpoints ( node * api . Node ) {
2015-09-21 18:06:38 +00:00
node . Status . DaemonEndpoints = * kl . daemonEndpoints
2015-11-12 06:25:59 +00:00
}
// Set images list for the node
2015-12-02 08:53:56 +00:00
func ( kl * Kubelet ) setNodeStatusImages ( node * api . Node ) {
// Update image list of this node
var imagesOnNode [ ] api . ContainerImage
containerImages , err := kl . imageManager . GetImageList ( )
if err != nil {
glog . Errorf ( "Error getting image list: %v" , err )
} else {
// Sort the images from largest to smallest, and only include the top N images in the node status.
2016-05-11 11:59:54 +00:00
sort . Sort ( byImageSize ( containerImages ) )
2016-05-08 16:41:23 +00:00
if maxImagesInNodeStatus < len ( containerImages ) {
2016-05-11 11:59:54 +00:00
containerImages = containerImages [ 0 : maxImagesInNodeStatus ]
2016-05-08 16:41:23 +00:00
}
2015-12-02 08:53:56 +00:00
for _ , image := range containerImages {
imagesOnNode = append ( imagesOnNode , api . ContainerImage {
2016-05-03 17:22:39 +00:00
Names : append ( image . RepoTags , image . RepoDigests ... ) ,
2016-02-11 22:15:50 +00:00
SizeBytes : image . Size ,
2015-12-02 08:53:56 +00:00
} )
}
}
node . Status . Images = imagesOnNode
}
2016-05-10 17:38:57 +00:00
// Set the GOOS and GOARCH for this node
func ( kl * Kubelet ) setNodeStatusGoRuntime ( node * api . Node ) {
node . Status . NodeInfo . OperatingSystem = goRuntime . GOOS
node . Status . NodeInfo . Architecture = goRuntime . GOARCH
}
2016-05-11 11:59:54 +00:00
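// byImageSize sorts container images by size, largest first.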
type byImageSize [ ] kubecontainer . Image
2016-05-08 16:41:23 +00:00
// Sort from max to min
2016-05-11 11:59:54 +00:00
func ( a byImageSize ) Less ( i , j int ) bool {
2016-05-08 16:41:23 +00:00
return a [ i ] . Size > a [ j ] . Size
}
2016-05-11 11:59:54 +00:00
func ( a byImageSize ) Len ( ) int { return len ( a ) }
func ( a byImageSize ) Swap ( i , j int ) { a [ i ] , a [ j ] = a [ j ] , a [ i ] }
// Set status for the node.
func ( kl * Kubelet ) setNodeStatusInfo ( node * api . Node ) {
kl . setNodeStatusMachineInfo ( node )
kl . setNodeStatusVersionInfo ( node )
kl . setNodeStatusDaemonEndpoints ( node )
2015-12-02 08:53:56 +00:00
kl . setNodeStatusImages ( node )
2016-05-10 17:38:57 +00:00
kl . setNodeStatusGoRuntime ( node )
2015-11-12 06:25:59 +00:00
}
// Set the NodeReady condition for the node.
func ( kl * Kubelet ) setNodeReadyCondition ( node * api . Node ) {
// NOTE(aaronlevy): NodeReady condition needs to be the last in the list of node conditions.
// This is due to an issue with version skewed kubelet and master components.
// ref: https://github.com/kubernetes/kubernetes/issues/16961
2016-01-04 20:03:28 +00:00
currentTime := unversioned . NewTime ( kl . clock . Now ( ) )
2015-11-12 06:25:59 +00:00
var newNodeReadyCondition api . NodeCondition
if rs := kl . runtimeState . errors ( ) ; len ( rs ) == 0 {
newNodeReadyCondition = api . NodeCondition {
Type : api . NodeReady ,
Status : api . ConditionTrue ,
Reason : "KubeletReady" ,
Message : "kubelet is posting ready status" ,
LastHeartbeatTime : currentTime ,
}
} else {
newNodeReadyCondition = api . NodeCondition {
Type : api . NodeReady ,
Status : api . ConditionFalse ,
Reason : "KubeletNotReady" ,
Message : strings . Join ( rs , "," ) ,
LastHeartbeatTime : currentTime ,
}
}
2016-03-04 00:37:09 +00:00
// Record any soft requirements that were not met in the container manager.
status := kl . containerManager . Status ( )
if status . SoftRequirements != nil {
newNodeReadyCondition . Message = fmt . Sprintf ( "%s. WARNING: %s" , newNodeReadyCondition . Message , status . SoftRequirements . Error ( ) )
}
2015-11-12 06:25:59 +00:00
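// Update the existing NodeReady condition in place, preserving the last
// transition time when the status has not changed.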
readyConditionUpdated := false
needToRecordEvent := false
for i := range node . Status . Conditions {
if node . Status . Conditions [ i ] . Type == api . NodeReady {
if node . Status . Conditions [ i ] . Status == newNodeReadyCondition . Status {
newNodeReadyCondition . LastTransitionTime = node . Status . Conditions [ i ] . LastTransitionTime
} else {
newNodeReadyCondition . LastTransitionTime = currentTime
needToRecordEvent = true
}
node . Status . Conditions [ i ] = newNodeReadyCondition
readyConditionUpdated = true
break
}
}
if ! readyConditionUpdated {
newNodeReadyCondition . LastTransitionTime = currentTime
node . Status . Conditions = append ( node . Status . Conditions , newNodeReadyCondition )
}
if needToRecordEvent {
if newNodeReadyCondition . Status == api . ConditionTrue {
kl . recordNodeStatusEvent ( api . EventTypeNormal , kubecontainer . NodeReady )
} else {
kl . recordNodeStatusEvent ( api . EventTypeNormal , kubecontainer . NodeNotReady )
}
}
}
2016-05-13 03:35:18 +00:00
// setNodeMemoryPressureCondition sets the NodeMemoryPressure condition for the node.
// TODO: this needs to move somewhere centralized...
func ( kl * Kubelet ) setNodeMemoryPressureCondition ( node * api . Node ) {
currentTime := unversioned . NewTime ( kl . clock . Now ( ) )
var condition * api . NodeCondition
// Check if NodeMemoryPressure condition already exists and if it does, just pick it up for update.
for i := range node . Status . Conditions {
if node . Status . Conditions [ i ] . Type == api . NodeMemoryPressure {
condition = & node . Status . Conditions [ i ]
}
}
newCondition := false
// If the NodeMemoryPressure condition doesn't exist, create one
if condition == nil {
condition = & api . NodeCondition {
Type : api . NodeMemoryPressure ,
Status : api . ConditionUnknown ,
}
// cannot be appended to node.Status.Conditions here because it gets
// copied to the slice. So if we append to the slice here none of the
// updates we make below are reflected in the slice.
newCondition = true
}
// Update the heartbeat time
condition . LastHeartbeatTime = currentTime
// Note: The conditions below take care of the case when a new NodeMemoryPressure condition is
// created and as well as the case when the condition already exists. When a new condition
// is created its status is set to api.ConditionUnknown which matches either
// condition.Status != api.ConditionTrue or
// condition.Status != api.ConditionFalse in the conditions below depending on whether
// the kubelet is under memory pressure or not.
if kl . evictionManager . IsUnderMemoryPressure ( ) {
if condition . Status != api . ConditionTrue {
condition . Status = api . ConditionTrue
condition . Reason = "KubeletHasInsufficientMemory"
condition . Message = "kubelet has insufficient memory available"
condition . LastTransitionTime = currentTime
kl . recordNodeStatusEvent ( api . EventTypeNormal , "NodeHasInsufficientMemory" )
}
} else {
if condition . Status != api . ConditionFalse {
condition . Status = api . ConditionFalse
condition . Reason = "KubeletHasSufficientMemory"
condition . Message = "kubelet has sufficient memory available"
condition . LastTransitionTime = currentTime
kl . recordNodeStatusEvent ( api . EventTypeNormal , "NodeHasSufficientMemory" )
}
}
if newCondition {
node . Status . Conditions = append ( node . Status . Conditions , * condition )
}
}
2015-11-12 06:25:59 +00:00
// Set the OutOfDisk condition for the node.
func ( kl * Kubelet ) setNodeOODCondition ( node * api . Node ) {
2016-01-04 20:03:28 +00:00
currentTime := unversioned . NewTime ( kl . clock . Now ( ) )
2015-10-22 19:14:56 +00:00
var nodeOODCondition * api . NodeCondition
// Check if NodeOutOfDisk condition already exists and if it does, just pick it up for update.
for i := range node . Status . Conditions {
if node . Status . Conditions [ i ] . Type == api . NodeOutOfDisk {
nodeOODCondition = & node . Status . Conditions [ i ]
}
}
newOODCondition := false
// If the NodeOutOfDisk condition doesn't exist, create one.
if nodeOODCondition == nil {
nodeOODCondition = & api . NodeCondition {
2016-01-04 20:03:28 +00:00
Type : api . NodeOutOfDisk ,
Status : api . ConditionUnknown ,
2015-10-22 19:14:56 +00:00
}
// nodeOODCondition cannot be appended to node.Status.Conditions here because it gets
// copied to the slice. So if we append nodeOODCondition to the slice here none of the
// updates we make to nodeOODCondition below are reflected in the slice.
newOODCondition = true
}
// Update the heartbeat time irrespective of all the conditions.
nodeOODCondition . LastHeartbeatTime = currentTime
// Note: The conditions below take care of the case when a new NodeOutOfDisk condition is
// created and as well as the case when the condition already exists. When a new condition
// is created its status is set to api.ConditionUnknown which matches either
// nodeOODCondition.Status != api.ConditionTrue or
// nodeOODCondition.Status != api.ConditionFalse in the conditions below depending on whether
// the kubelet is out of disk or not.
if kl . isOutOfDisk ( ) {
if nodeOODCondition . Status != api . ConditionTrue {
nodeOODCondition . Status = api . ConditionTrue
nodeOODCondition . Reason = "KubeletOutOfDisk"
nodeOODCondition . Message = "out of disk space"
nodeOODCondition . LastTransitionTime = currentTime
2015-11-13 22:30:01 +00:00
kl . recordNodeStatusEvent ( api . EventTypeNormal , "NodeOutOfDisk" )
2015-10-22 19:14:56 +00:00
}
} else {
if nodeOODCondition . Status != api . ConditionFalse {
2016-01-04 20:03:28 +00:00
// Update the out of disk condition when the condition status is unknown even if we
// are within the outOfDiskTransitionFrequency duration. We do this to set the
// condition status correctly at kubelet startup.
if nodeOODCondition . Status == api . ConditionUnknown || kl . clock . Since ( nodeOODCondition . LastTransitionTime . Time ) >= kl . outOfDiskTransitionFrequency {
nodeOODCondition . Status = api . ConditionFalse
nodeOODCondition . Reason = "KubeletHasSufficientDisk"
nodeOODCondition . Message = "kubelet has sufficient disk space available"
nodeOODCondition . LastTransitionTime = currentTime
kl . recordNodeStatusEvent ( api . EventTypeNormal , "NodeHasSufficientDisk" )
} else {
glog . Infof ( "Node condition status for OutOfDisk is false, but last transition time is less than %s" , kl . outOfDiskTransitionFrequency )
}
2015-10-22 19:14:56 +00:00
}
}
if newOODCondition {
node . Status . Conditions = append ( node . Status . Conditions , * nodeOODCondition )
}
2015-11-12 06:25:59 +00:00
}
// Maintains Node.Spec.Unschedulable value from previous run of tryUpdateNodeStatus()
var oldNodeUnschedulable bool
// recordNodeSchedulableEvent records an event if the node's schedulable status has changed.
2016-04-26 21:29:44 +00:00
func ( kl * Kubelet ) recordNodeSchedulableEvent ( node * api . Node ) {
2015-09-21 18:06:38 +00:00
if oldNodeUnschedulable != node . Spec . Unschedulable {
if node . Spec . Unschedulable {
2015-11-13 22:30:01 +00:00
kl . recordNodeStatusEvent ( api . EventTypeNormal , kubecontainer . NodeNotSchedulable )
2015-09-21 18:06:38 +00:00
} else {
2015-11-13 22:30:01 +00:00
kl . recordNodeStatusEvent ( api . EventTypeNormal , kubecontainer . NodeSchedulable )
2015-09-21 18:06:38 +00:00
}
oldNodeUnschedulable = node . Spec . Unschedulable
}
2015-11-12 06:25:59 +00:00
}
2016-05-23 20:37:30 +00:00
// Update VolumesInUse field in Node Status
func ( kl * Kubelet ) setNodeVolumesInUseStatus ( node * api . Node ) {
node . Status . VolumesInUse = kl . volumeManager . GetVolumesInUse ( )
}
2015-11-12 06:25:59 +00:00
// setNodeStatus fills in the Status fields of the given Node, overwriting
// any fields that are currently set.
// TODO(madhusudancs): Simplify the logic for setting node conditions and
// refactor the node status condition code out to a different file.
func ( kl * Kubelet ) setNodeStatus ( node * api . Node ) error {
2016-02-19 01:54:48 +00:00
for _ , f := range kl . setNodeStatusFuncs {
if err := f ( node ) ; err != nil {
return err
}
2015-11-12 06:25:59 +00:00
}
2015-09-21 18:06:38 +00:00
return nil
}
2016-02-19 01:54:48 +00:00
// defaultNodeStatusFuncs is a factory that generates the default set of setNodeStatus funcs
func ( kl * Kubelet ) defaultNodeStatusFuncs ( ) [ ] func ( * api . Node ) error {
// Initial set of node status update handlers; it can be extended via Options.
withoutError := func ( f func ( * api . Node ) ) func ( * api . Node ) error {
return func ( n * api . Node ) error {
f ( n )
return nil
}
}
return [ ] func ( * api . Node ) error {
kl . setNodeAddress ,
withoutError ( kl . setNodeStatusInfo ) ,
withoutError ( kl . setNodeOODCondition ) ,
2016-05-13 03:35:18 +00:00
withoutError ( kl . setNodeMemoryPressureCondition ) ,
2016-02-19 01:54:48 +00:00
withoutError ( kl . setNodeReadyCondition ) ,
2016-05-23 20:37:30 +00:00
withoutError ( kl . setNodeVolumesInUseStatus ) ,
2016-04-26 21:29:44 +00:00
withoutError ( kl . recordNodeSchedulableEvent ) ,
2016-02-19 01:54:48 +00:00
}
}
// SetNodeStatus returns a functional Option that adds the given node status update handler to the Kubelet
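// The handler is appended to kl.setNodeStatusFuncs and is invoked on every
// subsequent call to setNodeStatus, after the handlers already registered.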
func SetNodeStatus ( f func ( * api . Node ) error ) Option {
return func ( k * Kubelet ) {
k . setNodeStatusFuncs = append ( k . setNodeStatusFuncs , f )
}
}
2015-09-21 18:06:38 +00:00
// tryUpdateNodeStatus tries to update node status to master. If ReconcileCBR0
// is set, this function will also confirm that cbr0 is configured correctly.
func ( kl * Kubelet ) tryUpdateNodeStatus ( ) error {
2016-02-03 21:21:05 +00:00
node , err := kl . kubeClient . Core ( ) . Nodes ( ) . Get ( kl . nodeName )
2015-09-21 18:06:38 +00:00
if err != nil {
return fmt . Errorf ( "error getting node %q: %v" , kl . nodeName , err )
}
if node == nil {
return fmt . Errorf ( "no node instance returned for %q" , kl . nodeName )
}
// Flannel is the authoritative source of pod CIDR, if it's running.
// This is a short term compromise till we get flannel working in
// reservation mode.
if kl . flannelExperimentalOverlay {
flannelPodCIDR := kl . runtimeState . podCIDR ( )
if node . Spec . PodCIDR != flannelPodCIDR {
node . Spec . PodCIDR = flannelPodCIDR
glog . Infof ( "Updating podcidr to %v" , node . Spec . PodCIDR )
2016-02-03 21:21:05 +00:00
if updatedNode , err := kl . kubeClient . Core ( ) . Nodes ( ) . Update ( node ) ; err != nil {
2015-11-24 02:11:51 +00:00
glog . Warningf ( "Failed to update podCIDR: %v" , err )
} else {
// Update the node resourceVersion so the status update doesn't fail.
node = updatedNode
}
}
2015-11-21 03:41:32 +00:00
} else if kl . reconcileCIDR {
2016-01-27 04:02:59 +00:00
kl . updatePodCIDR ( node . Spec . PodCIDR )
2015-09-16 04:53:33 +00:00
}
2015-09-21 18:06:38 +00:00
if err := kl . setNodeStatus ( node ) ; err != nil {
return err
}
// Update the current status on the API server
2016-06-27 00:33:01 +00:00
updatedNode , err := kl . kubeClient . Core ( ) . Nodes ( ) . UpdateStatus ( node )
if err == nil {
kl . volumeManager . MarkVolumesAsReportedInUse (
updatedNode . Status . VolumesInUse )
}
2015-09-21 18:06:38 +00:00
return err
}
2015-06-12 11:11:53 +00:00
// GetPhase returns the phase of a pod given its container info.
// This func is exported to simplify integration with 3rd party kubelet
// integrations like kubernetes-mesos.
func GetPhase ( spec * api . PodSpec , info [ ] api . ContainerStatus ) api . PodPhase {
2016-03-29 03:08:54 +00:00
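// Tally the state of the init containers: how many have initialized
// successfully, how many are still pending, and how many have failed.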
initialized := 0
pendingInitialization := 0
failedInitialization := 0
for _ , container := range spec . InitContainers {
containerStatus , ok := api . GetContainerStatus ( info , container . Name )
if ! ok {
pendingInitialization ++
continue
}
switch {
case containerStatus . State . Running != nil :
pendingInitialization ++
case containerStatus . State . Terminated != nil :
if containerStatus . State . Terminated . ExitCode == 0 {
initialized ++
} else {
failedInitialization ++
}
case containerStatus . State . Waiting != nil :
if containerStatus . LastTerminationState . Terminated != nil {
if containerStatus . LastTerminationState . Terminated . ExitCode == 0 {
initialized ++
} else {
failedInitialization ++
}
} else {
pendingInitialization ++
}
default :
pendingInitialization ++
}
}
unknown := 0
2015-01-28 17:56:35 +00:00
running := 0
waiting := 0
stopped := 0
failed := 0
succeeded := 0
for _ , container := range spec . Containers {
2015-12-04 22:46:36 +00:00
containerStatus , ok := api . GetContainerStatus ( info , container . Name )
if ! ok {
unknown ++
continue
}
switch {
case containerStatus . State . Running != nil :
running ++
case containerStatus . State . Terminated != nil :
stopped ++
if containerStatus . State . Terminated . ExitCode == 0 {
succeeded ++
} else {
failed ++
}
case containerStatus . State . Waiting != nil :
if containerStatus . LastTerminationState . Terminated != nil {
2015-01-28 17:56:35 +00:00
stopped ++
} else {
2015-12-04 22:46:36 +00:00
waiting ++
2015-01-28 17:56:35 +00:00
}
2015-12-04 22:46:36 +00:00
default :
2015-01-28 17:56:35 +00:00
unknown ++
}
}
if failedInitialization > 0 && spec . RestartPolicy == api . RestartPolicyNever {
return api . PodFailed
}
2015-01-28 17:56:35 +00:00
switch {
2016-03-29 03:08:54 +00:00
case pendingInitialization > 0 :
fallthrough
2015-01-28 17:56:35 +00:00
case waiting > 0 :
2015-02-09 21:55:36 +00:00
glog . V ( 5 ) . Infof ( "pod waiting > 0, pending" )
2015-01-28 17:56:35 +00:00
// One or more containers has not been started
return api . PodPending
case running > 0 && unknown == 0 :
// All containers have been started, and at least
// one container is running
return api . PodRunning
case running == 0 && stopped > 0 && unknown == 0 :
// All containers are terminated
2015-03-14 01:38:07 +00:00
if spec . RestartPolicy == api . RestartPolicyAlways {
2015-01-28 17:56:35 +00:00
// All containers are in the process of restarting
return api . PodRunning
}
if stopped == succeeded {
// RestartPolicy is not Always, and all
// containers are terminated in success
return api . PodSucceeded
}
2015-03-14 01:38:07 +00:00
if spec . RestartPolicy == api . RestartPolicyNever {
2015-01-28 17:56:35 +00:00
// RestartPolicy is Never, and all containers are
// terminated with at least one in failure
return api . PodFailed
}
// RestartPolicy is OnFailure, and at least one in failure
// and in the process of restarting
return api . PodRunning
default :
2015-02-09 21:55:36 +00:00
glog . V ( 5 ) . Infof ( "pod default case, pending" )
2015-01-28 17:56:35 +00:00
return api . PodPending
}
}
2016-04-28 04:26:36 +00:00
// generateAPIPodStatus creates the final API pod status for a pod, given the
// internal pod status.
2016-02-13 05:56:12 +00:00
func ( kl * Kubelet ) generateAPIPodStatus ( pod * api . Pod , podStatus * kubecontainer . PodStatus ) api . PodStatus {
2015-12-07 21:31:02 +00:00
glog . V ( 3 ) . Infof ( "Generating status for %q" , format . Pod ( pod ) )
2016-04-15 18:17:17 +00:00
// check if an internal module has requested the pod is evicted.
for _ , podSyncHandler := range kl . PodSyncHandlers {
if result := podSyncHandler . ShouldEvict ( pod ) ; result . Evict {
return api . PodStatus {
Phase : api . PodFailed ,
Reason : result . Reason ,
Message : result . Message ,
}
}
}
2016-01-21 01:24:31 +00:00
s := kl . convertStatusToAPIStatus ( pod , podStatus )
// Assume info is ready to process
2016-01-20 02:15:10 +00:00
spec := & pod . Spec
2016-03-29 03:08:54 +00:00
allStatus := append ( append ( [ ] api . ContainerStatus { } , s . ContainerStatuses ... ) , s . InitContainerStatuses ... )
s . Phase = GetPhase ( spec , allStatus )
2016-01-20 02:15:10 +00:00
kl . probeManager . UpdatePodStatus ( pod . UID , s )
2016-03-29 03:08:54 +00:00
s . Conditions = append ( s . Conditions , status . GeneratePodInitializedCondition ( spec , s . InitContainerStatuses , s . Phase ) )
2016-01-20 02:15:10 +00:00
s . Conditions = append ( s . Conditions , status . GeneratePodReadyCondition ( spec , s . ContainerStatuses , s . Phase ) )
2016-04-18 12:26:16 +00:00
// s (the PodStatus we are creating) will not have a PodScheduled condition yet, because convertStatusToAPIStatus()
// does not create one. If the existing PodStatus has a PodScheduled condition, then copy it into s and make sure
// it is set to true. If the existing PodStatus does not have a PodScheduled condition, then create one that is set to true.
if _ , oldPodScheduled := api . GetPodCondition ( & pod . Status , api . PodScheduled ) ; oldPodScheduled != nil {
s . Conditions = append ( s . Conditions , * oldPodScheduled )
}
api . UpdatePodCondition ( & pod . Status , & api . PodCondition {
Type : api . PodScheduled ,
Status : api . ConditionTrue ,
} )
if ! kl . standaloneMode {
2016-06-15 23:28:37 +00:00
hostIP , err := kl . getHostIPAnyWay ( )
2015-06-12 17:20:26 +00:00
if err != nil {
glog . V ( 4 ) . Infof ( "Cannot get host IP: %v" , err )
} else {
2016-01-20 02:15:10 +00:00
s . HostIP = hostIP . String ( )
if podUsesHostNetwork ( pod ) && s . PodIP == "" {
s . PodIP = hostIP . String ( )
2015-06-18 18:30:59 +00:00
}
2015-06-12 17:20:26 +00:00
}
2015-03-24 12:35:38 +00:00
}
return * s
2014-07-15 17:26:56 +00:00
}
2016-04-28 04:26:36 +00:00
// convertStatusToAPIStatus creates an api PodStatus for the given pod from
// the given internal pod status. It is purely transformative and does not
// alter the kubelet state at all.
2016-01-18 01:21:08 +00:00
func ( kl * Kubelet ) convertStatusToAPIStatus ( pod * api . Pod , podStatus * kubecontainer . PodStatus ) * api . PodStatus {
var apiPodStatus api . PodStatus
2016-02-13 05:56:12 +00:00
apiPodStatus . PodIP = podStatus . IP
2016-01-18 01:21:08 +00:00
2016-03-29 03:08:54 +00:00
apiPodStatus . ContainerStatuses = kl . convertToAPIContainerStatuses (
pod , podStatus ,
pod . Status . ContainerStatuses ,
pod . Spec . Containers ,
len ( pod . Spec . InitContainers ) > 0 ,
false ,
)
apiPodStatus . InitContainerStatuses = kl . convertToAPIContainerStatuses (
pod , podStatus ,
pod . Status . InitContainerStatuses ,
pod . Spec . InitContainers ,
len ( pod . Spec . InitContainers ) > 0 ,
true ,
)
return & apiPodStatus
}
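// convertToAPIContainerStatuses converts the internal container statuses in
// podStatus into api.ContainerStatus values for the given containers, falling
// back to the previous statuses (and a default Waiting state) when no runtime
// status is available.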
func ( kl * Kubelet ) convertToAPIContainerStatuses ( pod * api . Pod , podStatus * kubecontainer . PodStatus , previousStatus [ ] api . ContainerStatus , containers [ ] api . Container , hasInitContainers , isInitContainer bool ) [ ] api . ContainerStatus {
2016-01-18 01:21:08 +00:00
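// convertContainerStatus translates a single internal container status
// into an api.ContainerStatus.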
convertContainerStatus := func ( cs * kubecontainer . ContainerStatus ) * api . ContainerStatus {
cid := cs . ID . String ( )
status := & api . ContainerStatus {
Name : cs . Name ,
2016-04-27 04:35:14 +00:00
RestartCount : int32 ( cs . RestartCount ) ,
2016-01-18 01:21:08 +00:00
Image : cs . Image ,
ImageID : cs . ImageID ,
ContainerID : cid ,
}
switch cs . State {
case kubecontainer . ContainerStateRunning :
status . State . Running = & api . ContainerStateRunning { StartedAt : unversioned . NewTime ( cs . StartedAt ) }
case kubecontainer . ContainerStateExited :
status . State . Terminated = & api . ContainerStateTerminated {
2016-04-27 04:35:14 +00:00
ExitCode : int32 ( cs . ExitCode ) ,
2016-01-18 01:21:08 +00:00
Reason : cs . Reason ,
Message : cs . Message ,
StartedAt : unversioned . NewTime ( cs . StartedAt ) ,
FinishedAt : unversioned . NewTime ( cs . FinishedAt ) ,
ContainerID : cid ,
}
default :
status . State . Waiting = & api . ContainerStateWaiting { }
}
return status
}
2016-02-13 05:56:12 +00:00
// Fetch old container statuses from the old pod status.
2016-03-29 03:08:54 +00:00
oldStatuses := make ( map [ string ] api . ContainerStatus , len ( containers ) )
for _ , status := range previousStatus {
2016-02-13 05:56:12 +00:00
oldStatuses [ status . Name ] = status
}
// Set all container statuses to default waiting state
2016-03-29 03:08:54 +00:00
statuses := make ( map [ string ] * api . ContainerStatus , len ( containers ) )
2016-02-13 05:56:12 +00:00
defaultWaitingState := api . ContainerState { Waiting : & api . ContainerStateWaiting { Reason : "ContainerCreating" } }
2016-03-29 03:08:54 +00:00
if hasInitContainers {
defaultWaitingState = api . ContainerState { Waiting : & api . ContainerStateWaiting { Reason : "PodInitializing" } }
}
for _ , container := range containers {
2016-02-13 05:56:12 +00:00
status := & api . ContainerStatus {
Name : container . Name ,
Image : container . Image ,
State : defaultWaitingState ,
}
// Apply some values from the old statuses as the default values.
if oldStatus , found := oldStatuses [ container . Name ] ; found {
status . RestartCount = oldStatus . RestartCount
status . LastTerminationState = oldStatus . LastTerminationState
}
statuses [ container . Name ] = status
2016-01-18 01:21:08 +00:00
}
2016-02-13 05:56:12 +00:00
// Make the latest container status come first.
sort . Sort ( sort . Reverse ( kubecontainer . SortContainerStatusesByCreationTime ( podStatus . ContainerStatuses ) ) )
// Set container statuses according to the statuses seen in pod status
containerSeen := map [ string ] int { }
for _ , cStatus := range podStatus . ContainerStatuses {
cName := cStatus . Name
if _ , ok := statuses [ cName ] ; ! ok {
2016-01-18 01:21:08 +00:00
// This would also ignore the infra container.
continue
}
2016-02-13 05:56:12 +00:00
if containerSeen [ cName ] >= 2 {
2016-01-18 01:21:08 +00:00
continue
}
2016-02-13 05:56:12 +00:00
status := convertContainerStatus ( cStatus )
if containerSeen [ cName ] == 0 {
2016-01-18 01:21:08 +00:00
statuses [ cName ] = status
2016-02-13 05:56:12 +00:00
} else {
statuses [ cName ] . LastTerminationState = status . State
2016-01-18 01:21:08 +00:00
}
2016-02-13 05:56:12 +00:00
containerSeen [ cName ] = containerSeen [ cName ] + 1
2016-01-18 01:21:08 +00:00
}
2016-02-13 05:56:12 +00:00
// Handle the containers that failed to start; they should be in the Waiting state.
2016-03-29 03:08:54 +00:00
for _ , container := range containers {
2016-05-19 20:07:17 +00:00
if isInitContainer {
// If the init container is terminated with exit code 0, it won't be restarted.
// TODO(random-liu): Handle this in a cleaner way.
s := podStatus . FindContainerStatusByName ( container . Name )
if s != nil && s . State == kubecontainer . ContainerStateExited && s . ExitCode == 0 {
continue
}
}
2016-02-13 05:56:12 +00:00
// If a container should be restarted in the next syncpod, it is *Waiting*.
if ! kubecontainer . ShouldContainerBeRestarted ( & container , pod , podStatus ) {
2016-01-18 01:21:08 +00:00
continue
}
2016-02-13 05:56:12 +00:00
status := statuses [ container . Name ]
2016-03-29 03:08:54 +00:00
reason , message , ok := kl . reasonCache . Get ( pod . UID , container . Name )
2016-01-18 01:21:08 +00:00
if ! ok {
2016-02-13 05:56:12 +00:00
// In fact, we could also apply Waiting state here, but it is less informative,
// and the container will be restarted soon, so we prefer the original state here.
// Note that with the current implementation of ShouldContainerBeRestarted the original state here
// could be:
// * Waiting: There is no associated historical container and start failure reason record.
// * Terminated: The container is terminated.
continue
}
if status . State . Terminated != nil {
status . LastTerminationState = status . State
2016-01-18 01:21:08 +00:00
}
2016-02-13 05:56:12 +00:00
status . State = api . ContainerState {
Waiting : & api . ContainerStateWaiting {
Reason : reason . Error ( ) ,
Message : message ,
} ,
}
statuses [ container . Name ] = status
2016-01-18 01:21:08 +00:00
}
2016-03-29 03:08:54 +00:00
var containerStatuses [ ] api . ContainerStatus
2016-02-13 05:56:12 +00:00
for _ , status := range statuses {
2016-03-29 03:08:54 +00:00
containerStatuses = append ( containerStatuses , * status )
2016-01-18 01:21:08 +00:00
}
// Sort the container statuses since clients of this interface expect the list
// of containers in a pod to have a deterministic order.
2016-06-06 10:30:56 +00:00
if isInitContainer {
kubetypes . SortInitContainerStatuses ( pod , containerStatuses )
} else {
sort . Sort ( kubetypes . SortedContainerStatuses ( containerStatuses ) )
}
2016-03-29 03:08:54 +00:00
return containerStatuses
2016-01-18 01:21:08 +00:00
}
2014-07-15 07:04:30 +00:00
// ServeLogs returns the logs of the current machine.
func ( kl * Kubelet ) ServeLogs ( w http . ResponseWriter , req * http . Request ) {
// TODO: whitelist logs we are willing to serve
2014-07-22 21:40:59 +00:00
kl . logServer . ServeHTTP ( w , req )
2014-07-15 07:04:30 +00:00
}
// findContainer finds and returns the container with the given pod ID, full name, and container name.
// It returns nil if not found.
func ( kl * Kubelet ) findContainer ( podFullName string , podUID types . UID , containerName string ) ( * kubecontainer . Container , error ) {
2015-05-01 22:25:11 +00:00
pods , err := kl . containerRuntime . GetPods ( false )
2015-04-06 23:58:34 +00:00
if err != nil {
return nil , err
}
pod := kubecontainer . Pods ( pods ) . FindPod ( podFullName , podUID )
return pod . FindContainerByName ( containerName ) , nil
}
2014-08-07 18:15:11 +00:00
// RunInContainer runs a command in a container and returns the combined stdout and stderr as a byte slice
2015-04-06 23:58:34 +00:00
func ( kl * Kubelet ) RunInContainer ( podFullName string , podUID types . UID , containerName string , cmd [ ] string ) ( [ ] byte , error ) {
podUID = kl . podManager . TranslatePodUID ( podUID )
container , err := kl . findContainer ( podFullName , podUID , containerName )
2014-08-07 18:15:11 +00:00
if err != nil {
return nil , err
}
2015-04-06 23:58:34 +00:00
if container == nil {
return nil , fmt . Errorf ( "container not found (%q)" , containerName )
2014-08-07 18:15:11 +00:00
}
2016-04-28 09:33:15 +00:00
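// Collect combined stdout and stderr into an in-memory buffer.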
var buffer bytes . Buffer
output := ioutils . WriteCloserWrapper ( & buffer )
err = kl . runner . ExecInContainer ( container . ID , cmd , nil , output , output , false )
if err != nil {
return nil , err
}
return buffer . Bytes ( ) , nil
2014-08-07 18:15:11 +00:00
}
// ExecInContainer executes a command in a container, connecting the supplied
// stdin/stdout/stderr to the command's IO streams.
2015-04-06 23:58:34 +00:00
func ( kl * Kubelet ) ExecInContainer ( podFullName string , podUID types . UID , containerName string , cmd [ ] string , stdin io . Reader , stdout , stderr io . WriteCloser , tty bool ) error {
podUID = kl . podManager . TranslatePodUID ( podUID )
container , err := kl . findContainer ( podFullName , podUID , containerName )
2015-01-08 20:41:38 +00:00
if err != nil {
return err
}
2015-04-06 23:58:34 +00:00
if container == nil {
return fmt . Errorf ( "container not found (%q)" , containerName )
2015-01-08 20:41:38 +00:00
}
2015-10-07 17:58:05 +00:00
return kl . runner . ExecInContainer ( container . ID , cmd , stdin , stdout , stderr , tty )
2015-01-08 20:41:38 +00:00
}
2016-04-28 04:26:36 +00:00
// AttachContainer uses the container runtime to attach the given streams to
// the given container.
2015-07-28 04:48:55 +00:00
func ( kl * Kubelet ) AttachContainer ( podFullName string , podUID types . UID , containerName string , stdin io . Reader , stdout , stderr io . WriteCloser , tty bool ) error {
podUID = kl . podManager . TranslatePodUID ( podUID )
container , err := kl . findContainer ( podFullName , podUID , containerName )
if err != nil {
return err
}
if container == nil {
return fmt . Errorf ( "container not found (%q)" , containerName )
}
2015-10-07 17:58:05 +00:00
return kl . containerRuntime . AttachContainer ( container . ID , stdin , stdout , stderr , tty )
2015-07-28 04:48:55 +00:00
}
2015-01-08 20:41:38 +00:00
// PortForward connects to the pod's port and copies data between the port
// and the stream.
2015-04-06 23:58:34 +00:00
func ( kl * Kubelet ) PortForward ( podFullName string , podUID types . UID , port uint16 , stream io . ReadWriteCloser ) error {
podUID = kl . podManager . TranslatePodUID ( podUID )
pods , err := kl . containerRuntime . GetPods ( false )
2015-01-08 20:41:38 +00:00
if err != nil {
return err
}
2015-04-06 23:58:34 +00:00
pod := kubecontainer . Pods ( pods ) . FindPod ( podFullName , podUID )
2015-06-05 21:10:45 +00:00
if pod . IsEmpty ( ) {
return fmt . Errorf ( "pod not found (%q)" , podFullName )
}
2015-04-06 23:58:34 +00:00
return kl . runner . PortForward ( & pod , port , stream )
2015-01-08 20:41:38 +00:00
}
2015-03-30 13:20:20 +00:00
// BirthCry sends an event that the kubelet has started up.
func ( kl * Kubelet ) BirthCry ( ) {
// Make an event that kubelet restarted.
2015-11-13 22:30:01 +00:00
kl . recorder . Eventf ( kl . nodeRef , api . EventTypeNormal , kubecontainer . StartingKubelet , "Starting kubelet." )
2014-11-10 21:13:57 +00:00
}
2015-01-08 20:41:38 +00:00
2016-04-28 04:26:36 +00:00
// StreamingConnectionIdleTimeout returns the timeout for streaming connections to the HTTP server.
2015-01-08 20:41:38 +00:00
func ( kl * Kubelet ) StreamingConnectionIdleTimeout ( ) time . Duration {
return kl . streamingConnectionIdleTimeout
}
// ResyncInterval returns the interval used for periodic syncs.
2015-06-17 22:31:46 +00:00
func ( kl * Kubelet ) ResyncInterval ( ) time . Duration {
return kl . resyncInterval
}
2016-04-28 04:26:36 +00:00
// ListenAndServe runs the kubelet HTTP server.
2015-12-10 20:14:26 +00:00
func ( kl * Kubelet ) ListenAndServe ( address net . IP , port uint , tlsOptions * server . TLSOptions , auth server . AuthInterface , enableDebuggingHandlers bool ) {
2016-03-29 00:05:02 +00:00
server . ListenAndServeKubeletServer ( kl , kl . resourceAnalyzer , address , port , tlsOptions , auth , enableDebuggingHandlers , kl . containerRuntime )
2015-03-26 12:31:54 +00:00
}
2016-04-28 04:26:36 +00:00
// ListenAndServeReadOnly runs the kubelet HTTP server in read-only mode.
2015-03-26 12:31:54 +00:00
func ( kl * Kubelet ) ListenAndServeReadOnly ( address net . IP , port uint ) {
2016-03-29 00:05:02 +00:00
server . ListenAndServeKubeletReadOnlyServer ( kl , kl . resourceAnalyzer , address , port , kl . containerRuntime )
2015-03-26 12:31:54 +00:00
}