2014-06-06 23:40:48 +00:00
/ *
2016-06-03 00:25:58 +00:00
Copyright 2015 The Kubernetes Authors .
2014-06-06 23:40:48 +00:00
Licensed under the Apache License , Version 2.0 ( the "License" ) ;
you may not use this file except in compliance with the License .
You may obtain a copy of the License at
http : //www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing , software
distributed under the License is distributed on an "AS IS" BASIS ,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND , either express or implied .
See the License for the specific language governing permissions and
limitations under the License .
* /
package kubelet
import (
2017-02-17 19:32:41 +00:00
"crypto/tls"
2014-06-06 23:40:48 +00:00
"fmt"
2014-11-12 05:21:40 +00:00
"net"
2014-06-06 23:40:48 +00:00
"net/http"
2016-11-04 18:50:51 +00:00
"net/url"
2014-11-07 06:41:16 +00:00
"os"
2014-07-29 17:20:50 +00:00
"path"
2014-10-28 00:29:55 +00:00
"sort"
2014-06-06 23:40:48 +00:00
"strings"
2015-05-05 10:19:54 +00:00
"sync"
2016-05-04 04:24:56 +00:00
"sync/atomic"
2014-06-06 23:40:48 +00:00
"time"
2015-08-05 22:05:17 +00:00
"github.com/golang/glog"
2017-01-04 20:13:06 +00:00
clientgoclientset "k8s.io/client-go/kubernetes"
2015-10-16 03:00:28 +00:00
cadvisorapi "github.com/google/cadvisor/info/v1"
2017-05-25 19:29:19 +00:00
cadvisorapiv2 "github.com/google/cadvisor/info/v2"
2017-06-22 18:24:23 +00:00
"k8s.io/api/core/v1"
clientv1 "k8s.io/api/core/v1"
2017-01-22 03:36:02 +00:00
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2017-01-19 14:50:16 +00:00
"k8s.io/apimachinery/pkg/fields"
2017-01-11 14:09:48 +00:00
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types"
2017-05-19 17:57:39 +00:00
"k8s.io/apimachinery/pkg/util/clock"
2017-01-11 14:09:48 +00:00
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
2017-01-20 13:05:41 +00:00
utilfeature "k8s.io/apiserver/pkg/util/feature"
2017-01-30 18:39:54 +00:00
v1core "k8s.io/client-go/kubernetes/typed/core/v1"
2017-01-24 14:11:51 +00:00
"k8s.io/client-go/tools/cache"
2017-01-30 18:39:54 +00:00
"k8s.io/client-go/tools/record"
2017-01-23 18:37:22 +00:00
"k8s.io/client-go/util/flowcontrol"
"k8s.io/client-go/util/integer"
2017-05-24 22:19:54 +00:00
"k8s.io/kubernetes/cmd/kubelet/app/options"
2015-08-05 22:03:47 +00:00
"k8s.io/kubernetes/pkg/api"
2016-02-16 18:23:58 +00:00
"k8s.io/kubernetes/pkg/apis/componentconfig"
2016-09-06 16:55:36 +00:00
componentconfigv1alpha1 "k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1"
2017-01-10 08:49:34 +00:00
"k8s.io/kubernetes/pkg/client/clientset_generated/clientset"
2017-02-21 20:00:57 +00:00
corelisters "k8s.io/kubernetes/pkg/client/listers/core/v1"
2015-08-05 22:03:47 +00:00
"k8s.io/kubernetes/pkg/cloudprovider"
2017-01-20 13:05:41 +00:00
"k8s.io/kubernetes/pkg/features"
2017-02-23 00:05:05 +00:00
internalapi "k8s.io/kubernetes/pkg/kubelet/apis/cri"
2015-08-05 22:03:47 +00:00
"k8s.io/kubernetes/pkg/kubelet/cadvisor"
2017-02-17 19:32:41 +00:00
"k8s.io/kubernetes/pkg/kubelet/certificate"
2015-10-10 00:09:53 +00:00
"k8s.io/kubernetes/pkg/kubelet/cm"
2016-04-26 17:58:12 +00:00
"k8s.io/kubernetes/pkg/kubelet/config"
2017-05-25 21:23:57 +00:00
"k8s.io/kubernetes/pkg/kubelet/configmap"
2015-08-05 22:03:47 +00:00
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
2016-09-16 01:25:18 +00:00
"k8s.io/kubernetes/pkg/kubelet/dockershim"
2017-05-03 17:46:35 +00:00
"k8s.io/kubernetes/pkg/kubelet/dockershim/libdocker"
2016-10-04 03:49:19 +00:00
dockerremote "k8s.io/kubernetes/pkg/kubelet/dockershim/remote"
2016-07-13 00:32:24 +00:00
"k8s.io/kubernetes/pkg/kubelet/events"
2016-04-25 19:48:47 +00:00
"k8s.io/kubernetes/pkg/kubelet/eviction"
2017-02-26 05:16:13 +00:00
"k8s.io/kubernetes/pkg/kubelet/gpu"
2016-12-03 07:12:38 +00:00
"k8s.io/kubernetes/pkg/kubelet/gpu/nvidia"
2016-07-19 22:42:21 +00:00
"k8s.io/kubernetes/pkg/kubelet/images"
2016-08-08 07:40:53 +00:00
"k8s.io/kubernetes/pkg/kubelet/kuberuntime"
2016-04-15 18:17:17 +00:00
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
2015-08-05 22:03:47 +00:00
"k8s.io/kubernetes/pkg/kubelet/metrics"
"k8s.io/kubernetes/pkg/kubelet/network"
2015-08-07 21:42:21 +00:00
"k8s.io/kubernetes/pkg/kubelet/pleg"
2015-10-12 23:28:23 +00:00
kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
2017-02-23 18:31:20 +00:00
"k8s.io/kubernetes/pkg/kubelet/preemption"
2015-08-25 17:39:41 +00:00
"k8s.io/kubernetes/pkg/kubelet/prober"
2015-10-19 22:15:59 +00:00
proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results"
2016-08-08 07:40:53 +00:00
"k8s.io/kubernetes/pkg/kubelet/remote"
2015-08-05 22:03:47 +00:00
"k8s.io/kubernetes/pkg/kubelet/rkt"
2016-11-18 12:14:03 +00:00
"k8s.io/kubernetes/pkg/kubelet/secret"
2015-12-10 20:14:26 +00:00
"k8s.io/kubernetes/pkg/kubelet/server"
2016-01-14 19:19:26 +00:00
"k8s.io/kubernetes/pkg/kubelet/server/stats"
2016-11-04 18:50:51 +00:00
"k8s.io/kubernetes/pkg/kubelet/server/streaming"
2015-09-11 19:22:01 +00:00
"k8s.io/kubernetes/pkg/kubelet/status"
2016-08-19 08:53:25 +00:00
"k8s.io/kubernetes/pkg/kubelet/sysctl"
2015-10-09 17:24:31 +00:00
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
2015-11-20 17:54:37 +00:00
"k8s.io/kubernetes/pkg/kubelet/util/format"
2015-09-02 17:18:11 +00:00
"k8s.io/kubernetes/pkg/kubelet/util/queue"
2016-08-22 20:38:36 +00:00
"k8s.io/kubernetes/pkg/kubelet/util/sliceutils"
2016-07-02 01:50:25 +00:00
"k8s.io/kubernetes/pkg/kubelet/volumemanager"
2016-08-30 00:54:15 +00:00
"k8s.io/kubernetes/pkg/security/apparmor"
2016-07-28 20:01:00 +00:00
utildbus "k8s.io/kubernetes/pkg/util/dbus"
2016-04-15 01:01:40 +00:00
utilexec "k8s.io/kubernetes/pkg/util/exec"
2015-09-14 09:51:40 +00:00
kubeio "k8s.io/kubernetes/pkg/util/io"
2016-07-28 20:01:00 +00:00
utilipt "k8s.io/kubernetes/pkg/util/iptables"
2015-08-05 22:03:47 +00:00
"k8s.io/kubernetes/pkg/util/mount"
2016-07-13 23:11:12 +00:00
nodeutil "k8s.io/kubernetes/pkg/util/node"
2015-08-04 00:28:33 +00:00
"k8s.io/kubernetes/pkg/util/oom"
2015-08-05 22:03:47 +00:00
"k8s.io/kubernetes/pkg/volume"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates"
2014-06-06 23:40:48 +00:00
)
2015-03-06 00:37:08 +00:00
const (
	// maxWaitForContainerRuntime is the maximum amount of time to wait for
	// the container runtime to come up.
	maxWaitForContainerRuntime = 30 * time.Second

	// nodeStatusUpdateRetry is the number of times the kubelet retries when
	// posting node status has failed.
	nodeStatusUpdateRetry = 5

	// ContainerLogsDir is the location of container logs.
	ContainerLogsDir = "/var/log/containers"

	// MaxContainerBackOff is the maximum backoff period. It is exported for
	// the e2e test.
	MaxContainerBackOff = 300 * time.Second

	// podKillingChannelCapacity is the capacity of the channel that stores
	// pods to kill. A small number should suffice because a goroutine is
	// dedicated to checking the channel and does not block on anything else.
	podKillingChannelCapacity = 50

	// housekeepingPeriod is the period for performing global cleanup tasks.
	housekeepingPeriod = 2 * time.Second

	// evictionMonitoringPeriod is the period for performing eviction
	// monitoring.
	// TODO ensure this is in sync with internal cadvisor housekeeping.
	evictionMonitoringPeriod = 10 * time.Second

	// etcHostsPath is the path in containers' filesystems where the hosts
	// file is mounted.
	etcHostsPath = "/etc/hosts"

	// plegChannelCapacity is the capacity of the channel for receiving pod
	// lifecycle events. This number is a bit arbitrary and may be adjusted
	// in the future.
	plegChannelCapacity = 1000

	// plegRelistPeriod is the relisting period of the generic PLEG, which
	// relies on relisting for discovering container events.
	// A longer period means that kubelet will take longer to detect
	// container changes and to update pod status. On the other hand, a
	// shorter period will cause more frequent relisting (e.g., container
	// runtime operations), leading to higher cpu usage.
	// Note that even though the period is set to 1s, the relisting itself
	// can take more than 1s to finish if the container runtime responds
	// slowly and/or when there are many container changes in one cycle.
	plegRelistPeriod = time.Second

	// backOffPeriod is the period to back off when pod syncing results in
	// an error. It is also used as the base period for the exponential
	// backoff of container restarts and image pulls.
	backOffPeriod = 10 * time.Second

	// ContainerGCPeriod is the period for performing container garbage
	// collection.
	ContainerGCPeriod = 1 * time.Minute

	// ImageGCPeriod is the period for performing image garbage collection.
	ImageGCPeriod = 5 * time.Minute

	// minDeadContainerInPod is the minimum number of dead containers to
	// keep in a pod.
	minDeadContainerInPod = 1
)
2015-02-20 03:17:44 +00:00
2014-07-15 20:24:41 +00:00
// SyncHandler is an interface implemented by Kubelet, for testability
type SyncHandler interface {
2016-11-18 20:50:58 +00:00
HandlePodAdditions ( pods [ ] * v1 . Pod )
HandlePodUpdates ( pods [ ] * v1 . Pod )
HandlePodRemoves ( pods [ ] * v1 . Pod )
HandlePodReconcile ( pods [ ] * v1 . Pod )
HandlePodSyncs ( pods [ ] * v1 . Pod )
2015-08-19 00:52:26 +00:00
HandlePodCleanups ( ) error
2014-07-15 20:24:41 +00:00
}
2016-02-19 01:54:48 +00:00
// Option is the functional-option type for Kubelet: a function that
// receives the *Kubelet it is applied to.
type Option func(*Kubelet)
2016-07-13 23:11:12 +00:00
// bootstrapping interface for kubelet, targets the initialization protocol
type KubeletBootstrap interface {
2016-08-30 01:03:34 +00:00
GetConfiguration ( ) componentconfig . KubeletConfiguration
2016-07-13 23:11:12 +00:00
BirthCry ( )
StartGarbageCollection ( )
2017-02-28 18:43:08 +00:00
ListenAndServe ( address net . IP , port uint , tlsOptions * server . TLSOptions , auth server . AuthInterface , enableDebuggingHandlers , enableContentionProfiling bool )
2016-07-13 23:11:12 +00:00
ListenAndServeReadOnly ( address net . IP , port uint )
Run ( <- chan kubetypes . PodUpdate )
RunOnce ( <- chan kubetypes . PodUpdate ) ( [ ] RunPodResult , error )
}
// create and initialize a Kubelet instance
2017-05-24 22:19:54 +00:00
type KubeletBuilder func ( kubeCfg * componentconfig . KubeletConfiguration , kubeDeps * KubeletDeps , crOptions * options . ContainerRuntimeOptions , standaloneMode bool , hostnameOverride , nodeIP , providerID string ) ( KubeletBootstrap , error )
2016-07-13 23:11:12 +00:00
// KubeletDeps is a bin for things we might consider "injected dependencies" -- objects constructed
// at runtime that are necessary for running the Kubelet. This is a temporary solution for grouping
// these objects while we figure out a more comprehensive dependency injection story for the Kubelet.
type KubeletDeps struct {
// TODO(mtaufen): KubeletBuilder:
// Mesos currently uses this as a hook to let them make their own call to
// let them wrap the KubeletBootstrap that CreateAndInitKubelet returns with
// their own KubeletBootstrap. It's a useful hook. I need to think about what
// a nice home for it would be. There seems to be a trend, between this and
// the Options fields below, of providing hooks where you can add extra functionality
// to the Kubelet for your solution. Maybe we should centralize these sorts of things?
Builder KubeletBuilder
// TODO(mtaufen): ContainerRuntimeOptions and Options:
// Arrays of functions that can do arbitrary things to the Kubelet and the Runtime
// seem like a difficult path to trace when it's time to debug something.
// I'm leaving these fields here for now, but there is likely an easier-to-follow
// way to support their intended use cases. E.g. ContainerRuntimeOptions
// is used by Mesos to set an environment variable in containers which has
// some connection to their container GC. It seems that Mesos intends to use
// Options to add additional node conditions that are updated as part of the
// Kubelet lifecycle (see https://github.com/kubernetes/kubernetes/pull/21521).
// We should think about providing more explicit ways of doing these things.
ContainerRuntimeOptions [ ] kubecontainer . Option
Options [ ] Option
// Injected Dependencies
2017-01-04 20:13:06 +00:00
Auth server . AuthInterface
CAdvisorInterface cadvisor . Interface
Cloud cloudprovider . Interface
ContainerManager cm . ContainerManager
2017-05-03 17:46:35 +00:00
DockerClient libdocker . Interface
2017-01-30 18:39:54 +00:00
EventClient v1core . EventsGetter
2017-01-31 22:34:39 +00:00
KubeClient clientset . Interface
2017-01-04 20:13:06 +00:00
ExternalKubeClient clientgoclientset . Interface
Mounter mount . Interface
NetworkPlugins [ ] network . NetworkPlugin
OOMAdjuster * oom . OOMAdjuster
OSInterface kubecontainer . OSInterface
PodConfig * config . PodConfig
Recorder record . EventRecorder
Writer kubeio . Writer
VolumePlugins [ ] volume . VolumePlugin
TLSOptions * server . TLSOptions
2016-07-13 23:11:12 +00:00
}
2016-09-15 20:27:09 +00:00
// makePodSourceConfig creates a config.PodConfig from the given
// KubeletConfiguration or returns an error.
2016-07-16 06:10:29 +00:00
func makePodSourceConfig ( kubeCfg * componentconfig . KubeletConfiguration , kubeDeps * KubeletDeps , nodeName types . NodeName ) ( * config . PodConfig , error ) {
2016-07-13 23:11:12 +00:00
manifestURLHeader := make ( http . Header )
if kubeCfg . ManifestURLHeader != "" {
pieces := strings . Split ( kubeCfg . ManifestURLHeader , ":" )
if len ( pieces ) != 2 {
return nil , fmt . Errorf ( "manifest-url-header must have a single ':' key-value separator, got %q" , kubeCfg . ManifestURLHeader )
}
manifestURLHeader . Set ( pieces [ 0 ] , pieces [ 1 ] )
}
// source of all configuration
cfg := config . NewPodConfig ( config . PodConfigNotificationIncremental , kubeDeps . Recorder )
// define file config source
if kubeCfg . PodManifestPath != "" {
glog . Infof ( "Adding manifest file: %v" , kubeCfg . PodManifestPath )
config . NewSourceFile ( kubeCfg . PodManifestPath , nodeName , kubeCfg . FileCheckFrequency . Duration , cfg . Channel ( kubetypes . FileSource ) )
}
// define url config source
if kubeCfg . ManifestURL != "" {
glog . Infof ( "Adding manifest url %q with HTTP header %v" , kubeCfg . ManifestURL , manifestURLHeader )
config . NewSourceURL ( kubeCfg . ManifestURL , manifestURLHeader , nodeName , kubeCfg . HTTPCheckFrequency . Duration , cfg . Channel ( kubetypes . HTTPSource ) )
}
if kubeDeps . KubeClient != nil {
glog . Infof ( "Watching apiserver" )
config . NewSourceApiserver ( kubeDeps . KubeClient , nodeName , cfg . Channel ( kubetypes . ApiserverSource ) )
}
return cfg , nil
}
2016-11-30 07:27:27 +00:00
func getRuntimeAndImageServices ( config * componentconfig . KubeletConfiguration ) ( internalapi . RuntimeService , internalapi . ImageManagerService , error ) {
2016-11-04 00:06:04 +00:00
rs , err := remote . NewRemoteRuntimeService ( config . RemoteRuntimeEndpoint , config . RuntimeRequestTimeout . Duration )
if err != nil {
return nil , nil , err
}
is , err := remote . NewRemoteImageService ( config . RemoteImageEndpoint , config . RuntimeRequestTimeout . Duration )
if err != nil {
return nil , nil , err
}
return rs , is , err
}
2016-04-26 17:58:12 +00:00
// NewMainKubelet instantiates a new Kubelet object along with all the required internal modules.
2015-11-07 01:03:39 +00:00
// No initialization of Kubelet and its modules should happen here.
2017-05-24 22:19:54 +00:00
func NewMainKubelet ( kubeCfg * componentconfig . KubeletConfiguration , kubeDeps * KubeletDeps , crOptions * options . ContainerRuntimeOptions , standaloneMode bool , hostnameOverride , nodeIP , providerID string ) ( * Kubelet , error ) {
2016-07-13 23:11:12 +00:00
if kubeCfg . RootDirectory == "" {
return nil , fmt . Errorf ( "invalid root directory %q" , kubeCfg . RootDirectory )
}
if kubeCfg . SyncFrequency . Duration <= 0 {
return nil , fmt . Errorf ( "invalid sync frequency %d" , kubeCfg . SyncFrequency . Duration )
}
if kubeCfg . MakeIPTablesUtilChains {
if kubeCfg . IPTablesMasqueradeBit > 31 || kubeCfg . IPTablesMasqueradeBit < 0 {
return nil , fmt . Errorf ( "iptables-masquerade-bit is not valid. Must be within [0, 31]" )
}
if kubeCfg . IPTablesDropBit > 31 || kubeCfg . IPTablesDropBit < 0 {
return nil , fmt . Errorf ( "iptables-drop-bit is not valid. Must be within [0, 31]" )
}
if kubeCfg . IPTablesDropBit == kubeCfg . IPTablesMasqueradeBit {
return nil , fmt . Errorf ( "iptables-masquerade-bit and iptables-drop-bit must be different" )
}
}
2017-01-18 19:34:49 +00:00
hostname := nodeutil . GetHostname ( hostnameOverride )
2016-07-13 23:11:12 +00:00
// Query the cloud provider for our node name, default to hostname
2016-07-16 06:10:29 +00:00
nodeName := types . NodeName ( hostname )
2017-02-17 19:32:41 +00:00
cloudIPs := [ ] net . IP { }
cloudNames := [ ] string { }
2016-07-13 23:11:12 +00:00
if kubeDeps . Cloud != nil {
var err error
instances , ok := kubeDeps . Cloud . Instances ( )
if ! ok {
return nil , fmt . Errorf ( "failed to get instances from cloud provider" )
}
nodeName , err = instances . CurrentNodeName ( hostname )
if err != nil {
return nil , fmt . Errorf ( "error fetching current instance name from cloud provider: %v" , err )
}
glog . V ( 2 ) . Infof ( "cloud provider determined current node name to be %s" , nodeName )
2017-02-17 19:32:41 +00:00
if utilfeature . DefaultFeatureGate . Enabled ( features . RotateKubeletServerCertificate ) {
nodeAddresses , err := instances . NodeAddresses ( nodeName )
if err != nil {
return nil , fmt . Errorf ( "failed to get the addresses of the current instance from the cloud provider: %v" , err )
}
for _ , nodeAddress := range nodeAddresses {
switch nodeAddress . Type {
case v1 . NodeExternalIP , v1 . NodeInternalIP :
ip := net . ParseIP ( nodeAddress . Address )
if ip != nil && ! ip . IsLoopback ( ) {
cloudIPs = append ( cloudIPs , ip )
}
case v1 . NodeExternalDNS , v1 . NodeInternalDNS , v1 . NodeHostName :
cloudNames = append ( cloudNames , nodeAddress . Address )
}
}
}
2016-07-13 23:11:12 +00:00
}
if kubeDeps . PodConfig == nil {
var err error
kubeDeps . PodConfig , err = makePodSourceConfig ( kubeCfg , kubeDeps , nodeName )
if err != nil {
return nil , err
}
}
containerGCPolicy := kubecontainer . ContainerGCPolicy {
MinAge : kubeCfg . MinimumGCAge . Duration ,
MaxPerPodContainer : int ( kubeCfg . MaxPerPodContainerCount ) ,
MaxContainers : int ( kubeCfg . MaxContainerCount ) ,
}
2016-11-18 20:50:58 +00:00
daemonEndpoints := & v1 . NodeDaemonEndpoints {
KubeletEndpoint : v1 . DaemonEndpoint { Port : kubeCfg . Port } ,
2016-07-13 23:11:12 +00:00
}
imageGCPolicy := images . ImageGCPolicy {
MinAge : kubeCfg . ImageMinimumGCAge . Duration ,
HighThresholdPercent : int ( kubeCfg . ImageGCHighThresholdPercent ) ,
LowThresholdPercent : int ( kubeCfg . ImageGCLowThresholdPercent ) ,
}
diskSpacePolicy := DiskSpacePolicy {
DockerFreeDiskMB : int ( kubeCfg . LowDiskSpaceThresholdMB ) ,
RootFreeDiskMB : int ( kubeCfg . LowDiskSpaceThresholdMB ) ,
}
2017-03-02 01:56:24 +00:00
enforceNodeAllocatable := kubeCfg . EnforceNodeAllocatable
if kubeCfg . ExperimentalNodeAllocatableIgnoreEvictionThreshold {
// Do not provide kubeCfg.EnforceNodeAllocatable to eviction threshold parsing if we are not enforcing Evictions
enforceNodeAllocatable = [ ] string { }
}
thresholds , err := eviction . ParseThresholdConfig ( enforceNodeAllocatable , kubeCfg . EvictionHard , kubeCfg . EvictionSoft , kubeCfg . EvictionSoftGracePeriod , kubeCfg . EvictionMinimumReclaim )
2016-07-13 23:11:12 +00:00
if err != nil {
return nil , err
}
evictionConfig := eviction . Config {
PressureTransitionPeriod : kubeCfg . EvictionPressureTransitionPeriod . Duration ,
MaxPodGracePeriodSeconds : int64 ( kubeCfg . EvictionMaxPodGracePeriod ) ,
Thresholds : thresholds ,
2016-12-07 15:09:41 +00:00
KernelMemcgNotification : kubeCfg . ExperimentalKernelMemcgNotification ,
2016-07-13 23:11:12 +00:00
}
2017-02-21 20:00:57 +00:00
serviceIndexer := cache . NewIndexer ( cache . MetaNamespaceKeyFunc , cache . Indexers { cache . NamespaceIndex : cache . MetaNamespaceIndexFunc } )
2017-01-31 22:34:39 +00:00
if kubeDeps . KubeClient != nil {
serviceLW := cache . NewListWatchFromClient ( kubeDeps . KubeClient . Core ( ) . RESTClient ( ) , "services" , metav1 . NamespaceAll , fields . Everything ( ) )
2017-02-21 20:00:57 +00:00
cache . NewReflector ( serviceLW , & v1 . Service { } , serviceIndexer , 0 ) . Run ( )
2015-01-16 21:39:31 +00:00
}
2017-02-21 20:00:57 +00:00
serviceLister := corelisters . NewServiceLister ( serviceIndexer )
2015-01-08 15:25:14 +00:00
2017-02-21 20:00:57 +00:00
nodeIndexer := cache . NewIndexer ( cache . MetaNamespaceKeyFunc , cache . Indexers { } )
2017-01-31 22:34:39 +00:00
if kubeDeps . KubeClient != nil {
2016-07-16 06:10:29 +00:00
fieldSelector := fields . Set { api . ObjectNameField : string ( nodeName ) } . AsSelector ( )
2017-01-31 22:34:39 +00:00
nodeLW := cache . NewListWatchFromClient ( kubeDeps . KubeClient . Core ( ) . RESTClient ( ) , "nodes" , metav1 . NamespaceAll , fieldSelector )
2017-02-21 20:00:57 +00:00
cache . NewReflector ( nodeLW , & v1 . Node { } , nodeIndexer , 0 ) . Run ( )
2015-09-21 18:00:04 +00:00
}
2017-05-10 23:25:09 +00:00
nodeInfo := & predicates . CachedNodeInfo { NodeLister : corelisters . NewNodeLister ( nodeIndexer ) }
2015-09-21 18:00:04 +00:00
2015-09-09 14:18:17 +00:00
// TODO: get the real node object of ourself,
// and use the real node name and UID.
2015-03-27 20:12:48 +00:00
// TODO: what is namespace for node?
2017-01-31 20:59:22 +00:00
nodeRef := & clientv1 . ObjectReference {
2015-03-27 20:12:48 +00:00
Kind : "Node" ,
2016-07-16 06:10:29 +00:00
Name : string ( nodeName ) ,
2015-06-12 15:40:34 +00:00
UID : types . UID ( nodeName ) ,
2015-03-27 20:12:48 +00:00
Namespace : "" ,
}
2016-07-13 23:11:12 +00:00
diskSpaceManager , err := newDiskSpaceManager ( kubeDeps . CAdvisorInterface , diskSpacePolicy )
2015-05-12 08:24:08 +00:00
if err != nil {
return nil , fmt . Errorf ( "failed to initialize disk manager: %v" , err )
}
2015-04-23 21:16:59 +00:00
containerRefManager := kubecontainer . NewRefManager ( )
2016-07-13 23:11:12 +00:00
oomWatcher := NewOOMWatcher ( kubeDeps . CAdvisorInterface , kubeDeps . Recorder )
2015-04-11 00:29:56 +00:00
2017-01-03 09:28:38 +00:00
clusterDNS := make ( [ ] net . IP , 0 , len ( kubeCfg . ClusterDNS ) )
for _ , ipEntry := range kubeCfg . ClusterDNS {
ip := net . ParseIP ( ipEntry )
if ip == nil {
glog . Warningf ( "Invalid clusterDNS ip '%q'" , ipEntry )
} else {
clusterDNS = append ( clusterDNS , ip )
}
}
2017-05-10 23:25:09 +00:00
httpClient := & http . Client { }
2017-01-03 09:28:38 +00:00
2015-01-12 00:42:11 +00:00
klet := & Kubelet {
2015-04-17 22:54:28 +00:00
hostname : hostname ,
2015-06-12 15:40:34 +00:00
nodeName : nodeName ,
2017-01-31 22:34:39 +00:00
kubeClient : kubeDeps . KubeClient ,
2016-07-13 23:11:12 +00:00
rootDirectory : kubeCfg . RootDirectory ,
resyncInterval : kubeCfg . SyncFrequency . Duration ,
sourcesReady : config . NewSourcesReady ( kubeDeps . PodConfig . SeenAllSources ) ,
registerNode : kubeCfg . RegisterNode ,
registerSchedulable : kubeCfg . RegisterSchedulable ,
2015-06-12 17:20:26 +00:00
standaloneMode : standaloneMode ,
2016-07-13 23:11:12 +00:00
clusterDomain : kubeCfg . ClusterDomain ,
2017-01-03 09:28:38 +00:00
clusterDNS : clusterDNS ,
2015-04-17 22:54:28 +00:00
serviceLister : serviceLister ,
2015-11-26 08:57:26 +00:00
nodeInfo : nodeInfo ,
2016-07-13 23:11:12 +00:00
masterServiceNamespace : kubeCfg . MasterServiceNamespace ,
streamingConnectionIdleTimeout : kubeCfg . StreamingConnectionIdleTimeout . Duration ,
recorder : kubeDeps . Recorder ,
cadvisor : kubeDeps . CAdvisorInterface ,
2015-05-12 08:24:08 +00:00
diskSpaceManager : diskSpaceManager ,
2016-07-13 23:11:12 +00:00
cloud : kubeDeps . Cloud ,
2016-09-06 16:55:36 +00:00
autoDetectCloudProvider : ( componentconfigv1alpha1 . AutoDetectCloudProvider == kubeCfg . CloudProvider ) ,
2017-03-29 23:21:42 +00:00
externalCloudProvider : cloudprovider . IsExternal ( kubeCfg . CloudProvider ) ,
providerID : providerID ,
2016-06-29 22:30:20 +00:00
nodeRef : nodeRef ,
2016-07-13 23:11:12 +00:00
nodeLabels : kubeCfg . NodeLabels ,
nodeStatusUpdateFrequency : kubeCfg . NodeStatusUpdateFrequency . Duration ,
2017-05-10 23:25:09 +00:00
os : kubeDeps . OSInterface ,
oomWatcher : oomWatcher ,
cgroupsPerQOS : kubeCfg . CgroupsPerQOS ,
cgroupRoot : kubeCfg . CgroupRoot ,
mounter : kubeDeps . Mounter ,
writer : kubeDeps . Writer ,
maxPods : int ( kubeCfg . MaxPods ) ,
podsPerCore : int ( kubeCfg . PodsPerCore ) ,
syncLoopMonitor : atomic . Value { } ,
resolverConfig : kubeCfg . ResolverConfig ,
daemonEndpoints : daemonEndpoints ,
containerManager : kubeDeps . ContainerManager ,
nodeIP : net . ParseIP ( nodeIP ) ,
clock : clock . RealClock { } ,
2016-08-25 20:45:38 +00:00
outOfDiskTransitionFrequency : kubeCfg . OutOfDiskTransitionFrequency . Duration ,
enableControllerAttachDetach : kubeCfg . EnableControllerAttachDetach ,
iptClient : utilipt . New ( utilexec . New ( ) , utildbus . New ( ) , utilipt . ProtocolIpv4 ) ,
makeIPTablesUtilChains : kubeCfg . MakeIPTablesUtilChains ,
iptablesMasqueradeBit : int ( kubeCfg . IPTablesMasqueradeBit ) ,
iptablesDropBit : int ( kubeCfg . IPTablesDropBit ) ,
2017-01-20 13:05:41 +00:00
experimentalHostUserNamespaceDefaulting : utilfeature . DefaultFeatureGate . Enabled ( features . ExperimentalHostUserNamespaceDefaultingGate ) ,
2016-08-25 20:45:38 +00:00
}
2017-01-22 06:30:50 +00:00
secretManager := secret . NewCachingSecretManager (
2017-01-30 12:37:48 +00:00
kubeDeps . KubeClient , secret . GetObjectTTLFromNodeFunc ( klet . GetNode ) )
klet . secretManager = secretManager
2017-05-30 13:35:43 +00:00
configMapManager := configmap . NewCachingConfigMapManager (
kubeDeps . KubeClient , configmap . GetObjectTTLFromNodeFunc ( klet . GetNode ) )
2017-05-25 21:23:57 +00:00
klet . configMapManager = configMapManager
2016-08-25 20:45:38 +00:00
if klet . experimentalHostUserNamespaceDefaulting {
glog . Infof ( "Experimental host user namespace defaulting is enabled." )
2015-11-24 02:11:51 +00:00
}
2016-01-14 19:19:26 +00:00
2017-05-24 22:19:54 +00:00
hairpinMode , err := effectiveHairpinMode ( componentconfig . HairpinMode ( kubeCfg . HairpinMode ) , kubeCfg . ContainerRuntime , crOptions . NetworkPluginName )
2017-05-10 23:25:09 +00:00
if err != nil {
2016-03-31 22:20:04 +00:00
// This is a non-recoverable error. Returning it up the callstack will just
// lead to retries of the same failure, so just fail hard.
glog . Fatalf ( "Invalid hairpin mode: %v" , err )
}
2017-05-10 23:25:09 +00:00
glog . Infof ( "Hairpin mode set to %q" , hairpinMode )
2016-03-31 22:20:04 +00:00
2017-06-09 01:18:05 +00:00
// TODO(#36485) Remove this workaround once we fix the init-container issue.
// Touch iptables lock file, which will be shared among all processes accessing
// the iptables.
f , err := os . OpenFile ( utilipt . LockfilePath16x , os . O_CREATE , 0600 )
if err != nil {
glog . Warningf ( "Failed to open iptables lock file: %v" , err )
} else if err = f . Close ( ) ; err != nil {
glog . Warningf ( "Failed to close iptables lock file: %v" , err )
}
2017-05-24 22:19:54 +00:00
if plug , err := network . InitNetworkPlugin ( kubeDeps . NetworkPlugins , crOptions . NetworkPluginName , & criNetworkHost { & networkHost { klet } , & network . NoopPortMappingGetter { } } , hairpinMode , kubeCfg . NonMasqueradeCIDR , int ( crOptions . NetworkPluginMTU ) ) ; err != nil {
2015-04-28 18:02:29 +00:00
return nil , err
} else {
klet . networkPlugin = plug
}
2015-05-01 21:24:07 +00:00
2015-09-21 18:06:38 +00:00
machineInfo , err := klet . GetCachedMachineInfo ( )
2015-08-04 00:28:33 +00:00
if err != nil {
return nil , err
}
2016-03-09 02:58:24 +00:00
imageBackOff := flowcontrol . NewBackOff ( backOffPeriod , MaxContainerBackOff )
2015-10-19 22:15:59 +00:00
2015-11-10 22:00:12 +00:00
klet . livenessManager = proberesults . NewManager ( )
2015-10-19 22:15:59 +00:00
2016-01-20 02:15:10 +00:00
klet . podCache = kubecontainer . NewCache ( )
2017-05-25 21:23:57 +00:00
// podManager is also responsible for keeping secretManager and configMapManager contents up-to-date.
klet . podManager = kubepod . NewBasicPodManager ( kubepod . NewBasicMirrorClient ( klet . kubeClient ) , secretManager , configMapManager )
2016-01-20 02:15:10 +00:00
2016-07-13 23:11:12 +00:00
if kubeCfg . RemoteRuntimeEndpoint != "" {
// kubeCfg.RemoteImageEndpoint is same as kubeCfg.RemoteRuntimeEndpoint if not explicitly specified
if kubeCfg . RemoteImageEndpoint == "" {
kubeCfg . RemoteImageEndpoint = kubeCfg . RemoteRuntimeEndpoint
2016-08-08 07:40:53 +00:00
}
}
2016-10-29 00:01:06 +00:00
// TODO: These need to become arguments to a standalone docker shim.
2017-05-24 22:19:54 +00:00
binDir := crOptions . CNIBinDir
2016-10-25 03:42:20 +00:00
if binDir == "" {
2017-05-24 22:19:54 +00:00
binDir = crOptions . NetworkPluginDir
2016-10-25 03:42:20 +00:00
}
pluginSettings := dockershim . NetworkPluginSettings {
2017-05-10 23:25:09 +00:00
HairpinMode : hairpinMode ,
NonMasqueradeCIDR : kubeCfg . NonMasqueradeCIDR ,
2017-05-24 22:19:54 +00:00
PluginName : crOptions . NetworkPluginName ,
PluginConfDir : crOptions . CNIConfDir ,
2016-10-25 03:42:20 +00:00
PluginBinDir : binDir ,
2017-05-24 22:19:54 +00:00
MTU : int ( crOptions . NetworkPluginMTU ) ,
2016-10-25 03:42:20 +00:00
}
2016-10-29 00:01:06 +00:00
// Remote runtime shim just cannot talk back to kubelet, so it doesn't
// support bandwidth shaping or hostports till #35457. To enable legacy
// features, replace with networkHost.
2017-05-03 23:32:01 +00:00
var nl * NoOpLegacyHost
2016-10-29 00:01:06 +00:00
pluginSettings . LegacyRuntimeHost = nl
2017-02-14 01:59:26 +00:00
// rktnetes cannot be run with CRI.
2017-06-20 11:51:12 +00:00
if kubeCfg . ContainerRuntime != kubetypes . RktContainerRuntime {
2016-11-04 00:06:04 +00:00
// kubelet defers to the runtime shim to setup networking. Setting
// this to nil will prevent it from trying to invoke the plugin.
// It's easier to always probe and initialize plugins till cri
// becomes the default.
klet . networkPlugin = nil
switch kubeCfg . ContainerRuntime {
2017-06-20 11:51:12 +00:00
case kubetypes . DockerContainerRuntime :
2017-01-27 01:22:43 +00:00
// Create and start the CRI shim running as a grpc server.
2016-11-04 18:50:51 +00:00
streamingConfig := getStreamingConfig ( kubeCfg , kubeDeps )
2017-05-24 22:19:54 +00:00
ds , err := dockershim . NewDockerService ( kubeDeps . DockerClient , kubeCfg . SeccompProfileRoot , crOptions . PodSandboxImage ,
streamingConfig , & pluginSettings , kubeCfg . RuntimeCgroups , kubeCfg . CgroupDriver , crOptions . DockerExecHandlerName ,
crOptions . DockershimRootDirectory , crOptions . DockerDisableSharedPID )
2016-10-10 20:56:53 +00:00
if err != nil {
return nil , err
}
2016-11-04 00:06:04 +00:00
if err := ds . Start ( ) ; err != nil {
return nil , err
}
2017-01-27 01:22:43 +00:00
// For now, the CRI shim redirects the streaming requests to the
// kubelet, which handles the requests using DockerService.
2016-11-04 18:50:51 +00:00
klet . criHandler = ds
2017-01-27 01:22:43 +00:00
2017-05-19 03:18:18 +00:00
// The unix socket for kubelet <-> dockershim communication.
glog . V ( 5 ) . Infof ( "RemoteRuntimeEndpoint: %q, RemoteImageEndpoint: %q" ,
kubeCfg . RemoteRuntimeEndpoint ,
kubeCfg . RemoteImageEndpoint )
2017-01-27 01:22:43 +00:00
glog . V ( 2 ) . Infof ( "Starting the GRPC server for the docker CRI shim." )
2017-04-05 20:01:19 +00:00
server := dockerremote . NewDockerServer ( kubeCfg . RemoteRuntimeEndpoint , ds )
2017-01-27 01:22:43 +00:00
if err := server . Start ( ) ; err != nil {
2016-10-04 03:49:19 +00:00
return nil , err
}
2017-02-27 22:35:23 +00:00
// Create dockerLegacyService when the logging driver is not supported.
2017-05-10 23:25:09 +00:00
supported , err := dockershim . IsCRISupportedLogDriver ( kubeDeps . DockerClient )
2017-02-27 22:35:23 +00:00
if err != nil {
return nil , err
}
if ! supported {
2017-05-10 23:25:09 +00:00
klet . dockerLegacyService = dockershim . NewDockerLegacyService ( kubeDeps . DockerClient )
2017-02-27 22:35:23 +00:00
}
2017-06-20 11:51:12 +00:00
case kubetypes . RemoteContainerRuntime :
2017-01-27 01:22:43 +00:00
// No-op.
break
2016-09-16 01:25:18 +00:00
default :
2016-11-04 00:06:04 +00:00
return nil , fmt . Errorf ( "unsupported CRI runtime: %q" , kubeCfg . ContainerRuntime )
}
2017-01-27 01:22:43 +00:00
runtimeService , imageService , err := getRuntimeAndImageServices ( kubeCfg )
2017-05-19 03:18:18 +00:00
if err != nil {
return nil , err
}
2016-11-04 00:06:04 +00:00
runtime , err := kuberuntime . NewKubeGenericRuntimeManager (
kubecontainer . FilterEventRecorder ( kubeDeps . Recorder ) ,
klet . livenessManager ,
containerRefManager ,
machineInfo ,
klet . podManager ,
kubeDeps . OSInterface ,
klet ,
2017-05-10 23:25:09 +00:00
httpClient ,
2016-11-04 00:06:04 +00:00
imageBackOff ,
kubeCfg . SerializeImagePulls ,
float32 ( kubeCfg . RegistryPullQPS ) ,
int ( kubeCfg . RegistryBurst ) ,
2017-05-10 23:25:09 +00:00
kubeCfg . CPUCFSQuota ,
2016-11-04 00:06:04 +00:00
runtimeService ,
2017-01-27 01:22:43 +00:00
imageService ,
2016-11-04 00:06:04 +00:00
)
if err != nil {
return nil , err
}
klet . containerRuntime = runtime
klet . runner = runtime
} else {
2017-05-01 21:39:51 +00:00
// rkt uses the legacy, non-CRI, integration. Configure it the old way.
2017-03-08 00:49:25 +00:00
// TODO: Include hairpin mode settings in rkt?
conf := & rkt . Config {
2017-05-24 22:19:54 +00:00
Path : crOptions . RktPath ,
Stage1Image : crOptions . RktStage1Image ,
2017-03-08 00:49:25 +00:00
InsecureOptions : "image,ondisk" ,
}
runtime , err := rkt . New (
2017-05-24 22:19:54 +00:00
crOptions . RktAPIEndpoint ,
2017-03-08 00:49:25 +00:00
conf ,
klet ,
kubeDeps . Recorder ,
containerRefManager ,
klet . podManager ,
klet . livenessManager ,
2017-05-10 23:25:09 +00:00
httpClient ,
2017-03-08 00:49:25 +00:00
klet . networkPlugin ,
2017-05-10 23:25:09 +00:00
hairpinMode == componentconfig . HairpinVeth ,
2017-03-08 00:49:25 +00:00
utilexec . New ( ) ,
kubecontainer . RealOS { } ,
imageBackOff ,
kubeCfg . SerializeImagePulls ,
float32 ( kubeCfg . RegistryPullQPS ) ,
int ( kubeCfg . RegistryBurst ) ,
kubeCfg . RuntimeRequestTimeout . Duration ,
)
if err != nil {
return nil , err
2016-09-16 01:25:18 +00:00
}
2017-03-08 00:49:25 +00:00
klet . containerRuntime = runtime
klet . runner = kubecontainer . DirectStreamingRunner ( runtime )
2015-05-01 21:24:07 +00:00
}
2015-04-28 18:02:29 +00:00
2016-03-29 00:05:02 +00:00
// TODO: Factor out "StatsProvider" from Kubelet so we don't have a cyclic dependency
2016-07-13 23:11:12 +00:00
klet . resourceAnalyzer = stats . NewResourceAnalyzer ( klet , kubeCfg . VolumeStatsAggPeriod . Duration , klet . containerRuntime )
2016-03-29 00:05:02 +00:00
2016-05-26 03:08:56 +00:00
klet . pleg = pleg . NewGenericPLEG ( klet . containerRuntime , plegChannelCapacity , plegRelistPeriod , klet . podCache , clock . RealClock { } )
2016-04-22 22:23:03 +00:00
klet . runtimeState = newRuntimeState ( maxWaitForContainerRuntime )
2017-02-16 16:37:24 +00:00
klet . runtimeState . addHealthCheck ( "PLEG" , klet . pleg . Healthy )
2016-07-13 23:11:12 +00:00
klet . updatePodCIDR ( kubeCfg . PodCIDR )
2015-11-07 01:03:39 +00:00
2015-10-03 15:37:07 +00:00
// setup containerGC
2017-05-22 18:00:22 +00:00
containerGC , err := kubecontainer . NewContainerGC ( klet . containerRuntime , containerGCPolicy , klet . sourcesReady )
2015-10-03 15:37:07 +00:00
if err != nil {
return nil , err
}
klet . containerGC = containerGC
2016-07-07 16:58:55 +00:00
klet . containerDeletor = newPodContainerDeletor ( klet . containerRuntime , integer . IntMax ( containerGCPolicy . MaxPerPodContainer , minDeadContainerInPod ) )
2015-10-03 15:37:07 +00:00
2015-09-26 00:29:08 +00:00
// setup imageManager
2016-07-13 23:11:12 +00:00
imageManager , err := images . NewImageGCManager ( klet . containerRuntime , kubeDeps . CAdvisorInterface , kubeDeps . Recorder , nodeRef , imageGCPolicy )
2015-09-26 00:29:08 +00:00
if err != nil {
return nil , fmt . Errorf ( "failed to initialize image manager: %v" , err )
}
klet . imageManager = imageManager
2017-02-10 23:08:03 +00:00
klet . statusManager = status . NewManager ( klet . kubeClient , klet . podManager , klet )
2015-03-21 00:22:02 +00:00
2017-02-17 19:32:41 +00:00
if utilfeature . DefaultFeatureGate . Enabled ( features . RotateKubeletServerCertificate ) && kubeDeps . TLSOptions != nil {
var ips [ ] net . IP
cfgAddress := net . ParseIP ( kubeCfg . Address )
if cfgAddress == nil || cfgAddress . IsUnspecified ( ) {
if localIPs , err := allLocalIPsWithoutLoopback ( ) ; err != nil {
return nil , err
} else {
ips = localIPs
}
} else {
ips = [ ] net . IP { cfgAddress }
}
ips = append ( ips , cloudIPs ... )
names := append ( [ ] string { klet . GetHostname ( ) , hostnameOverride } , cloudNames ... )
2017-07-05 22:09:26 +00:00
klet . serverCertificateManager , err = certificate . NewKubeletServerCertificateManager ( klet . kubeClient , kubeCfg , klet . nodeName , ips , names )
2017-02-17 19:32:41 +00:00
if err != nil {
return nil , fmt . Errorf ( "failed to initialize certificate manager: %v" , err )
}
kubeDeps . TLSOptions . Config . GetCertificate = func ( * tls . ClientHelloInfo ) ( * tls . Certificate , error ) {
cert := klet . serverCertificateManager . Current ( )
if cert == nil {
return nil , fmt . Errorf ( "no certificate available" )
}
return cert , nil
}
}
2015-08-25 17:39:41 +00:00
klet . probeManager = prober . NewManager (
klet . statusManager ,
2015-10-19 22:15:59 +00:00
klet . livenessManager ,
klet . runner ,
containerRefManager ,
2016-07-13 23:11:12 +00:00
kubeDeps . Recorder )
2015-08-25 17:39:41 +00:00
2016-07-15 00:47:46 +00:00
klet . volumePluginMgr , err =
2017-05-25 21:23:57 +00:00
NewInitializedVolumePluginMgr ( klet , secretManager , configMapManager , kubeDeps . VolumePlugins )
2016-07-15 00:47:46 +00:00
if err != nil {
return nil , err
}
2016-11-03 19:15:52 +00:00
// If the experimentalMounterPathFlag is set, we do not want to
// check node capabilities since the mount path is not the default
if len ( kubeCfg . ExperimentalMounterPath ) != 0 {
kubeCfg . ExperimentalCheckNodeCapabilitiesBeforeMount = false
}
2016-06-23 19:46:21 +00:00
// setup volumeManager
2016-12-09 02:18:52 +00:00
klet . volumeManager = volumemanager . NewVolumeManager (
2016-07-13 23:11:12 +00:00
kubeCfg . EnableControllerAttachDetach ,
2016-07-21 06:03:56 +00:00
nodeName ,
2016-07-15 00:47:46 +00:00
klet . podManager ,
2017-02-10 23:08:03 +00:00
klet . statusManager ,
2016-07-15 00:47:46 +00:00
klet . kubeClient ,
klet . volumePluginMgr ,
2016-07-06 17:42:56 +00:00
klet . containerRuntime ,
2016-07-13 23:11:12 +00:00
kubeDeps . Mounter ,
2016-06-21 16:13:23 +00:00
klet . getPodsDir ( ) ,
2016-11-03 19:15:52 +00:00
kubeDeps . Recorder ,
2016-11-21 16:48:50 +00:00
kubeCfg . ExperimentalCheckNodeCapabilitiesBeforeMount ,
kubeCfg . KeepTerminatedPodVolumes )
2016-07-15 00:47:46 +00:00
2015-05-01 22:25:11 +00:00
runtimeCache , err := kubecontainer . NewRuntimeCache ( klet . containerRuntime )
2015-02-19 09:12:53 +00:00
if err != nil {
return nil , err
}
2015-04-14 01:04:11 +00:00
klet . runtimeCache = runtimeCache
2016-01-12 21:28:00 +00:00
klet . reasonCache = NewReasonCache ( )
2016-04-29 01:39:46 +00:00
klet . workQueue = queue . NewBasicWorkQueue ( klet . clock )
2016-07-13 23:11:12 +00:00
klet . podWorkers = newPodWorkers ( klet . syncPod , kubeDeps . Recorder , klet . workQueue , klet . resyncInterval , backOffPeriod , klet . podCache )
2015-02-19 09:12:53 +00:00
2016-03-09 02:58:24 +00:00
klet . backOff = flowcontrol . NewBackOff ( backOffPeriod , MaxContainerBackOff )
2016-01-31 23:56:55 +00:00
klet . podKillingCh = make ( chan * kubecontainer . PodPair , podKillingChannelCapacity )
2016-02-19 01:54:48 +00:00
klet . setNodeStatusFuncs = klet . defaultNodeStatusFuncs ( )
2016-05-13 03:35:18 +00:00
// setup eviction manager
2017-05-22 18:00:22 +00:00
evictionManager , evictionAdmitHandler := eviction . NewManager ( klet . resourceAnalyzer , evictionConfig , killPodNow ( klet . podWorkers , kubeDeps . Recorder ) , klet . imageManager , klet . containerGC , kubeDeps . Recorder , nodeRef , klet . clock )
2016-07-13 23:11:12 +00:00
2016-05-13 03:35:18 +00:00
klet . evictionManager = evictionManager
2016-11-02 18:05:16 +00:00
klet . admitHandlers . AddPodAdmitHandler ( evictionAdmitHandler )
2016-05-13 03:35:18 +00:00
2016-08-19 08:53:25 +00:00
// add sysctl admission
runtimeSupport , err := sysctl . NewRuntimeAdmitHandler ( klet . containerRuntime )
if err != nil {
return nil , err
}
2016-11-18 20:50:58 +00:00
safeWhitelist , err := sysctl . NewWhitelist ( sysctl . SafeSysctlWhitelist ( ) , v1 . SysctlsPodAnnotationKey )
2016-08-19 08:53:25 +00:00
if err != nil {
return nil , err
}
// Safe, whitelisted sysctls can always be used as unsafe sysctls in the spec
// Hence, we concatenate those two lists.
2016-07-13 23:11:12 +00:00
safeAndUnsafeSysctls := append ( sysctl . SafeSysctlWhitelist ( ) , kubeCfg . AllowedUnsafeSysctls ... )
2016-11-18 20:50:58 +00:00
unsafeWhitelist , err := sysctl . NewWhitelist ( safeAndUnsafeSysctls , v1 . UnsafeSysctlsPodAnnotationKey )
2016-08-19 08:53:25 +00:00
if err != nil {
return nil , err
}
2016-11-02 18:05:16 +00:00
klet . admitHandlers . AddPodAdmitHandler ( runtimeSupport )
klet . admitHandlers . AddPodAdmitHandler ( safeWhitelist )
klet . admitHandlers . AddPodAdmitHandler ( unsafeWhitelist )
2016-08-19 08:53:25 +00:00
2016-04-22 21:30:35 +00:00
// enable active deadline handler
2016-07-13 23:11:12 +00:00
activeDeadlineHandler , err := newActiveDeadlineHandler ( klet . statusManager , kubeDeps . Recorder , klet . clock )
2016-04-22 21:30:35 +00:00
if err != nil {
return nil , err
}
klet . AddPodSyncLoopHandler ( activeDeadlineHandler )
klet . AddPodSyncHandler ( activeDeadlineHandler )
2017-03-11 18:43:24 +00:00
criticalPodAdmissionHandler := preemption . NewCriticalPodAdmissionHandler ( klet . GetActivePods , killPodNow ( klet . podWorkers , kubeDeps . Recorder ) , kubeDeps . Recorder )
2017-02-23 18:31:20 +00:00
klet . admitHandlers . AddPodAdmitHandler ( lifecycle . NewPredicateAdmitHandler ( klet . getNodeAnyWay , criticalPodAdmissionHandler ) )
2016-02-19 01:54:48 +00:00
// apply functional Option's
2016-07-13 23:11:12 +00:00
for _ , opt := range kubeDeps . Options {
2016-02-19 01:54:48 +00:00
opt ( klet )
}
2016-07-13 23:11:12 +00:00
2016-11-02 18:05:16 +00:00
klet . appArmorValidator = apparmor . NewValidator ( kubeCfg . ContainerRuntime )
klet . softAdmitHandlers . AddPodAdmitHandler ( lifecycle . NewAppArmorAdmitHandler ( klet . appArmorValidator ) )
2017-02-27 06:19:59 +00:00
if utilfeature . DefaultFeatureGate . Enabled ( features . Accelerators ) {
2017-06-20 11:51:12 +00:00
if kubeCfg . ContainerRuntime == kubetypes . DockerContainerRuntime {
2017-05-10 23:25:09 +00:00
if klet . gpuManager , err = nvidia . NewNvidiaGPUManager ( klet , kubeDeps . DockerClient ) ; err != nil {
2017-03-03 20:13:01 +00:00
return nil , err
}
} else {
glog . Errorf ( "Accelerators feature is supported with docker runtime only. Disabling this feature internally." )
2017-02-27 23:16:30 +00:00
}
2017-03-03 20:13:01 +00:00
}
// Set GPU manager to a stub implementation if it is not enabled or cannot be supported.
if klet . gpuManager == nil {
2017-02-26 05:16:13 +00:00
klet . gpuManager = gpu . NewGPUManagerStub ( )
}
2016-07-13 23:11:12 +00:00
// Finally, put the most recent version of the config on the Kubelet, so
// people can see how it was configured.
2016-08-30 01:03:34 +00:00
klet . kubeletConfiguration = * kubeCfg
2015-01-12 00:42:11 +00:00
return klet , nil
2014-07-22 21:40:59 +00:00
}
2015-01-08 15:25:14 +00:00
// serviceLister is the minimal listing interface the kubelet needs in order to
// look up Services; it is the type of the Kubelet.serviceLister field.
type serviceLister interface {
2016-11-18 20:50:58 +00:00
// List returns the Services matching the given label selector, or an error.
List ( labels . Selector ) ( [ ] * v1 . Service , error )
2015-01-08 15:25:14 +00:00
}
2014-07-10 12:26:24 +00:00
// Kubelet is the main kubelet implementation.
// It aggregates the managers, caches and configuration values that the
// kubelet's methods in this file operate on.
2014-06-06 23:40:48 +00:00
type Kubelet struct {
2016-08-30 01:03:34 +00:00
// kubeletConfiguration is a copy of the configuration the Kubelet was
// constructed with, kept so that it can be inspected later.
kubeletConfiguration componentconfig . KubeletConfiguration
2016-07-13 23:11:12 +00:00
2015-09-02 17:18:11 +00:00
hostname string
2016-07-16 06:10:29 +00:00
nodeName types . NodeName
2015-09-02 17:18:11 +00:00
// runtimeCache is created from containerRuntime during construction.
runtimeCache kubecontainer . RuntimeCache
2016-02-01 22:30:47 +00:00
// kubeClient talks to the apiserver. It may be nil, in which case no node
// status updates are sent.
kubeClient clientset . Interface
2016-07-28 20:01:00 +00:00
iptClient utilipt . Interface
2015-09-02 17:18:11 +00:00
// rootDirectory is the kubelet's root data directory; it is normalized with
// path.Clean by setupDataDirs.
rootDirectory string
2016-04-28 04:26:36 +00:00
// podWorkers handle syncing Pods in response to events.
podWorkers PodWorkers
// resyncInterval is the interval between periodic full reconciliations of
// pods on this node.
2015-04-09 01:56:58 +00:00
resyncInterval time . Duration
2016-04-28 04:26:36 +00:00
2016-04-26 17:58:12 +00:00
// sourcesReady records the sources seen by the kubelet, it is thread-safe.
sourcesReady config . SourcesReady
2014-07-15 20:24:41 +00:00
2016-04-28 04:26:36 +00:00
// podManager is a facade that abstracts away the various sources of pods
// this Kubelet services.
2015-10-12 23:28:23 +00:00
podManager kubepod . Manager
2015-03-18 18:43:59 +00:00
2016-05-13 03:35:18 +00:00
// Needed to observe and respond to situations that could impact node stability
evictionManager eviction . Manager
2014-07-15 20:24:41 +00:00
// Optional. When nil, Run installs a file server that exposes /var/log/
// under the /logs/ URL prefix.
2014-07-22 21:40:59 +00:00
logServer http . Handler
2014-08-07 18:15:11 +00:00
// Runs commands inside containers. Assigned from the selected container
// runtime when the Kubelet is constructed.
2015-05-11 22:32:51 +00:00
runner kubecontainer . ContainerCommandRunner
2014-10-09 00:05:04 +00:00
2015-03-06 07:56:30 +00:00
// cAdvisor used for container information.
cadvisor cadvisor . Interface
2014-10-28 00:29:55 +00:00
2015-09-21 18:06:38 +00:00
// Set to true to have the node register itself with the apiserver.
registerNode bool
2015-09-16 04:53:33 +00:00
// Set to true to have the node register itself as schedulable.
registerSchedulable bool
2015-07-01 01:49:18 +00:00
// for internal book keeping; access only from within registerWithApiserver
registrationCompleted bool
2015-05-20 21:21:03 +00:00
2015-06-12 17:20:26 +00:00
// Set to true if the kubelet is in standalone mode (i.e. setup without an apiserver)
standaloneMode bool
2014-11-12 05:21:40 +00:00
// If non-empty, use this for container DNS search.
clusterDomain string
// If non-nil, use these addresses as the container DNS servers.
2017-01-03 09:28:38 +00:00
clusterDNS [ ] net . IP
2015-01-08 15:25:14 +00:00
2016-04-28 04:26:36 +00:00
// masterServiceNamespace is the namespace that the master service is exposed in.
2015-01-08 15:25:14 +00:00
masterServiceNamespace string
2016-04-28 04:26:36 +00:00
// serviceLister knows how to list services
serviceLister serviceLister
// nodeInfo knows how to get information about the node for this kubelet.
nodeInfo predicates . NodeInfo
2014-11-23 15:47:25 +00:00
2015-11-13 23:59:23 +00:00
// a list of node labels to register
2016-01-13 23:56:51 +00:00
nodeLabels map [ string ] string
2015-11-13 23:59:23 +00:00
2015-07-03 20:29:14 +00:00
// runtimeState holds runtime-related state for the kubelet: health checks
2015-05-05 10:19:54 +00:00
// (such as "PLEG") are registered on it and initialization errors are stored in it.
// NOTE(review): an earlier comment described this as a mutex-protected "last ping" timestamp — confirm against runtimeState.
2015-09-25 18:00:14 +00:00
runtimeState * runtimeState
2015-06-24 18:10:10 +00:00
2014-11-23 15:47:25 +00:00
// Volume plugins.
2016-05-30 02:22:22 +00:00
volumePluginMgr * volume . VolumePluginMgr
2015-02-02 18:51:52 +00:00
2015-04-17 22:54:28 +00:00
// Network plugin.
2015-03-19 23:14:13 +00:00
networkPlugin network . NetworkPlugin
2015-10-19 22:15:59 +00:00
// Handles container probing.
2015-08-25 17:39:41 +00:00
probeManager prober . Manager
2015-10-19 22:15:59 +00:00
// Manages container health check results.
livenessManager proberesults . Manager
2015-01-08 20:41:38 +00:00
2015-03-20 16:37:08 +00:00
// How long to keep idle streaming command execution/port forwarding
2015-01-08 20:41:38 +00:00
// connections open before terminating them
streamingConnectionIdleTimeout time . Duration
2015-03-03 06:06:20 +00:00
2015-03-20 16:37:08 +00:00
// The EventRecorder to use
2015-03-03 06:06:20 +00:00
recorder record . EventRecorder
2015-03-03 18:33:25 +00:00
2015-03-14 17:13:20 +00:00
// Garbage collector for dead containers; run periodically by StartGarbageCollection.
2015-10-05 22:35:32 +00:00
containerGC kubecontainer . ContainerGC
2015-03-16 04:00:46 +00:00
2016-08-04 19:26:06 +00:00
// Manager for image garbage collection.
imageManager images . ImageGCManager
2015-03-16 12:50:00 +00:00
2015-05-12 08:24:08 +00:00
// Diskspace manager.
diskSpaceManager diskSpaceManager
2016-11-17 11:22:11 +00:00
// Secret manager.
2016-11-18 12:14:03 +00:00
secretManager secret . Manager
2016-11-17 11:22:11 +00:00
2017-05-25 21:23:57 +00:00
// ConfigMap manager.
configMapManager configmap . Manager
2015-03-16 12:50:00 +00:00
// Cached MachineInfo returned by cadvisor.
2015-10-16 03:00:28 +00:00
machineInfo * cadvisorapi . MachineInfo
2015-03-20 16:37:08 +00:00
2017-05-25 19:29:19 +00:00
// Cached RootFsInfo returned by cadvisor.
rootfsInfo * cadvisorapiv2 . FsInfo
2017-02-17 19:32:41 +00:00
// Handles certificate rotations. Only constructed when the
// RotateKubeletServerCertificate feature gate is enabled and TLS options
// were supplied; it is nil otherwise.
serverCertificateManager certificate . Manager
2015-03-20 16:37:08 +00:00
// Syncs pods statuses with apiserver; also used as a cache of statuses.
2015-09-11 19:22:01 +00:00
statusManager status . Manager
2015-03-23 22:31:13 +00:00
2016-05-30 02:22:22 +00:00
// VolumeManager runs a set of asynchronous loops that figure out which
// volumes need to be attached/mounted/unmounted/detached based on the pods
// scheduled on this node and makes it so.
2016-07-02 01:50:25 +00:00
volumeManager volumemanager . VolumeManager
2015-04-16 00:40:07 +00:00
2016-05-30 02:22:22 +00:00
// Cloud provider interface.
2016-06-29 22:30:20 +00:00
cloud cloudprovider . Interface
autoDetectCloudProvider bool
2017-03-29 23:21:42 +00:00
// Indicates that the node initialization happens in an external cloud controller
externalCloudProvider bool
2015-03-27 20:12:48 +00:00
// Reference to this node.
2017-01-31 20:59:22 +00:00
nodeRef * clientv1 . ObjectReference
2015-04-02 20:14:52 +00:00
2015-05-01 22:25:11 +00:00
// Container runtime.
containerRuntime kubecontainer . Runtime
2015-03-31 11:17:12 +00:00
2016-01-12 21:28:00 +00:00
// reasonCache caches the failure reason of the last creation of all containers, which is
// used for generating ContainerStatus.
reasonCache * ReasonCache
2015-09-21 18:06:38 +00:00
// nodeStatusUpdateFrequency specifies how often kubelet posts node status to master.
// Note: be cautious when changing the constant, it must work with nodeMonitorGracePeriod
// in nodecontroller. There are several constraints:
// 1. nodeMonitorGracePeriod must be N times more than nodeStatusUpdateFrequency, where
// N means number of retries allowed for kubelet to post node status. It is pointless
// to make nodeMonitorGracePeriod be less than nodeStatusUpdateFrequency, since there
// will only be fresh values from Kubelet at an interval of nodeStatusUpdateFrequency.
// The constant must be less than podEvictionTimeout.
// 2. nodeStatusUpdateFrequency needs to be large enough for kubelet to generate node
// status. Kubelet may fail to update node status reliably if the value is too small,
// as it takes time to gather all necessary node information.
nodeStatusUpdateFrequency time . Duration
2015-08-07 21:42:21 +00:00
// Generates pod events.
pleg pleg . PodLifecycleEventGenerator
2016-01-20 02:15:10 +00:00
// Store kubecontainer.PodStatus for all pods.
podCache kubecontainer . Cache
2016-04-28 04:26:36 +00:00
// os is a facade for various syscalls that need to be mocked during testing.
2015-04-29 20:44:29 +00:00
os kubecontainer . OSInterface
// Watcher of out of memory events.
2015-04-11 00:29:56 +00:00
oomWatcher OOMWatcher
2015-04-29 20:44:29 +00:00
2016-01-14 19:19:26 +00:00
// Monitor resource usage
resourceAnalyzer stats . ResourceAnalyzer
2016-06-27 18:46:20 +00:00
// Whether or not we should have the QOS cgroup hierarchy for resource management
2016-07-13 23:11:12 +00:00
cgroupsPerQOS bool
2016-06-27 18:46:20 +00:00
2015-04-24 00:07:52 +00:00
// If non-empty, pass this to the container runtime as the root cgroup.
cgroupRoot string
2015-05-04 14:43:10 +00:00
// Mounter to use for volumes.
mounter mount . Interface
2015-05-12 16:59:02 +00:00
2015-09-14 09:51:40 +00:00
// Writer interface to use for volumes.
writer kubeio . Writer
2015-05-12 16:59:02 +00:00
// Manager of non-Runtime containers.
2015-10-10 00:09:53 +00:00
containerManager cm . ContainerManager
2016-01-07 20:44:40 +00:00
2015-11-17 01:15:40 +00:00
// Maximum Number of Pods which can be run by this Kubelet
maxPods int
2015-06-17 22:31:46 +00:00
// Monitor Kubelet's sync loop
2015-10-24 10:17:17 +00:00
syncLoopMonitor atomic . Value
2015-08-13 12:59:15 +00:00
// Container restart Backoff
2016-03-09 02:58:24 +00:00
backOff * flowcontrol . Backoff
2015-08-19 00:52:26 +00:00
// Channel for sending pods to kill.
2016-01-31 23:56:55 +00:00
podKillingCh chan * kubecontainer . PodPair
2015-07-28 18:54:32 +00:00
// The configuration file used as the base to generate the container's
// DNS resolver configuration file. This can be used in conjunction with
// clusterDomain and clusterDNS.
resolverConfig string
2015-08-10 22:08:31 +00:00
2015-09-21 18:06:38 +00:00
// Information about the ports which are opened by daemons on Node running this Kubelet server.
2016-11-18 20:50:58 +00:00
daemonEndpoints * v1 . NodeDaemonEndpoints
2015-09-02 17:18:11 +00:00
// A queue used to trigger pod workers.
workQueue queue . WorkQueue
2015-11-07 01:03:39 +00:00
// oneTimeInitializer is used to initialize modules that are dependent on the runtime to be up.
oneTimeInitializer sync . Once
2015-11-21 03:41:32 +00:00
2015-12-10 02:05:35 +00:00
// If non-nil, use this IP address for the node
nodeIP net . IP
2016-01-04 20:03:28 +00:00
2017-03-29 23:21:42 +00:00
// If non-empty, this is a unique identifier for the node in an external database, eg. cloudprovider
providerID string
2016-01-04 20:03:28 +00:00
// clock is an interface that provides time related functionality in a way that makes it
// easy to test the code.
2016-05-26 03:08:56 +00:00
clock clock . Clock
2016-01-04 20:03:28 +00:00
// outOfDiskTransitionFrequency specifies the amount of time the kubelet has to be actually
// not out of disk before it can transition the node condition status from out-of-disk to
// not-out-of-disk. This prevents a pod that causes out-of-disk condition from repeatedly
// getting rescheduled onto the node.
outOfDiskTransitionFrequency time . Duration
2015-12-12 01:51:39 +00:00
2016-02-19 01:54:48 +00:00
// handlers called during the tryUpdateNodeStatus cycle
2016-11-18 20:50:58 +00:00
setNodeStatusFuncs [ ] func ( * v1 . Node ) error
2016-04-15 18:17:17 +00:00
// TODO: think about moving this to be centralized in PodWorkers in follow-on.
// the list of handlers to call during pod admission.
2016-11-02 18:05:16 +00:00
admitHandlers lifecycle . PodAdmitHandlers
// softAdmitHandlers are applied to the pod after it is admitted by the Kubelet, but before it is
// run. A pod rejected by a softAdmitHandler will be left in a Pending state indefinitely. If a
// rejected pod should not be recreated, or the scheduler is not aware of the rejection rule, the
// admission rule should be applied by a softAdmitHandler.
softAdmitHandlers lifecycle . PodAdmitHandlers
2016-04-15 18:17:17 +00:00
// the list of handlers to call during pod sync loop.
lifecycle . PodSyncLoopHandlers
// the list of handlers to call during pod sync.
lifecycle . PodSyncHandlers
2016-05-18 15:18:10 +00:00
// the number of allowed pods per core
podsPerCore int
2016-05-23 20:37:30 +00:00
// enableControllerAttachDetach indicates the Attach/Detach controller
// should manage attachment/detachment of volumes scheduled to this node,
// and disable kubelet from executing any attach/detach operations
enableControllerAttachDetach bool
2016-07-07 16:58:55 +00:00
// trigger deleting containers in a pod
containerDeletor * podContainerDeletor
2016-08-17 20:01:27 +00:00
// config iptables util rules
makeIPTablesUtilChains bool
// The bit of the fwmark space to mark packets for SNAT.
iptablesMasqueradeBit int
// The bit of the fwmark space to mark packets for dropping.
iptablesDropBit int
2016-08-30 00:54:15 +00:00
// The AppArmor validator for checking whether AppArmor is supported.
appArmorValidator apparmor . Validator
2016-11-04 18:50:51 +00:00
// The handler serving CRI streaming calls (exec/attach/port-forward).
criHandler http . Handler
2016-08-25 20:45:38 +00:00
// experimentalHostUserNamespaceDefaulting sets userns=true when users request host namespaces (pid, ipc, net),
// are using non-namespaced capabilities (mknod, sys_time, sys_module), the pod contains a privileged container,
// or using host path volumes.
// This should only be enabled when the container runtime is performing user remapping AND if the
// experimental behavior is desired.
experimentalHostUserNamespaceDefaulting bool
2016-12-03 07:12:38 +00:00
2017-02-26 05:16:13 +00:00
// GPU Manager. Set to a stub implementation when the Accelerators feature
// is not enabled or cannot be supported by the configured runtime.
gpuManager gpu . GPUManager
2017-02-27 22:35:23 +00:00
// dockerLegacyService contains some legacy methods for backward compatibility.
// It should be set only when docker is using non json-file logging driver.
dockerLegacyService dockershim . DockerLegacyService
2015-12-10 02:05:35 +00:00
}
2017-02-17 19:32:41 +00:00
// allLocalIPsWithoutLoopback returns the IP addresses assigned to all network
// interfaces on this host, excluding loopback addresses.
//
// Only addresses reported as *net.IPNet are considered; any other net.Addr
// implementation is skipped. An error is returned when the interfaces cannot
// be enumerated or when the addresses of any one interface cannot be listed;
// in that case no partial result is returned.
func allLocalIPsWithoutLoopback() ([]net.IP, error) {
	interfaces, err := net.Interfaces()
	if err != nil {
		return nil, fmt.Errorf("could not list network interfaces: %v", err)
	}

	var ips []net.IP
	for _, i := range interfaces {
		addresses, err := i.Addrs()
		if err != nil {
			// No trailing newline: the error is propagated to callers that
			// wrap and log it, and a newline would break up their output.
			return nil, fmt.Errorf("could not list the addresses for network interface %v: %v", i, err)
		}
		for _, address := range addresses {
			ipNet, ok := address.(*net.IPNet)
			if !ok || ipNet.IP.IsLoopback() {
				continue
			}
			ips = append(ips, ipNet.IP)
		}
	}
	return ips, nil
}
2016-04-28 04:26:36 +00:00
// setupDataDirs creates:
// 1. the root directory
// 2. the pods directory
// 3. the plugins directory
2015-01-12 00:42:11 +00:00
func ( kl * Kubelet ) setupDataDirs ( ) error {
kl . rootDirectory = path . Clean ( kl . rootDirectory )
2014-11-23 15:47:25 +00:00
if err := os . MkdirAll ( kl . getRootDir ( ) , 0750 ) ; err != nil {
2015-01-12 00:42:11 +00:00
return fmt . Errorf ( "error creating root directory: %v" , err )
}
2014-11-23 15:47:25 +00:00
if err := os . MkdirAll ( kl . getPodsDir ( ) , 0750 ) ; err != nil {
2015-01-12 00:42:11 +00:00
return fmt . Errorf ( "error creating pods directory: %v" , err )
}
2014-11-23 15:47:25 +00:00
if err := os . MkdirAll ( kl . getPluginsDir ( ) , 0750 ) ; err != nil {
return fmt . Errorf ( "error creating plugins directory: %v" , err )
}
2015-01-12 00:42:11 +00:00
return nil
}
2015-07-03 20:29:14 +00:00
// Starts garbage collection threads.
2015-03-16 04:00:46 +00:00
func ( kl * Kubelet ) StartGarbageCollection ( ) {
2016-09-02 17:24:54 +00:00
loggedContainerGCFailure := false
2016-02-02 10:57:06 +00:00
go wait . Until ( func ( ) {
2017-05-22 18:00:22 +00:00
if err := kl . containerGC . GarbageCollect ( ) ; err != nil {
2015-03-16 04:00:46 +00:00
glog . Errorf ( "Container garbage collection failed: %v" , err )
2016-11-18 20:50:58 +00:00
kl . recorder . Eventf ( kl . nodeRef , v1 . EventTypeWarning , events . ContainerGCFailed , err . Error ( ) )
2016-09-02 17:24:54 +00:00
loggedContainerGCFailure = true
} else {
var vLevel glog . Level = 4
if loggedContainerGCFailure {
vLevel = 1
loggedContainerGCFailure = false
}
glog . V ( vLevel ) . Infof ( "Container garbage collection succeeded" )
2015-03-16 04:00:46 +00:00
}
2016-03-02 21:29:53 +00:00
} , ContainerGCPeriod , wait . NeverStop )
2015-03-16 04:00:46 +00:00
2017-03-12 13:42:11 +00:00
prevImageGCFailed := false
2016-02-02 10:57:06 +00:00
go wait . Until ( func ( ) {
2015-03-16 04:00:46 +00:00
if err := kl . imageManager . GarbageCollect ( ) ; err != nil {
2017-03-12 13:42:11 +00:00
if prevImageGCFailed {
glog . Errorf ( "Image garbage collection failed multiple times in a row: %v" , err )
// Only create an event for repeated failures
kl . recorder . Eventf ( kl . nodeRef , v1 . EventTypeWarning , events . ImageGCFailed , err . Error ( ) )
} else {
glog . Errorf ( "Image garbage collection failed once. Stats initialization may not have completed yet: %v" , err )
}
prevImageGCFailed = true
2016-09-02 17:24:54 +00:00
} else {
var vLevel glog . Level = 4
2017-03-12 13:42:11 +00:00
if prevImageGCFailed {
2016-09-02 17:24:54 +00:00
vLevel = 1
2017-03-12 13:42:11 +00:00
prevImageGCFailed = false
2016-09-02 17:24:54 +00:00
}
glog . V ( vLevel ) . Infof ( "Image garbage collection succeeded" )
2014-12-22 19:54:07 +00:00
}
2016-03-02 21:29:53 +00:00
} , ImageGCPeriod , wait . NeverStop )
2014-12-22 19:54:07 +00:00
}
2015-11-07 01:03:39 +00:00
// initializeModules will initialize internal modules that do not require the container runtime to be up.
// Note that the modules here must not depend on modules that are not initialized here.
func ( kl * Kubelet ) initializeModules ( ) error {
2017-02-17 19:32:41 +00:00
// Prometheus metrics.
2015-09-24 22:26:25 +00:00
metrics . Register ( kl . runtimeCache )
2017-02-17 19:32:41 +00:00
// Setup filesystem directories.
2015-09-24 22:26:25 +00:00
if err := kl . setupDataDirs ( ) ; err != nil {
return err
2014-07-15 07:04:30 +00:00
}
2015-11-07 01:03:39 +00:00
2017-02-17 19:32:41 +00:00
// If the container logs directory does not exist, create it.
2016-10-15 04:32:50 +00:00
if _ , err := os . Stat ( ContainerLogsDir ) ; err != nil {
if err := kl . os . MkdirAll ( ContainerLogsDir , 0755 ) ; err != nil {
glog . Errorf ( "Failed to create directory %q: %v" , ContainerLogsDir , err )
2015-09-24 22:26:25 +00:00
}
2015-02-23 21:04:45 +00:00
}
2015-04-14 00:30:57 +00:00
2017-02-17 19:32:41 +00:00
// Start the image manager.
2016-12-21 06:58:00 +00:00
kl . imageManager . Start ( )
2015-05-05 18:15:12 +00:00
2017-02-17 19:32:41 +00:00
// Start the certificate manager.
if utilfeature . DefaultFeatureGate . Enabled ( features . RotateKubeletServerCertificate ) {
kl . serverCertificateManager . Start ( )
}
// Start container manager.
2016-07-13 04:39:22 +00:00
node , err := kl . getNodeAnyWay ( )
if err != nil {
2016-11-24 01:59:10 +00:00
return fmt . Errorf ( "Kubelet failed to get node info: %v" , err )
2016-07-13 04:39:22 +00:00
}
2017-03-11 18:43:24 +00:00
if err := kl . containerManager . Start ( node , kl . GetActivePods ) ; err != nil {
2015-09-24 22:26:25 +00:00
return fmt . Errorf ( "Failed to start ContainerManager %v" , err )
2015-05-12 16:59:02 +00:00
}
2017-02-17 19:32:41 +00:00
// Start out of memory watcher.
2015-05-20 21:21:03 +00:00
if err := kl . oomWatcher . Start ( kl . nodeRef ) ; err != nil {
2015-09-24 22:26:25 +00:00
return fmt . Errorf ( "Failed to start OOM watcher %v" , err )
2015-05-15 20:24:24 +00:00
}
2016-01-14 19:19:26 +00:00
2017-02-17 19:32:41 +00:00
// Initialize GPUs
2017-04-20 15:09:25 +00:00
if err := kl . gpuManager . Start ( ) ; err != nil {
glog . Errorf ( "Failed to start gpuManager %v" , err )
}
2016-12-03 07:12:38 +00:00
2017-02-17 19:32:41 +00:00
// Start resource analyzer
2016-01-14 19:19:26 +00:00
kl . resourceAnalyzer . Start ( )
2016-04-25 19:48:47 +00:00
2015-09-24 22:26:25 +00:00
return nil
}
2015-05-15 20:24:24 +00:00
2015-11-07 01:03:39 +00:00
// initializeRuntimeDependentModules will initialize internal modules that require the container runtime to be up.
func ( kl * Kubelet ) initializeRuntimeDependentModules ( ) {
if err := kl . cadvisor . Start ( ) ; err != nil {
2016-07-25 23:07:33 +00:00
// Fail kubelet and rely on the babysitter to retry starting kubelet.
// TODO(random-liu): Add backoff logic in the babysitter
glog . Fatalf ( "Failed to start cAdvisor %v" , err )
2016-07-21 22:31:36 +00:00
}
// eviction manager must start after cadvisor because it needs to know if the container runtime has a dedicated imagefs
2017-06-26 19:49:00 +00:00
kl . evictionManager . Start ( kl . cadvisor , kl . GetActivePods , kl . podResourcesAreReclaimed , kl , evictionMonitoringPeriod )
2015-11-07 01:03:39 +00:00
}
2015-05-12 16:59:02 +00:00
2015-09-24 22:26:25 +00:00
// Run starts the kubelet reacting to config updates
func ( kl * Kubelet ) Run ( updates <- chan kubetypes . PodUpdate ) {
if kl . logServer == nil {
kl . logServer = http . StripPrefix ( "/logs/" , http . FileServer ( http . Dir ( "/var/log/" ) ) )
}
if kl . kubeClient == nil {
glog . Warning ( "No api server defined - no node status update will be sent." )
}
2017-03-11 01:42:44 +00:00
2015-11-07 01:03:39 +00:00
if err := kl . initializeModules ( ) ; err != nil {
2016-11-18 20:50:58 +00:00
kl . recorder . Eventf ( kl . nodeRef , v1 . EventTypeWarning , events . KubeletSetupFailed , err . Error ( ) )
2015-09-24 22:26:25 +00:00
glog . Error ( err )
kl . runtimeState . setInitError ( err )
2015-05-18 19:18:12 +00:00
}
2016-05-30 02:22:22 +00:00
// Start volume manager
2016-06-23 19:46:21 +00:00
go kl . volumeManager . Run ( kl . sourcesReady , wait . NeverStop )
2016-05-30 02:22:22 +00:00
2015-09-24 22:26:25 +00:00
if kl . kubeClient != nil {
// Start syncing node status immediately, this may set up things the runtime needs to run.
2016-02-02 10:57:06 +00:00
go wait . Until ( kl . syncNodeStatus , kl . nodeStatusUpdateFrequency , wait . NeverStop )
2015-05-18 19:18:12 +00:00
}
2016-02-02 10:57:06 +00:00
go wait . Until ( kl . syncNetworkStatus , 30 * time . Second , wait . NeverStop )
go wait . Until ( kl . updateRuntimeUp , 5 * time . Second , wait . NeverStop )
2015-06-24 18:10:10 +00:00
2016-08-17 20:01:27 +00:00
// Start loop to sync iptables util rules
if kl . makeIPTablesUtilChains {
go wait . Until ( kl . syncNetworkUtil , 1 * time . Minute , wait . NeverStop )
}
2015-08-19 00:52:26 +00:00
// Start a goroutine responsible for killing pods (that are not properly
// handled by pod workers).
2016-02-02 10:57:06 +00:00
go wait . Until ( kl . podKiller , 1 * time . Second , wait . NeverStop )
2015-08-19 00:52:26 +00:00
2016-07-27 08:53:18 +00:00
// Start gorouting responsible for checking limits in resolv.conf
if kl . resolverConfig != "" {
go wait . Until ( func ( ) { kl . checkLimitsForResolvConf ( ) } , 30 * time . Second , wait . NeverStop )
}
2015-12-09 18:58:15 +00:00
// Start component sync loops.
2015-03-20 16:37:08 +00:00
kl . statusManager . Start ( )
2015-12-09 18:58:15 +00:00
kl . probeManager . Start ( )
2016-04-25 19:48:47 +00:00
2015-08-07 21:42:21 +00:00
// Start the pod lifecycle event generator.
kl . pleg . Start ( )
2014-07-15 20:24:41 +00:00
kl . syncLoop ( updates , kl )
2014-06-06 23:40:48 +00:00
}
2016-10-25 03:42:20 +00:00
// GetKubeClient exposes the Kubernetes API client held by the kubelet.
// TODO: This is currently only required by network plugins. Replace
// with more specific methods.
func (kl *Kubelet) GetKubeClient() clientset.Interface {
	return kl.kubeClient
}
2016-01-28 23:57:38 +00:00
// GetClusterDNS returns a list of the DNS servers and a list of the DNS search
2015-03-26 18:59:41 +00:00
// domains of the cluster.
2016-08-05 08:19:17 +00:00
func ( kl * Kubelet ) GetClusterDNS ( pod * v1 . Pod ) ( [ ] string , [ ] string , bool , error ) {
2015-07-28 18:54:32 +00:00
var hostDNS , hostSearch [ ] string
2015-10-14 17:34:29 +00:00
// Get host DNS settings
2015-07-28 18:54:32 +00:00
if kl . resolverConfig != "" {
f , err := os . Open ( kl . resolverConfig )
if err != nil {
2016-08-05 08:19:17 +00:00
return nil , nil , false , err
2015-07-28 18:54:32 +00:00
}
defer f . Close ( )
2014-11-12 05:21:40 +00:00
2015-10-24 00:01:49 +00:00
hostDNS , hostSearch , err = kl . parseResolvConf ( f )
2015-07-28 18:54:32 +00:00
if err != nil {
2016-08-05 08:19:17 +00:00
return nil , nil , false , err
2015-07-28 18:54:32 +00:00
}
}
2016-08-05 08:19:17 +00:00
useClusterFirstPolicy := ( ( pod . Spec . DNSPolicy == v1 . DNSClusterFirst && ! kubecontainer . IsHostNetworkPod ( pod ) ) || pod . Spec . DNSPolicy == v1 . DNSClusterFirstWithHostNet )
2017-01-03 09:28:38 +00:00
if useClusterFirstPolicy && len ( kl . clusterDNS ) == 0 {
2015-10-14 17:34:29 +00:00
// clusterDNS is not known.
// pod with ClusterDNSFirst Policy cannot be created
2016-11-18 20:50:58 +00:00
kl . recorder . Eventf ( pod , v1 . EventTypeWarning , "MissingClusterDNS" , "kubelet does not have ClusterDNS IP configured and cannot create Pod using %q policy. Falling back to DNSDefault policy." , pod . Spec . DNSPolicy )
2015-12-21 19:25:38 +00:00
log := fmt . Sprintf ( "kubelet does not have ClusterDNS IP configured and cannot create Pod using %q policy. pod: %q. Falling back to DNSDefault policy." , pod . Spec . DNSPolicy , format . Pod ( pod ) )
2016-11-18 20:50:58 +00:00
kl . recorder . Eventf ( kl . nodeRef , v1 . EventTypeWarning , "MissingClusterDNS" , log )
2015-10-14 17:34:29 +00:00
// fallback to DNSDefault
useClusterFirstPolicy = false
}
if ! useClusterFirstPolicy {
2015-07-28 18:54:32 +00:00
// When the kubelet --resolv-conf flag is set to the empty string, use
// DNS settings that override the docker default (which is to use
2016-08-02 22:13:54 +00:00
// /etc/resolv.conf) and effectively disable DNS lookups. According to
2015-07-28 18:54:32 +00:00
// the bind documentation, the behavior of the DNS client library when
// "nameservers" are not specified is to "use the nameserver on the
// local machine". A nameserver setting of localhost is equivalent to
// this documented behavior.
if kl . resolverConfig == "" {
hostDNS = [ ] string { "127.0.0.1" }
hostSearch = [ ] string { "." }
2016-07-27 08:53:18 +00:00
} else {
hostSearch = kl . formDNSSearchForDNSDefault ( hostSearch , pod )
2015-07-28 18:54:32 +00:00
}
2016-08-05 08:19:17 +00:00
return hostDNS , hostSearch , useClusterFirstPolicy , nil
2014-11-12 05:21:40 +00:00
}
2015-03-26 18:59:41 +00:00
2015-10-14 17:34:29 +00:00
// for a pod with DNSClusterFirst policy, the cluster DNS server is the only nameserver configured for
// the pod. The cluster DNS server itself will forward queries to other nameservers that is configured to use,
// in case the cluster DNS server cannot resolve the DNS query itself
2017-01-03 09:28:38 +00:00
dns := make ( [ ] string , len ( kl . clusterDNS ) )
for i , ip := range kl . clusterDNS {
dns [ i ] = ip . String ( )
}
2016-07-27 08:53:18 +00:00
dnsSearch := kl . formDNSSearch ( hostSearch , pod )
2017-01-03 09:28:38 +00:00
2016-08-05 08:19:17 +00:00
return dns , dnsSearch , useClusterFirstPolicy , nil
2014-11-12 05:21:40 +00:00
}
2016-04-28 04:26:36 +00:00
// syncPod is the transaction script for the sync of a single pod.
//
// Arguments:
//
2016-07-20 22:08:47 +00:00
// o - the SyncPodOptions for this invocation
2016-04-28 04:26:36 +00:00
//
// The workflow is:
// * If the pod is being created, record pod worker start latency
2016-11-18 20:50:58 +00:00
// * Call generateAPIPodStatus to prepare an v1.PodStatus for the pod
2016-04-28 04:26:36 +00:00
// * If the pod is being seen as running for the first time, record pod
// start latency
// * Update the status of the pod in the status manager
// * Kill the pod if it should not be running
// * Create a mirror pod if the pod is a static pod, and does not
// already have a mirror pod
// * Create the data directories for the pod if they do not exist
2016-05-30 02:22:22 +00:00
// * Wait for volumes to attach/mount
2016-04-28 04:26:36 +00:00
// * Fetch the pull secrets for the pod
// * Call the container runtime's SyncPod callback
// * Update the traffic shaping for the pod's ingress and egress limits
//
2016-11-01 00:43:58 +00:00
// If any step of this workflow errors, the error is returned, and is repeated
2016-04-28 04:26:36 +00:00
// on the next syncPod call.
2016-05-06 18:07:24 +00:00
func ( kl * Kubelet ) syncPod ( o syncPodOptions ) error {
// pull out the required options
pod := o . pod
mirrorPod := o . mirrorPod
podStatus := o . podStatus
updateType := o . updateType
// if we want to kill a pod, do it now!
if updateType == kubetypes . SyncPodKill {
killPodOptions := o . killPodOptions
if killPodOptions == nil || killPodOptions . PodStatusFunc == nil {
return fmt . Errorf ( "kill pod options are required if update type is kill" )
}
apiPodStatus := killPodOptions . PodStatusFunc ( pod , podStatus )
kl . statusManager . SetPodStatus ( pod , apiPodStatus )
// we kill the pod with the specified grace period since this is a termination
if err := kl . killPod ( pod , nil , podStatus , killPodOptions . PodTerminationGracePeriodSecondsOverride ) ; err != nil {
// there was an error killing the pod, so we return that error directly
utilruntime . HandleError ( err )
return err
}
return nil
}
2016-04-28 04:26:36 +00:00
// Latency measurements for the main workflow are relative to the
2016-08-11 06:14:01 +00:00
// first time the pod was seen by the API server.
2015-06-05 19:42:23 +00:00
var firstSeenTime time . Time
2016-01-20 02:15:10 +00:00
if firstSeenTimeStr , ok := pod . Annotations [ kubetypes . ConfigFirstSeenAnnotationKey ] ; ok {
2015-10-09 17:24:31 +00:00
firstSeenTime = kubetypes . ConvertToTimestamp ( firstSeenTimeStr ) . Get ( )
2015-06-05 19:42:23 +00:00
}
2015-03-09 14:23:52 +00:00
2016-04-28 04:26:36 +00:00
// Record pod worker start latency if being created
// TODO: make pod workers record their own latencies
2016-01-20 02:15:10 +00:00
if updateType == kubetypes . SyncPodCreate {
if ! firstSeenTime . IsZero ( ) {
// This is the first time we are syncing the pod. Record the latency
// since kubelet first saw the pod if firstSeenTime is set.
metrics . PodWorkerStartLatency . Observe ( metrics . SinceInMicroseconds ( firstSeenTime ) )
2015-03-09 14:23:52 +00:00
} else {
2016-01-20 02:15:10 +00:00
glog . V ( 3 ) . Infof ( "First seen time not recorded for pod %q" , pod . UID )
2015-03-09 14:23:52 +00:00
}
2016-01-20 02:15:10 +00:00
}
2016-04-28 04:26:36 +00:00
// Generate final API pod status with pod and status manager status
2016-02-13 05:56:12 +00:00
apiPodStatus := kl . generateAPIPodStatus ( pod , podStatus )
2016-04-21 22:32:11 +00:00
// The pod IP may be changed in generateAPIPodStatus if the pod is using host network. (See #24576)
// TODO(random-liu): After writing pod spec into container labels, check whether pod is using host network, and
// set pod IP to hostIP directly in runtime.GetPodStatus
podStatus . IP = apiPodStatus . PodIP
2016-04-28 04:26:36 +00:00
2016-01-20 02:15:10 +00:00
// Record the time it takes for the pod to become running.
existingStatus , ok := kl . statusManager . GetPodStatus ( pod . UID )
2016-11-18 20:50:58 +00:00
if ! ok || existingStatus . Phase == v1 . PodPending && apiPodStatus . Phase == v1 . PodRunning &&
2016-01-20 02:15:10 +00:00
! firstSeenTime . IsZero ( ) {
metrics . PodStartLatency . Observe ( metrics . SinceInMicroseconds ( firstSeenTime ) )
}
2016-03-29 03:08:54 +00:00
2016-11-02 18:05:16 +00:00
runnable := kl . canRunPod ( pod )
if ! runnable . Admit {
// Pod is not runnable; update the Pod and Container statuses to why.
apiPodStatus . Reason = runnable . Reason
apiPodStatus . Message = runnable . Message
// Waiting containers are not creating.
const waitingReason = "Blocked"
for _ , cs := range apiPodStatus . InitContainerStatuses {
if cs . State . Waiting != nil {
cs . State . Waiting . Reason = waitingReason
}
}
for _ , cs := range apiPodStatus . ContainerStatuses {
if cs . State . Waiting != nil {
cs . State . Waiting . Reason = waitingReason
}
}
}
2016-04-28 04:26:36 +00:00
// Update status in the status manager
2016-01-20 02:15:10 +00:00
kl . statusManager . SetPodStatus ( pod , apiPodStatus )
2015-03-09 14:23:52 +00:00
2016-04-28 04:26:36 +00:00
// Kill pod if it should not be running
2016-11-18 20:50:58 +00:00
if ! runnable . Admit || pod . DeletionTimestamp != nil || apiPodStatus . Phase == v1 . PodFailed {
2016-11-02 18:05:16 +00:00
var syncErr error
if err := kl . killPod ( pod , nil , podStatus , nil ) ; err != nil {
syncErr = fmt . Errorf ( "error killing pod: %v" , err )
utilruntime . HandleError ( syncErr )
} else {
if ! runnable . Admit {
// There was no error killing the pod, but the pod cannot be run.
// Return an error to signal that the sync loop should back off.
syncErr = fmt . Errorf ( "pod cannot be run: %s" , runnable . Message )
}
2015-08-20 01:57:58 +00:00
}
2016-11-02 18:05:16 +00:00
return syncErr
2015-03-26 18:25:48 +00:00
}
2016-09-23 03:04:37 +00:00
// If the network plugin is not ready, only start the pod if it uses the host network
2016-12-08 01:23:04 +00:00
if rs := kl . runtimeState . networkErrors ( ) ; len ( rs ) != 0 && ! kubecontainer . IsHostNetworkPod ( pod ) {
2016-09-23 03:04:37 +00:00
return fmt . Errorf ( "network is not ready: %v" , rs )
}
2016-10-17 17:23:48 +00:00
// Create Cgroups for the pod and apply resource parameters
2017-04-13 02:56:47 +00:00
// to them if cgroups-per-qos flag is enabled.
2016-10-17 17:23:48 +00:00
pcm := kl . containerManager . NewPodContainerManager ( )
// If pod has already been terminated then we need not create
// or update the pod's cgroup
if ! kl . podIsTerminated ( pod ) {
2017-04-13 02:56:47 +00:00
// When the kubelet is restarted with the cgroups-per-qos
2016-10-17 17:23:48 +00:00
// flag enabled, all the pod's running containers
// should be killed intermittently and brought back up
// under the qos cgroup hierarchy.
// Check if this is the pod's first sync
firstSync := true
for _ , containerStatus := range apiPodStatus . ContainerStatuses {
if containerStatus . State . Running != nil {
firstSync = false
break
}
}
// Don't kill containers in pod if pod's cgroups already
// exists or the pod is running for the first time
podKilled := false
if ! pcm . Exists ( pod ) && ! firstSync {
2017-01-09 12:28:37 +00:00
if err := kl . killPod ( pod , nil , podStatus , nil ) ; err == nil {
podKilled = true
}
2016-10-17 17:23:48 +00:00
}
// Create and Update pod's Cgroups
// Don't create cgroups for run once pod if it was killed above
// The current policy is not to restart the run once pods when
// the kubelet is restarted with the new flag as run once pods are
// expected to run only once and if the kubelet is restarted then
// they are not expected to run again.
// We don't create and apply updates to cgroup if its a run once pod and was killed above
2016-11-18 20:50:58 +00:00
if ! ( podKilled && pod . Spec . RestartPolicy == v1 . RestartPolicyNever ) {
2017-02-21 20:10:45 +00:00
if ! pcm . Exists ( pod ) {
if err := kl . containerManager . UpdateQOSCgroups ( ) ; err != nil {
glog . V ( 2 ) . Infof ( "Failed to update QoS cgroups while syncing pod: %v" , err )
}
if err := pcm . EnsureExists ( pod ) ; err != nil {
return fmt . Errorf ( "failed to ensure that the pod: %v cgroups exist and are correctly applied: %v" , pod . UID , err )
}
2016-10-17 17:23:48 +00:00
}
}
}
2015-11-04 18:50:43 +00:00
// Create Mirror Pod for Static Pod if it doesn't already exist
if kubepod . IsStaticPod ( pod ) {
2015-12-07 21:31:02 +00:00
podFullName := kubecontainer . GetPodFullName ( pod )
2016-01-21 19:55:37 +00:00
deleted := false
2016-03-07 05:42:41 +00:00
if mirrorPod != nil {
if mirrorPod . DeletionTimestamp != nil || ! kl . podManager . IsMirrorPodOf ( mirrorPod , pod ) {
// The mirror pod is semantically different from the static pod. Remove
// it. The mirror pod will get recreated later.
2016-08-11 06:14:01 +00:00
glog . Warningf ( "Deleting mirror pod %q because it is outdated" , format . Pod ( mirrorPod ) )
2016-03-07 05:42:41 +00:00
if err := kl . podManager . DeleteMirrorPod ( podFullName ) ; err != nil {
glog . Errorf ( "Failed deleting mirror pod %q: %v" , format . Pod ( mirrorPod ) , err )
} else {
deleted = true
}
2015-11-04 18:50:43 +00:00
}
}
2016-01-21 19:55:37 +00:00
if mirrorPod == nil || deleted {
2015-12-07 21:31:02 +00:00
glog . V ( 3 ) . Infof ( "Creating a mirror pod for static pod %q" , format . Pod ( pod ) )
2015-11-04 18:50:43 +00:00
if err := kl . podManager . CreateMirrorPod ( pod ) ; err != nil {
2015-12-07 21:31:02 +00:00
glog . Errorf ( "Failed creating a mirror pod for %q: %v" , format . Pod ( pod ) , err )
2015-11-04 18:50:43 +00:00
}
}
}
2016-04-28 04:26:36 +00:00
// Make data directories for the pod
2015-04-17 23:12:08 +00:00
if err := kl . makePodDataDirs ( pod ) ; err != nil {
2015-12-07 21:31:02 +00:00
glog . Errorf ( "Unable to make pod data directories for pod %q: %v" , format . Pod ( pod ) , err )
2015-04-17 23:12:08 +00:00
return err
}
2016-05-30 02:22:22 +00:00
// Wait for volumes to attach/mount
2016-07-26 22:04:03 +00:00
if err := kl . volumeManager . WaitForAttachAndMount ( pod ) ; err != nil {
2016-11-18 20:50:58 +00:00
kl . recorder . Eventf ( pod , v1 . EventTypeWarning , events . FailedMountVolume , "Unable to mount volumes for pod %q: %v" , format . Pod ( pod ) , err )
2016-07-08 20:53:33 +00:00
glog . Errorf ( "Unable to mount volumes for pod %q: %v; skipping pod" , format . Pod ( pod ) , err )
return err
2015-04-16 00:40:07 +00:00
}
2016-04-28 04:26:36 +00:00
// Fetch the pull secrets for the pod
2016-12-26 10:41:14 +00:00
pullSecrets := kl . getPullSecretsForPod ( pod )
2015-05-08 17:53:00 +00:00
2016-04-28 04:26:36 +00:00
// Call the container runtime's SyncPod callback
2016-01-12 10:19:13 +00:00
result := kl . containerRuntime . SyncPod ( pod , apiPodStatus , podStatus , pullSecrets , kl . backOff )
2016-01-12 21:28:00 +00:00
kl . reasonCache . Update ( pod . UID , result )
2016-12-26 10:41:14 +00:00
if err := result . Error ( ) ; err != nil {
2015-05-01 01:37:15 +00:00
return err
2015-03-10 14:09:55 +00:00
}
2014-07-01 05:27:56 +00:00
return nil
}
2015-11-05 05:59:15 +00:00
// Get pods which should be resynchronized. Currently, the following pod should be resynchronized:
// * pod whose work is ready.
2016-04-15 18:17:17 +00:00
// * internal modules that request sync of a pod.
2016-11-18 20:50:58 +00:00
func ( kl * Kubelet ) getPodsToSync ( ) [ ] * v1 . Pod {
2015-11-05 05:59:15 +00:00
allPods := kl . podManager . GetPods ( )
podUIDs := kl . workQueue . GetWork ( )
podUIDSet := sets . NewString ( )
for _ , podUID := range podUIDs {
podUIDSet . Insert ( string ( podUID ) )
}
2016-11-18 20:50:58 +00:00
var podsToSync [ ] * v1 . Pod
2015-11-05 05:59:15 +00:00
for _ , pod := range allPods {
if podUIDSet . Has ( string ( pod . UID ) ) {
// The work of the pod is ready
podsToSync = append ( podsToSync , pod )
2016-04-15 18:17:17 +00:00
continue
}
for _ , podSyncLoopHandler := range kl . PodSyncLoopHandlers {
if podSyncLoopHandler . ShouldSync ( pod ) {
podsToSync = append ( podsToSync , pod )
break
}
2015-11-05 05:59:15 +00:00
}
}
return podsToSync
}
2016-04-28 04:26:36 +00:00
// deletePod deletes the pod from the internal state of the kubelet by:
// 1. stopping the associated pod worker asynchronously
// 2. signaling to kill the pod by sending on the podKillingCh channel
//
// deletePod returns an error if not all sources are ready or the pod is not
// found in the runtime cache.
2016-11-18 20:50:58 +00:00
func ( kl * Kubelet ) deletePod ( pod * v1 . Pod ) error {
2016-01-31 23:56:55 +00:00
if pod == nil {
return fmt . Errorf ( "deletePod does not allow nil pod" )
}
2016-04-26 17:58:12 +00:00
if ! kl . sourcesReady . AllReady ( ) {
2015-08-19 00:52:26 +00:00
// If the sources aren't ready, skip deletion, as we may accidentally delete pods
// for sources that haven't reported yet.
return fmt . Errorf ( "skipping delete because sources aren't ready yet" )
2014-07-01 16:37:45 +00:00
}
2016-01-31 23:56:55 +00:00
kl . podWorkers . ForgetWorker ( pod . UID )
2015-08-11 23:25:17 +00:00
2015-08-19 00:52:26 +00:00
// Runtime cache may not have been updated to with the pod, but it's okay
// because the periodic cleanup routine will attempt to delete again later.
runningPods , err := kl . runtimeCache . GetPods ( )
if err != nil {
return fmt . Errorf ( "error listing containers: %v" , err )
2015-08-11 23:25:17 +00:00
}
2016-01-31 23:56:55 +00:00
runningPod := kubecontainer . Pods ( runningPods ) . FindPod ( "" , pod . UID )
if runningPod . IsEmpty ( ) {
2015-08-19 00:52:26 +00:00
return fmt . Errorf ( "pod not found" )
}
2016-03-23 23:45:24 +00:00
podPair := kubecontainer . PodPair { APIPod : pod , RunningPod : & runningPod }
2015-08-19 00:52:26 +00:00
2016-01-31 23:56:55 +00:00
kl . podKillingCh <- & podPair
2015-08-19 00:52:26 +00:00
// TODO: delete the mirror pod here?
2015-02-27 09:19:41 +00:00
2015-08-19 00:52:26 +00:00
// We leave the volume/directory cleanup to the periodic cleanup routine.
return nil
}
2016-11-01 00:43:58 +00:00
// isOutOfDisk detects if pods can't fit due to lack of disk space.
2015-08-19 00:52:26 +00:00
func ( kl * Kubelet ) isOutOfDisk ( ) bool {
2015-05-12 08:24:08 +00:00
// Check disk space once globally and reject or accept all new pods.
2016-05-18 05:05:55 +00:00
withinBounds , err := kl . diskSpaceManager . IsRuntimeDiskSpaceAvailable ( )
2015-05-12 08:24:08 +00:00
// Assume enough space in case of errors.
2016-06-10 22:22:39 +00:00
if err != nil {
glog . Errorf ( "Failed to check if disk space is available for the runtime: %v" , err )
} else if ! withinBounds {
return true
2015-05-12 08:24:08 +00:00
}
withinBounds , err = kl . diskSpaceManager . IsRootDiskSpaceAvailable ( )
// Assume enough space in case of errors.
2016-06-10 22:22:39 +00:00
if err != nil {
glog . Errorf ( "Failed to check if disk space is available on the root partition: %v" , err )
} else if ! withinBounds {
return true
2015-05-12 08:24:08 +00:00
}
2016-06-10 22:22:39 +00:00
return false
2015-05-12 08:24:08 +00:00
}
2016-04-28 04:26:36 +00:00
// rejectPod records an event about the pod with the given reason and message,
// and updates the pod to the failed phase in the status manage.
2016-11-18 20:50:58 +00:00
func ( kl * Kubelet ) rejectPod ( pod * v1 . Pod , reason , message string ) {
kl . recorder . Eventf ( pod , v1 . EventTypeWarning , reason , message )
kl . statusManager . SetPodStatus ( pod , v1 . PodStatus {
Phase : v1 . PodFailed ,
2015-08-19 00:52:26 +00:00
Reason : reason ,
Message : "Pod " + message } )
}
2015-05-14 20:02:36 +00:00
2015-08-19 00:52:26 +00:00
// canAdmitPod determines if a pod can be admitted, and gives a reason if it
2016-07-07 02:44:52 +00:00
// cannot. "pod" is new pod, while "pods" are all admitted pods
// The function returns a boolean value indicating whether the pod
2015-08-19 00:52:26 +00:00
// can be admitted, a brief single-word reason and a message explaining why
// the pod cannot be admitted.
2016-11-18 20:50:58 +00:00
func ( kl * Kubelet ) canAdmitPod ( pods [ ] * v1 . Pod , pod * v1 . Pod ) ( bool , string , string ) {
2016-04-15 18:17:17 +00:00
// the kubelet will invoke each pod admit handler in sequence
// if any handler rejects, the pod is rejected.
// TODO: move out of disk check into a pod admitter
// TODO: out of resource eviction should have a pod admitter call-out
2016-07-07 02:44:52 +00:00
attrs := & lifecycle . PodAdmitAttributes { Pod : pod , OtherPods : pods }
2016-11-02 18:05:16 +00:00
for _ , podAdmitHandler := range kl . admitHandlers {
2016-04-15 18:17:17 +00:00
if result := podAdmitHandler . Admit ( attrs ) ; ! result . Admit {
return false , result . Reason , result . Message
}
}
2016-01-06 01:10:59 +00:00
// TODO: When disk space scheduling is implemented (#11976), remove the out-of-disk check here and
// add the disk space predicate to predicates.GeneralPredicates.
2015-08-19 00:52:26 +00:00
if kl . isOutOfDisk ( ) {
2016-07-29 14:50:56 +00:00
glog . Warningf ( "Failed to admit pod %v - %s" , format . Pod ( pod ) , "predicate fails due to OutOfDisk" )
2015-08-19 00:52:26 +00:00
return false , "OutOfDisk" , "cannot be started due to lack of disk space."
}
2016-04-15 18:17:17 +00:00
2015-08-19 00:52:26 +00:00
return true , "" , ""
2014-07-08 04:48:47 +00:00
}
2016-11-18 20:50:58 +00:00
func ( kl * Kubelet ) canRunPod ( pod * v1 . Pod ) lifecycle . PodAdmitResult {
2016-11-02 18:05:16 +00:00
attrs := & lifecycle . PodAdmitAttributes { Pod : pod }
// Get "OtherPods". Rejected pods are failed, so only include admitted pods that are alive.
attrs . OtherPods = kl . filterOutTerminatedPods ( kl . podManager . GetPods ( ) )
for _ , handler := range kl . softAdmitHandlers {
if result := handler . Admit ( attrs ) ; ! result . Admit {
return result
}
}
// TODO: Refactor as a soft admit handler.
if err := canRunPod ( pod ) ; err != nil {
return lifecycle . PodAdmitResult {
Admit : false ,
Reason : "Forbidden" ,
Message : err . Error ( ) ,
}
}
return lifecycle . PodAdmitResult { Admit : true }
}
2014-07-01 20:01:39 +00:00
// syncLoop is the main loop for processing changes. It watches for changes from
2015-03-11 23:40:20 +00:00
// three channels (file, apiserver, and http) and creates a union of them. For
2014-06-06 23:40:48 +00:00
// any new change seen, will run a sync against desired state and running state. If
// no changes are seen to the configuration, will synchronize the last known desired
2015-08-11 20:29:50 +00:00
// state every sync-frequency seconds. Never returns.
2015-10-09 17:24:31 +00:00
func ( kl * Kubelet ) syncLoop ( updates <- chan kubetypes . PodUpdate , handler SyncHandler ) {
2015-04-08 20:57:19 +00:00
glog . Info ( "Starting kubelet main sync loop." )
2015-09-02 17:18:11 +00:00
// The resyncTicker wakes up kubelet to checks if there are any pod workers
// that need to be sync'd. A one-second period is sufficient because the
// sync interval is defaulted to 10s.
2015-11-03 18:03:39 +00:00
syncTicker := time . NewTicker ( time . Second )
2016-07-26 03:53:21 +00:00
defer syncTicker . Stop ( )
2015-11-03 18:03:39 +00:00
housekeepingTicker := time . NewTicker ( housekeepingPeriod )
2016-07-26 03:53:21 +00:00
defer housekeepingTicker . Stop ( )
2015-08-07 21:42:21 +00:00
plegCh := kl . pleg . Watch ( )
2014-06-06 23:40:48 +00:00
for {
2016-09-23 03:04:37 +00:00
if rs := kl . runtimeState . runtimeErrors ( ) ; len ( rs ) != 0 {
2015-09-24 22:26:25 +00:00
glog . Infof ( "skipping pod synchronization - %v" , rs )
2015-08-28 01:07:57 +00:00
time . Sleep ( 5 * time . Second )
continue
}
2017-06-10 02:04:16 +00:00
kl . syncLoopMonitor . Store ( kl . clock . Now ( ) )
2015-08-07 21:42:21 +00:00
if ! kl . syncLoopIteration ( updates , handler , syncTicker . C , housekeepingTicker . C , plegCh ) {
2015-09-15 19:29:34 +00:00
break
}
2017-06-10 02:04:16 +00:00
kl . syncLoopMonitor . Store ( kl . clock . Now ( ) )
2015-06-17 22:31:46 +00:00
}
}
2016-04-28 04:26:36 +00:00
// syncLoopIteration reads from various channels and dispatches pods to the
// given handler.
//
// Arguments:
// 1. configCh: a channel to read config events from
// 2. handler: the SyncHandler to dispatch pods to
// 3. syncCh: a channel to read periodic sync events from
// 4. houseKeepingCh: a channel to read housekeeping events from
// 5. plegCh: a channel to read PLEG updates from
//
// Events are also read from the kubelet liveness manager's update channel.
//
// The workflow is to read from one of the channels, handle that event, and
// update the timestamp in the sync loop monitor.
//
// Here is an appropriate place to note that despite the syntactical
// similarity to the switch statement, the case statements in a select are
// evaluated in a pseudorandom order if there are multiple channels ready to
// read from when the select is evaluated. In other words, case statements
// are evaluated in random order, and you can not assume that the case
// statements evaluate in order if multiple channels have events.
//
// With that in mind, in truly no particular order, the different channels
// are handled as follows:
//
// * configCh: dispatch the pods for the config change to the appropriate
// handler callback for the event type
// * plegCh: update the runtime cache; sync pod
// * syncCh: sync all pods waiting for sync
// * houseKeepingCh: trigger cleanup of pods
// * liveness manager: sync pods that have failed or in which one or more
// containers have failed liveness checks
func ( kl * Kubelet ) syncLoopIteration ( configCh <- chan kubetypes . PodUpdate , handler SyncHandler ,
2015-08-07 21:42:21 +00:00
syncCh <- chan time . Time , housekeepingCh <- chan time . Time , plegCh <- chan * pleg . PodLifecycleEvent ) bool {
2015-06-17 22:31:46 +00:00
select {
2016-04-28 04:26:36 +00:00
case u , open := <- configCh :
// Update from a config source; dispatch it to the right handler
// callback.
2015-08-30 19:47:24 +00:00
if ! open {
2015-06-17 22:31:46 +00:00
glog . Errorf ( "Update channel is closed. Exiting the sync loop." )
2015-08-30 19:47:24 +00:00
return false
2015-06-18 05:34:11 +00:00
}
2015-08-07 21:42:21 +00:00
2015-08-19 00:52:26 +00:00
switch u . Op {
2015-10-09 17:24:31 +00:00
case kubetypes . ADD :
2015-11-20 17:54:37 +00:00
glog . V ( 2 ) . Infof ( "SyncLoop (ADD, %q): %q" , u . Source , format . Pods ( u . Pods ) )
2016-03-17 18:36:18 +00:00
// After restarting, kubelet will get all existing pods through
// ADD as if they are new pods. These pods will then go through the
2016-09-23 14:27:10 +00:00
// admission process and *may* be rejected. This can be resolved
2016-03-17 18:36:18 +00:00
// once we have checkpointing.
2015-08-19 00:52:26 +00:00
handler . HandlePodAdditions ( u . Pods )
2015-10-09 17:24:31 +00:00
case kubetypes . UPDATE :
2016-06-08 23:32:30 +00:00
glog . V ( 2 ) . Infof ( "SyncLoop (UPDATE, %q): %q" , u . Source , format . PodsWithDeletiontimestamps ( u . Pods ) )
2015-08-19 00:52:26 +00:00
handler . HandlePodUpdates ( u . Pods )
2015-10-09 17:24:31 +00:00
case kubetypes . REMOVE :
2015-11-20 17:54:37 +00:00
glog . V ( 2 ) . Infof ( "SyncLoop (REMOVE, %q): %q" , u . Source , format . Pods ( u . Pods ) )
2016-06-14 09:29:18 +00:00
handler . HandlePodRemoves ( u . Pods )
2015-12-09 03:13:09 +00:00
case kubetypes . RECONCILE :
glog . V ( 4 ) . Infof ( "SyncLoop (RECONCILE, %q): %q" , u . Source , format . Pods ( u . Pods ) )
handler . HandlePodReconcile ( u . Pods )
2016-06-14 09:29:18 +00:00
case kubetypes . DELETE :
glog . V ( 2 ) . Infof ( "SyncLoop (DELETE, %q): %q" , u . Source , format . Pods ( u . Pods ) )
// DELETE is treated as a UPDATE because of graceful deletion.
handler . HandlePodUpdates ( u . Pods )
2015-10-09 17:24:31 +00:00
case kubetypes . SET :
2015-08-19 00:52:26 +00:00
// TODO: Do we want to support this?
glog . Errorf ( "Kubelet does not support snapshot update" )
}
2016-08-11 00:55:10 +00:00
// Mark the source ready after receiving at least one update from the
// source. Once all the sources are marked ready, various cleanup
// routines will start reclaiming resources. It is important that this
// takes place only after kubelet calls the update handler to process
// the update to ensure the internal pod cache is up-to-date.
kl . sourcesReady . AddSource ( u . Source )
2015-08-07 21:42:21 +00:00
case e := <- plegCh :
2016-07-13 23:48:43 +00:00
if isSyncPodWorthy ( e ) {
// PLEG event for a pod; sync it.
2016-08-09 23:51:55 +00:00
if pod , ok := kl . podManager . GetPodByUID ( e . ID ) ; ok {
glog . V ( 2 ) . Infof ( "SyncLoop (PLEG): %q, event: %#v" , format . Pod ( pod ) , e )
2016-11-18 20:50:58 +00:00
handler . HandlePodSyncs ( [ ] * v1 . Pod { pod } )
2016-08-09 23:51:55 +00:00
} else {
2016-07-13 23:48:43 +00:00
// If the pod no longer exists, ignore the event.
glog . V ( 4 ) . Infof ( "SyncLoop (PLEG): ignore irrelevant event: %#v" , e )
}
2015-08-07 21:42:21 +00:00
}
2016-08-02 01:14:52 +00:00
2016-07-07 16:58:55 +00:00
if e . Type == pleg . ContainerDied {
2016-08-02 01:14:52 +00:00
if containerID , ok := e . Data . ( string ) ; ok {
kl . cleanUpContainersInPod ( e . ID , containerID )
2016-07-07 16:58:55 +00:00
}
}
2015-11-03 18:03:39 +00:00
case <- syncCh :
2016-04-28 04:26:36 +00:00
// Sync pods waiting for sync
2015-11-05 05:59:15 +00:00
podsToSync := kl . getPodsToSync ( )
2015-11-04 23:35:10 +00:00
if len ( podsToSync ) == 0 {
break
}
2015-11-20 17:54:37 +00:00
glog . V ( 4 ) . Infof ( "SyncLoop (SYNC): %d pods; %s" , len ( podsToSync ) , format . Pods ( podsToSync ) )
2015-09-02 17:18:11 +00:00
kl . HandlePodSyncs ( podsToSync )
2015-10-19 22:15:59 +00:00
case update := <- kl . livenessManager . Updates ( ) :
if update . Result == proberesults . Failure {
2016-04-28 04:26:36 +00:00
// The liveness manager detected a failure; sync the pod.
2016-02-10 00:32:54 +00:00
// We should not use the pod from livenessManager, because it is never updated after
// initialization.
2016-02-12 05:02:31 +00:00
pod , ok := kl . podManager . GetPodByUID ( update . PodUID )
2016-02-10 00:32:54 +00:00
if ! ok {
// If the pod no longer exists, ignore the update.
glog . V ( 4 ) . Infof ( "SyncLoop (container unhealthy): ignore irrelevant update: %#v" , update )
break
}
glog . V ( 1 ) . Infof ( "SyncLoop (container unhealthy): %q" , format . Pod ( pod ) )
2016-11-18 20:50:58 +00:00
handler . HandlePodSyncs ( [ ] * v1 . Pod { pod } )
2015-10-19 22:15:59 +00:00
}
2015-11-03 18:03:39 +00:00
case <- housekeepingCh :
2016-04-26 17:58:12 +00:00
if ! kl . sourcesReady . AllReady ( ) {
2016-11-18 22:15:12 +00:00
// If the sources aren't ready or volume manager has not yet synced the states,
// skip housekeeping, as we may accidentally delete pods from unready sources.
2015-11-03 18:03:39 +00:00
glog . V ( 4 ) . Infof ( "SyncLoop (housekeeping, skipped): sources aren't ready yet." )
} else {
glog . V ( 4 ) . Infof ( "SyncLoop (housekeeping)" )
if err := handler . HandlePodCleanups ( ) ; err != nil {
glog . Errorf ( "Failed cleaning pods: %v" , err )
}
}
2015-06-17 22:31:46 +00:00
}
2015-08-30 19:47:24 +00:00
return true
2015-08-19 00:52:26 +00:00
}
2016-04-28 04:26:36 +00:00
// dispatchWork starts the asynchronous sync of the pod in a pod worker.
// If the pod is terminated, dispatchWork
2016-11-18 20:50:58 +00:00
func ( kl * Kubelet ) dispatchWork ( pod * v1 . Pod , syncType kubetypes . SyncPodType , mirrorPod * v1 . Pod , start time . Time ) {
2015-08-19 00:52:26 +00:00
if kl . podIsTerminated ( pod ) {
2016-02-17 23:56:54 +00:00
if pod . DeletionTimestamp != nil {
2016-04-28 04:26:36 +00:00
// If the pod is in a terminated state, there is no pod worker to
2016-02-17 23:56:54 +00:00
// handle the work item. Check if the DeletionTimestamp has been
// set, and force a status update to trigger a pod deletion request
// to the apiserver.
kl . statusManager . TerminatePod ( pod )
}
2015-08-19 00:52:26 +00:00
return
}
// Run the sync in an async worker.
2016-05-06 18:07:24 +00:00
kl . podWorkers . UpdatePod ( & UpdatePodOptions {
Pod : pod ,
MirrorPod : mirrorPod ,
UpdateType : syncType ,
OnCompleteFunc : func ( err error ) {
if err != nil {
metrics . PodWorkerLatency . WithLabelValues ( syncType . String ( ) ) . Observe ( metrics . SinceInMicroseconds ( start ) )
}
} ,
2015-08-19 00:52:26 +00:00
} )
// Note the number of containers for new pods.
2015-10-09 17:24:31 +00:00
if syncType == kubetypes . SyncPodCreate {
2015-08-19 00:52:26 +00:00
metrics . ContainersPerPodCount . Observe ( float64 ( len ( pod . Spec . Containers ) ) )
}
}
2016-07-15 01:28:25 +00:00
// TODO: handle mirror pods in a separate component (issue #17251)
2016-11-18 20:50:58 +00:00
func ( kl * Kubelet ) handleMirrorPod ( mirrorPod * v1 . Pod , start time . Time ) {
2015-08-19 00:52:26 +00:00
// Mirror pod ADD/UPDATE/DELETE operations are considered an UPDATE to the
// corresponding static pod. Send update to the pod worker if the static
// pod exists.
if pod , ok := kl . podManager . GetPodByMirrorPod ( mirrorPod ) ; ok {
2015-10-09 17:24:31 +00:00
kl . dispatchWork ( pod , kubetypes . SyncPodUpdate , mirrorPod , start )
2015-08-19 00:52:26 +00:00
}
}
2016-04-28 04:26:36 +00:00
// HandlePodAdditions is the callback in SyncHandler for pods being added from
// a config source.
2016-11-18 20:50:58 +00:00
func ( kl * Kubelet ) HandlePodAdditions ( pods [ ] * v1 . Pod ) {
2016-01-04 20:03:28 +00:00
start := kl . clock . Now ( )
2017-01-26 04:41:09 +00:00
sort . Sort ( sliceutils . PodsByCreationTime ( pods ) )
for _ , pod := range pods {
2016-11-29 22:57:17 +00:00
existingPods := kl . podManager . GetPods ( )
// Always add the pod to the pod manager. Kubelet relies on the pod
// manager as the source of truth for the desired state. If a pod does
// not exist in the pod manager, it means that it has been deleted in
// the apiserver and no action (other than cleanup) is required.
kl . podManager . AddPod ( pod )
2015-10-12 23:28:23 +00:00
if kubepod . IsMirrorPod ( pod ) {
2015-08-19 00:52:26 +00:00
kl . handleMirrorPod ( pod , start )
continue
}
2016-11-29 22:57:17 +00:00
if ! kl . podIsTerminated ( pod ) {
// Only go through the admission process if the pod is not
// terminated.
// We failed pods that we rejected, so activePods include all admitted
// pods that are alive.
activePods := kl . filterOutTerminatedPods ( existingPods )
// Check if we can admit the pod; if not, reject it.
if ok , reason , message := kl . canAdmitPod ( activePods , pod ) ; ! ok {
kl . rejectPod ( pod , reason , message )
continue
}
2014-06-06 23:40:48 +00:00
}
2015-08-19 00:52:26 +00:00
mirrorPod , _ := kl . podManager . GetMirrorPodByPod ( pod )
2015-10-09 17:24:31 +00:00
kl . dispatchWork ( pod , kubetypes . SyncPodCreate , mirrorPod , start )
2015-08-25 17:39:41 +00:00
kl . probeManager . AddPod ( pod )
2014-06-06 23:40:48 +00:00
}
2015-08-19 00:52:26 +00:00
}
2016-04-28 04:26:36 +00:00
// HandlePodUpdates is the callback in the SyncHandler interface for pods
// being updated from a config source.
2016-11-18 20:50:58 +00:00
func ( kl * Kubelet ) HandlePodUpdates ( pods [ ] * v1 . Pod ) {
2016-01-04 20:03:28 +00:00
start := kl . clock . Now ( )
2015-08-19 00:52:26 +00:00
for _ , pod := range pods {
kl . podManager . UpdatePod ( pod )
2015-10-12 23:28:23 +00:00
if kubepod . IsMirrorPod ( pod ) {
2015-08-19 00:52:26 +00:00
kl . handleMirrorPod ( pod , start )
continue
}
// TODO: Evaluate if we need to validate and reject updates.
mirrorPod , _ := kl . podManager . GetMirrorPodByPod ( pod )
2015-10-09 17:24:31 +00:00
kl . dispatchWork ( pod , kubetypes . SyncPodUpdate , mirrorPod , start )
2015-08-19 00:52:26 +00:00
}
}
2016-06-14 09:29:18 +00:00
// HandlePodRemoves is the callback in the SyncHandler interface for pods
// being removed from a config source.
2016-11-18 20:50:58 +00:00
func ( kl * Kubelet ) HandlePodRemoves ( pods [ ] * v1 . Pod ) {
2016-01-04 20:03:28 +00:00
start := kl . clock . Now ( )
2015-08-19 00:52:26 +00:00
for _ , pod := range pods {
kl . podManager . DeletePod ( pod )
2015-10-12 23:28:23 +00:00
if kubepod . IsMirrorPod ( pod ) {
2015-08-19 00:52:26 +00:00
kl . handleMirrorPod ( pod , start )
continue
}
// Deletion is allowed to fail because the periodic cleanup routine
// will trigger deletion again.
2016-01-31 23:56:55 +00:00
if err := kl . deletePod ( pod ) ; err != nil {
2015-11-20 17:54:37 +00:00
glog . V ( 2 ) . Infof ( "Failed to delete pod %q, err: %v" , format . Pod ( pod ) , err )
2015-08-19 00:52:26 +00:00
}
2015-08-25 17:39:41 +00:00
kl . probeManager . RemovePod ( pod )
2015-08-19 00:52:26 +00:00
}
}
2016-04-28 04:26:36 +00:00
// HandlePodReconcile is the callback in the SyncHandler interface for pods
// that should be reconciled.
2016-11-18 20:50:58 +00:00
func ( kl * Kubelet ) HandlePodReconcile ( pods [ ] * v1 . Pod ) {
2015-12-09 03:13:09 +00:00
for _ , pod := range pods {
// Update the pod in pod manager, status manager will do periodically reconcile according
// to the pod manager.
kl . podManager . UpdatePod ( pod )
2016-08-09 22:37:23 +00:00
// After an evicted pod is synced, all dead containers in the pod can be removed.
if eviction . PodIsEvicted ( pod . Status ) {
if podStatus , err := kl . podCache . Get ( pod . UID ) ; err == nil {
kl . containerDeletor . deleteContainersInPod ( "" , podStatus , true )
}
}
2015-12-09 03:13:09 +00:00
}
}
2016-04-28 04:26:36 +00:00
// HandlePodSyncs is the callback in the syncHandler interface for pods
// that should be dispatched to pod workers for sync.
2016-11-18 20:50:58 +00:00
func ( kl * Kubelet ) HandlePodSyncs ( pods [ ] * v1 . Pod ) {
2016-01-04 20:03:28 +00:00
start := kl . clock . Now ( )
2015-08-19 00:52:26 +00:00
for _ , pod := range pods {
mirrorPod , _ := kl . podManager . GetMirrorPodByPod ( pod )
2015-10-09 17:24:31 +00:00
kl . dispatchWork ( pod , kubetypes . SyncPodSync , mirrorPod , start )
2015-06-17 22:31:46 +00:00
}
}
2016-04-28 04:26:36 +00:00
// LatestLoopEntryTime returns the last time in the sync loop monitor.
2015-06-17 22:31:46 +00:00
func ( kl * Kubelet ) LatestLoopEntryTime ( ) time . Time {
val := kl . syncLoopMonitor . Load ( )
if val == nil {
return time . Time { }
}
return val . ( time . Time )
2014-06-06 23:40:48 +00:00
}
2016-04-28 04:26:36 +00:00
// updateRuntimeUp calls the container runtime status callback, initializing
// the runtime dependent modules when the container runtime first comes up,
// and returns an error if the status check fails. If the status check is OK,
// update the container runtime uptime in the kubelet runtimeState.
2015-05-05 10:19:54 +00:00
func ( kl * Kubelet ) updateRuntimeUp ( ) {
2016-11-02 04:39:46 +00:00
s , err := kl . containerRuntime . Status ( )
if err != nil {
2015-12-07 19:12:20 +00:00
glog . Errorf ( "Container runtime sanity check failed: %v" , err )
return
2015-05-05 10:19:54 +00:00
}
2017-05-01 21:39:51 +00:00
// rkt uses the legacy, non-CRI integration. Don't check the runtime
// conditions for it.
2017-06-20 11:51:12 +00:00
if kl . kubeletConfiguration . ContainerRuntime != kubetypes . RktContainerRuntime {
2016-11-02 04:39:46 +00:00
if s == nil {
glog . Errorf ( "Container runtime status is nil" )
return
}
// Periodically log the whole runtime status for debugging.
// TODO(random-liu): Consider to send node event when optional
// condition is unmet.
glog . V ( 4 ) . Infof ( "Container runtime status: %v" , s )
2016-11-03 01:23:57 +00:00
networkReady := s . GetRuntimeCondition ( kubecontainer . NetworkReady )
if networkReady == nil || ! networkReady . Status {
glog . Errorf ( "Container runtime network not ready: %v" , networkReady )
kl . runtimeState . setNetworkState ( fmt . Errorf ( "runtime network not ready: %v" , networkReady ) )
} else {
2017-02-21 13:15:40 +00:00
// Set nil if the container runtime network is ready.
2016-11-03 01:23:57 +00:00
kl . runtimeState . setNetworkState ( nil )
}
// TODO(random-liu): Add runtime error in runtimeState, and update it
// when runtime is not ready, so that the information in RuntimeReady
// condition will be propagated to NodeReady condition.
runtimeReady := s . GetRuntimeCondition ( kubecontainer . RuntimeReady )
2016-11-02 04:39:46 +00:00
// If RuntimeReady is not set or is false, report an error.
if runtimeReady == nil || ! runtimeReady . Status {
glog . Errorf ( "Container runtime not ready: %v" , runtimeReady )
return
}
}
2015-12-07 19:12:20 +00:00
kl . oneTimeInitializer . Do ( kl . initializeRuntimeDependentModules )
2016-01-04 20:03:28 +00:00
kl . runtimeState . setRuntimeSync ( kl . clock . Now ( ) )
2015-05-05 10:19:54 +00:00
}
2016-09-23 15:58:44 +00:00
// updateCloudProviderFromMachineInfo updates the node's provider ID field
// from the given cadvisor machine info.
2016-11-18 20:50:58 +00:00
func ( kl * Kubelet ) updateCloudProviderFromMachineInfo ( node * v1 . Node , info * cadvisorapi . MachineInfo ) {
2016-02-14 08:33:14 +00:00
if info . CloudProvider != cadvisorapi . UnknownProvider &&
info . CloudProvider != cadvisorapi . Baremetal {
// The cloud providers from pkg/cloudprovider/providers/* that update ProviderID
// will use the format of cloudprovider://project/availability_zone/instance_name
// here we only have the cloudprovider and the instance name so we leave project
// and availability zone empty for compatibility.
node . Spec . ProviderID = strings . ToLower ( string ( info . CloudProvider ) ) +
":////" + string ( info . InstanceID )
}
}
2016-07-13 23:11:12 +00:00
// GetConfiguration returns the KubeletConfiguration used to configure the kubelet.
2016-08-30 01:03:34 +00:00
func ( kl * Kubelet ) GetConfiguration ( ) componentconfig . KubeletConfiguration {
2016-07-13 23:11:12 +00:00
return kl . kubeletConfiguration
}
2015-03-30 13:20:20 +00:00
// BirthCry sends an event that the kubelet has started up.
func ( kl * Kubelet ) BirthCry ( ) {
// Make an event that kubelet restarted.
2016-11-18 20:50:58 +00:00
kl . recorder . Eventf ( kl . nodeRef , v1 . EventTypeNormal , events . StartingKubelet , "Starting kubelet." )
2014-11-10 21:13:57 +00:00
}
2015-01-08 20:41:38 +00:00
2016-04-28 04:26:36 +00:00
// StreamingConnectionIdleTimeout returns the timeout for streaming connections to the HTTP server.
2015-01-08 20:41:38 +00:00
func ( kl * Kubelet ) StreamingConnectionIdleTimeout ( ) time . Duration {
return kl . streamingConnectionIdleTimeout
}
2015-03-06 07:56:30 +00:00
2016-04-28 04:26:36 +00:00
// ResyncInterval returns the interval used for periodic syncs.
2015-06-17 22:31:46 +00:00
func ( kl * Kubelet ) ResyncInterval ( ) time . Duration {
return kl . resyncInterval
}
2016-04-28 04:26:36 +00:00
// ListenAndServe runs the kubelet HTTP server.
2017-02-28 18:43:08 +00:00
func ( kl * Kubelet ) ListenAndServe ( address net . IP , port uint , tlsOptions * server . TLSOptions , auth server . AuthInterface , enableDebuggingHandlers , enableContentionProfiling bool ) {
server . ListenAndServeKubeletServer ( kl , kl . resourceAnalyzer , address , port , tlsOptions , auth , enableDebuggingHandlers , enableContentionProfiling , kl . containerRuntime , kl . criHandler )
2015-03-26 12:31:54 +00:00
}
2016-04-28 04:26:36 +00:00
// ListenAndServeReadOnly runs the kubelet HTTP server in read-only mode.
2015-03-26 12:31:54 +00:00
func ( kl * Kubelet ) ListenAndServeReadOnly ( address net . IP , port uint ) {
2016-03-29 00:05:02 +00:00
server . ListenAndServeKubeletReadOnlyServer ( kl , kl . resourceAnalyzer , address , port , kl . containerRuntime )
2015-03-26 12:31:54 +00:00
}
2016-07-13 23:48:43 +00:00
2016-08-02 01:14:52 +00:00
// Delete the eligible dead container instances in a pod. Depending on the configuration, the latest dead containers may be kept around.
func ( kl * Kubelet ) cleanUpContainersInPod ( podId types . UID , exitedContainerID string ) {
if podStatus , err := kl . podCache . Get ( podId ) ; err == nil {
2016-08-09 22:37:23 +00:00
removeAll := false
if syncedPod , ok := kl . podManager . GetPodByUID ( podId ) ; ok {
// When an evicted pod has already synced, all containers can be removed.
removeAll = eviction . PodIsEvicted ( syncedPod . Status )
2016-08-02 01:14:52 +00:00
}
2016-08-09 22:37:23 +00:00
kl . containerDeletor . deleteContainersInPod ( exitedContainerID , podStatus , removeAll )
2016-08-02 01:14:52 +00:00
}
}
2016-07-07 16:58:55 +00:00
// isSyncPodWorthy filters out events that are not worthy of pod syncing
2016-07-13 23:48:43 +00:00
func isSyncPodWorthy ( event * pleg . PodLifecycleEvent ) bool {
// ContatnerRemoved doesn't affect pod state
return event . Type != pleg . ContainerRemoved
}
2016-07-13 23:11:12 +00:00
2016-11-04 18:50:51 +00:00
// Gets the streaming server configuration to use with in-process CRI shims.
func getStreamingConfig ( kubeCfg * componentconfig . KubeletConfiguration , kubeDeps * KubeletDeps ) * streaming . Config {
config := & streaming . Config {
// Use a relative redirect (no scheme or host).
BaseURL : & url . URL {
Path : "/cri/" ,
} ,
2016-10-17 08:50:20 +00:00
StreamIdleTimeout : kubeCfg . StreamingConnectionIdleTimeout . Duration ,
StreamCreationTimeout : streaming . DefaultConfig . StreamCreationTimeout ,
SupportedRemoteCommandProtocols : streaming . DefaultConfig . SupportedRemoteCommandProtocols ,
SupportedPortForwardProtocols : streaming . DefaultConfig . SupportedPortForwardProtocols ,
2016-11-04 18:50:51 +00:00
}
if kubeDeps . TLSOptions != nil {
config . TLSConfig = kubeDeps . TLSOptions . Config
}
return config
}