2019-01-12 04:58:27 +00:00
/ *
Copyright 2016 The Kubernetes Authors .
Licensed under the Apache License , Version 2.0 ( the "License" ) ;
you may not use this file except in compliance with the License .
You may obtain a copy of the License at
http : //www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing , software
distributed under the License is distributed on an "AS IS" BASIS ,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND , either express or implied .
See the License for the specific language governing permissions and
limitations under the License .
* /
package kuberuntime
import (
"errors"
"fmt"
"os"
2019-12-12 01:27:03 +00:00
goruntime "runtime"
2019-01-12 04:58:27 +00:00
"time"
cadvisorapi "github.com/google/cadvisor/info/v1"
2020-08-10 17:43:49 +00:00
"k8s.io/klog/v2"
2019-01-12 04:58:27 +00:00
2019-04-07 17:07:55 +00:00
v1 "k8s.io/api/core/v1"
2019-01-12 04:58:27 +00:00
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kubetypes "k8s.io/apimachinery/pkg/types"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
utilversion "k8s.io/apimachinery/pkg/util/version"
2019-09-27 21:51:53 +00:00
utilfeature "k8s.io/apiserver/pkg/util/feature"
2019-01-12 04:58:27 +00:00
"k8s.io/client-go/tools/record"
ref "k8s.io/client-go/tools/reference"
"k8s.io/client-go/util/flowcontrol"
2020-08-10 17:43:49 +00:00
"k8s.io/component-base/logs/logreduction"
2019-08-30 18:33:25 +00:00
internalapi "k8s.io/cri-api/pkg/apis"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
2019-01-12 04:58:27 +00:00
"k8s.io/kubernetes/pkg/api/legacyscheme"
"k8s.io/kubernetes/pkg/credentialprovider"
2020-12-01 01:06:26 +00:00
"k8s.io/kubernetes/pkg/credentialprovider/plugin"
2019-09-27 21:51:53 +00:00
"k8s.io/kubernetes/pkg/features"
2019-01-12 04:58:27 +00:00
"k8s.io/kubernetes/pkg/kubelet/cm"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/events"
"k8s.io/kubernetes/pkg/kubelet/images"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
2020-08-10 17:43:49 +00:00
"k8s.io/kubernetes/pkg/kubelet/logs"
2021-07-02 08:43:15 +00:00
"k8s.io/kubernetes/pkg/kubelet/metrics"
2019-01-12 04:58:27 +00:00
proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results"
2019-04-07 17:07:55 +00:00
"k8s.io/kubernetes/pkg/kubelet/runtimeclass"
2019-01-12 04:58:27 +00:00
"k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/kubelet/util/cache"
"k8s.io/kubernetes/pkg/kubelet/util/format"
)
const (
// kubeRuntimeAPIVersion is the runtime API version the kubelet passes to the
// runtime's Version call and requires the runtime to report back.
kubeRuntimeAPIVersion = "0.1.0"
// podLogsRootDirectory is the root directory for pod logs.
podLogsRootDirectory = "/var/log/pods"
// minimumGracePeriodInSeconds is a minimal shutdown window for avoiding unnecessary SIGKILLs.
minimumGracePeriodInSeconds = 2
// versionCacheTTL is the expiration time of the runtime version cache.
versionCacheTTL = 60 * time . Second
2019-04-07 17:07:55 +00:00
// identicalErrorDelay is how frequently identical errors are reported; it is
// the interval handed to the log reduction helper.
identicalErrorDelay = 1 * time . Minute
2019-01-12 04:58:27 +00:00
)
var (
// ErrVersionNotSupported is returned when the api version of runtime interface is not supported.
// NewKubeGenericRuntimeManager returns it when the version reported by the
// runtime differs from kubeRuntimeAPIVersion.
2019-09-27 21:51:53 +00:00
ErrVersionNotSupported = errors . New ( "runtime api version is not supported" )
2019-01-12 04:58:27 +00:00
)
2021-07-02 08:43:15 +00:00
// podStateProvider answers lifecycle questions about a pod identified by UID:
// whether none of the pod's content needs to be retained, and whether none of
// its runtime elements (containers) need to be retained.
2019-01-12 04:58:27 +00:00
type podStateProvider interface {
2021-10-29 15:14:24 +00:00
// IsPodTerminationRequested presumably reports whether termination of the
// pod has been requested — confirm against the implementation.
IsPodTerminationRequested ( kubetypes . UID ) bool
2021-07-02 08:43:15 +00:00
// ShouldPodContentBeRemoved reports whether the pod's content no longer
// needs to be retained.
ShouldPodContentBeRemoved ( kubetypes . UID ) bool
// ShouldPodRuntimeBeRemoved reports whether the pod's runtime elements
// (containers) no longer need to be retained.
ShouldPodRuntimeBeRemoved ( kubetypes . UID ) bool
2019-01-12 04:58:27 +00:00
}
// kubeGenericRuntimeManager is the KubeGenericRuntime implementation built by
// NewKubeGenericRuntimeManager. It talks to the container runtime through the
// runtimeService and imageService clients.
type kubeGenericRuntimeManager struct {
2020-08-10 17:43:49 +00:00
// runtimeName is the name reported in the runtime's version response; it is
// what Type() returns.
runtimeName string
// recorder emits events about pods (e.g. sandbox changes).
recorder record . EventRecorder
// osInterface abstracts OS calls such as Stat and MkdirAll.
osInterface kubecontainer . OSInterface
2019-01-12 04:58:27 +00:00
// machineInfo contains the machine information.
machineInfo * cadvisorapi . MachineInfo
// Container GC manager
containerGC * containerGC
// Keyring for pulling images
keyring credentialprovider . DockerKeyring
// Runner of lifecycle events.
runner kubecontainer . HandlerRunner
// RuntimeHelper that wraps kubelet to generate runtime container options.
runtimeHelper kubecontainer . RuntimeHelper
// Health check results.
2021-03-18 22:40:29 +00:00
livenessManager proberesults . Manager
readinessManager proberesults . Manager
startupManager proberesults . Manager
2019-01-12 04:58:27 +00:00
// If true, enforce container cpu limits with CFS quota support
cpuCFSQuota bool
// CPUCFSQuotaPeriod sets the CPU CFS quota period value, cpu.cfs_period_us, defaults to 100ms
cpuCFSQuotaPeriod metav1 . Duration
// wrapped image puller.
imagePuller images . ImageManager
// gRPC service clients
runtimeService internalapi . RuntimeService
imageService internalapi . ImageManagerService
// The version cache of runtime daemon.
versionCache * cache . ObjectCache
// The directory path for seccomp profiles.
seccompProfileRoot string
// Internal lifecycle event handlers for container resource management.
internalLifecycle cm . InternalContainerLifecycle
// A shim to legacy functions for backward compatibility.
legacyLogProvider LegacyLogProvider
2019-04-07 17:07:55 +00:00
2020-08-10 17:43:49 +00:00
// Manage container logs.
logManager logs . ContainerLogManager
2019-04-07 17:07:55 +00:00
// Manage RuntimeClass resources.
runtimeClassManager * runtimeclass . Manager
// Cache last per-container error message to reduce log spam
2019-08-30 18:33:25 +00:00
logReduction * logreduction . LogReduction
2021-03-18 22:40:29 +00:00
// PodState provider instance
podStateProvider podStateProvider
2021-07-02 08:43:15 +00:00
// Use RuntimeDefault as the default seccomp profile for all workloads.
seccompDefault bool
// MemorySwapBehavior defines how swap is used
memorySwapBehavior string
// Function to get node allocatable resources
getNodeAllocatable func ( ) v1 . ResourceList
// Memory throttling factor for MemoryQoS
memoryThrottlingFactor float64
2019-01-12 04:58:27 +00:00
}
// KubeGenericRuntime is an interface that combines the container runtime,
// streaming runtime and command runner interfaces.
type KubeGenericRuntime interface {
kubecontainer . Runtime
kubecontainer . StreamingRuntime
2020-08-10 17:43:49 +00:00
kubecontainer . CommandRunner
2019-01-12 04:58:27 +00:00
}
// LegacyLogProvider gives the ability to use unsupported docker log drivers (e.g. journald).
type LegacyLogProvider interface {
2021-07-02 08:43:15 +00:00
// GetContainerLogTail gets the last few lines of the logs for a specific container.
// The container is identified by the pod UID, the name and namespace
// arguments, and the container ID; the log tail is returned as a string.
2019-01-12 04:58:27 +00:00
GetContainerLogTail ( uid kubetypes . UID , name , namespace string , containerID kubecontainer . ContainerID ) ( string , error )
}
// NewKubeGenericRuntimeManager creates a new kubeGenericRuntimeManager
func NewKubeGenericRuntimeManager (
recorder record . EventRecorder ,
livenessManager proberesults . Manager ,
2021-03-18 22:40:29 +00:00
readinessManager proberesults . Manager ,
2019-12-12 01:27:03 +00:00
startupManager proberesults . Manager ,
2019-01-12 04:58:27 +00:00
seccompProfileRoot string ,
machineInfo * cadvisorapi . MachineInfo ,
podStateProvider podStateProvider ,
osInterface kubecontainer . OSInterface ,
runtimeHelper kubecontainer . RuntimeHelper ,
2020-03-26 21:07:15 +00:00
httpClient types . HTTPGetter ,
2019-01-12 04:58:27 +00:00
imageBackOff * flowcontrol . Backoff ,
serializeImagePulls bool ,
imagePullQPS float32 ,
imagePullBurst int ,
2020-12-01 01:06:26 +00:00
imageCredentialProviderConfigFile string ,
imageCredentialProviderBinDir string ,
2019-01-12 04:58:27 +00:00
cpuCFSQuota bool ,
cpuCFSQuotaPeriod metav1 . Duration ,
runtimeService internalapi . RuntimeService ,
imageService internalapi . ImageManagerService ,
internalLifecycle cm . InternalContainerLifecycle ,
legacyLogProvider LegacyLogProvider ,
2020-08-10 17:43:49 +00:00
logManager logs . ContainerLogManager ,
2019-04-07 17:07:55 +00:00
runtimeClassManager * runtimeclass . Manager ,
2021-07-02 08:43:15 +00:00
seccompDefault bool ,
memorySwapBehavior string ,
getNodeAllocatable func ( ) v1 . ResourceList ,
memoryThrottlingFactor float64 ,
2019-01-12 04:58:27 +00:00
) ( KubeGenericRuntime , error ) {
kubeRuntimeManager := & kubeGenericRuntimeManager {
2021-07-02 08:43:15 +00:00
recorder : recorder ,
cpuCFSQuota : cpuCFSQuota ,
cpuCFSQuotaPeriod : cpuCFSQuotaPeriod ,
seccompProfileRoot : seccompProfileRoot ,
livenessManager : livenessManager ,
readinessManager : readinessManager ,
startupManager : startupManager ,
machineInfo : machineInfo ,
osInterface : osInterface ,
runtimeHelper : runtimeHelper ,
runtimeService : newInstrumentedRuntimeService ( runtimeService ) ,
imageService : newInstrumentedImageManagerService ( imageService ) ,
internalLifecycle : internalLifecycle ,
legacyLogProvider : legacyLogProvider ,
logManager : logManager ,
runtimeClassManager : runtimeClassManager ,
logReduction : logreduction . NewLogReduction ( identicalErrorDelay ) ,
seccompDefault : seccompDefault ,
memorySwapBehavior : memorySwapBehavior ,
getNodeAllocatable : getNodeAllocatable ,
memoryThrottlingFactor : memoryThrottlingFactor ,
2019-01-12 04:58:27 +00:00
}
2020-03-26 21:07:15 +00:00
typedVersion , err := kubeRuntimeManager . getTypedVersion ( )
2019-01-12 04:58:27 +00:00
if err != nil {
2021-03-18 22:40:29 +00:00
klog . ErrorS ( err , "Get runtime version failed" )
2019-01-12 04:58:27 +00:00
return nil , err
}
// Only matching kubeRuntimeAPIVersion is supported now
// TODO: Runtime API machinery is under discussion at https://github.com/kubernetes/kubernetes/issues/28642
if typedVersion . Version != kubeRuntimeAPIVersion {
2021-03-18 22:40:29 +00:00
klog . ErrorS ( err , "This runtime api version is not supported" ,
"apiVersion" , typedVersion . Version ,
"supportedAPIVersion" , kubeRuntimeAPIVersion )
2019-01-12 04:58:27 +00:00
return nil , ErrVersionNotSupported
}
kubeRuntimeManager . runtimeName = typedVersion . RuntimeName
2021-03-18 22:40:29 +00:00
klog . InfoS ( "Container runtime initialized" ,
"containerRuntime" , typedVersion . RuntimeName ,
"version" , typedVersion . RuntimeVersion ,
"apiVersion" , typedVersion . RuntimeApiVersion )
2019-01-12 04:58:27 +00:00
// If the container logs directory does not exist, create it.
// TODO: create podLogsRootDirectory at kubelet.go when kubelet is refactored to
// new runtime interface
if _ , err := osInterface . Stat ( podLogsRootDirectory ) ; os . IsNotExist ( err ) {
if err := osInterface . MkdirAll ( podLogsRootDirectory , 0755 ) ; err != nil {
2021-03-18 22:40:29 +00:00
klog . ErrorS ( err , "Failed to create pod log directory" , "path" , podLogsRootDirectory )
2019-01-12 04:58:27 +00:00
}
}
2020-12-01 01:06:26 +00:00
if ! utilfeature . DefaultFeatureGate . Enabled ( features . KubeletCredentialProviders ) && ( imageCredentialProviderConfigFile != "" || imageCredentialProviderBinDir != "" ) {
2021-03-18 22:40:29 +00:00
klog . InfoS ( "Flags --image-credential-provider-config or --image-credential-provider-bin-dir were set but the feature gate was disabled, these flags will be ignored" ,
"featureGate" , features . KubeletCredentialProviders )
2020-12-01 01:06:26 +00:00
}
if utilfeature . DefaultFeatureGate . Enabled ( features . KubeletCredentialProviders ) && ( imageCredentialProviderConfigFile != "" || imageCredentialProviderBinDir != "" ) {
if err := plugin . RegisterCredentialProviderPlugins ( imageCredentialProviderConfigFile , imageCredentialProviderBinDir ) ; err != nil {
2021-03-18 22:40:29 +00:00
klog . ErrorS ( err , "Failed to register CRI auth plugins" )
os . Exit ( 1 )
2020-12-01 01:06:26 +00:00
}
}
kubeRuntimeManager . keyring = credentialprovider . NewDockerKeyring ( )
2019-01-12 04:58:27 +00:00
kubeRuntimeManager . imagePuller = images . NewImageManager (
kubecontainer . FilterEventRecorder ( recorder ) ,
kubeRuntimeManager ,
imageBackOff ,
serializeImagePulls ,
imagePullQPS ,
imagePullBurst )
kubeRuntimeManager . runner = lifecycle . NewHandlerRunner ( httpClient , kubeRuntimeManager , kubeRuntimeManager )
kubeRuntimeManager . containerGC = newContainerGC ( runtimeService , podStateProvider , kubeRuntimeManager )
2021-03-18 22:40:29 +00:00
kubeRuntimeManager . podStateProvider = podStateProvider
2019-01-12 04:58:27 +00:00
kubeRuntimeManager . versionCache = cache . NewObjectCache (
func ( ) ( interface { } , error ) {
return kubeRuntimeManager . getTypedVersion ( )
} ,
versionCacheTTL ,
)
return kubeRuntimeManager , nil
}
// Type reports the container runtime's type, i.e. the runtime name recorded
// on the manager.
func (m *kubeGenericRuntimeManager) Type() string {
	name := m.runtimeName
	return name
}
2019-12-12 01:27:03 +00:00
// SupportsSingleFileMapping reports whether the container runtime can map
// single files. On Windows this holds only when the runtime type is not the
// docker container runtime; on every other OS it always holds.
func (m *kubeGenericRuntimeManager) SupportsSingleFileMapping() bool {
	if goruntime.GOOS == "windows" {
		return m.Type() != types.DockerContainerRuntime
	}
	return true
}
2019-01-12 04:58:27 +00:00
// newRuntimeVersion parses version as a semantic version first and falls back
// to generic version parsing when semantic parsing fails.
func newRuntimeVersion(version string) (*utilversion.Version, error) {
	semantic, semErr := utilversion.ParseSemantic(version)
	if semErr != nil {
		return utilversion.ParseGeneric(version)
	}
	return semantic, nil
}
func ( m * kubeGenericRuntimeManager ) getTypedVersion ( ) ( * runtimeapi . VersionResponse , error ) {
typedVersion , err := m . runtimeService . Version ( kubeRuntimeAPIVersion )
if err != nil {
2020-03-26 21:07:15 +00:00
return nil , fmt . Errorf ( "get remote runtime typed version failed: %v" , err )
2019-01-12 04:58:27 +00:00
}
return typedVersion , nil
}
// Version returns the version information of the container runtime.
func ( m * kubeGenericRuntimeManager ) Version ( ) ( kubecontainer . Version , error ) {
2020-03-26 21:07:15 +00:00
typedVersion , err := m . getTypedVersion ( )
2019-01-12 04:58:27 +00:00
if err != nil {
return nil , err
}
return newRuntimeVersion ( typedVersion . RuntimeVersion )
}
// APIVersion returns the runtime API version taken from the version cache,
// keyed by the machine ID. The cache is expected to be refreshed periodically,
// and the value may differ from the runtime engine's own version.
func (m *kubeGenericRuntimeManager) APIVersion() (kubecontainer.Version, error) {
	cacheKey := m.machineInfo.MachineID
	cached, cacheErr := m.versionCache.Get(cacheKey)
	if cacheErr != nil {
		return nil, cacheErr
	}
	resp := cached.(*runtimeapi.VersionResponse)
	apiVersion := resp.RuntimeApiVersion
	return newRuntimeVersion(apiVersion)
}
// Status fetches the runtime status from the runtime service and converts it
// to the kubelet representation. A non-nil error means the status call itself
// failed.
func (m *kubeGenericRuntimeManager) Status() (*kubecontainer.RuntimeStatus, error) {
	raw, statusErr := m.runtimeService.Status()
	if statusErr == nil {
		return toKubeRuntimeStatus(raw), nil
	}
	return nil, statusErr
}
// GetPods returns a list of containers grouped by pods. The boolean parameter
// specifies whether the runtime returns all containers including those already
// exited and dead containers (used for garbage collection).
func ( m * kubeGenericRuntimeManager ) GetPods ( all bool ) ( [ ] * kubecontainer . Pod , error ) {
pods := make ( map [ kubetypes . UID ] * kubecontainer . Pod )
sandboxes , err := m . getKubeletSandboxes ( all )
if err != nil {
return nil , err
}
for i := range sandboxes {
s := sandboxes [ i ]
if s . Metadata == nil {
2021-03-18 22:40:29 +00:00
klog . V ( 4 ) . InfoS ( "Sandbox does not have metadata" , "sandbox" , s )
2019-01-12 04:58:27 +00:00
continue
}
podUID := kubetypes . UID ( s . Metadata . Uid )
if _ , ok := pods [ podUID ] ; ! ok {
pods [ podUID ] = & kubecontainer . Pod {
ID : podUID ,
Name : s . Metadata . Name ,
Namespace : s . Metadata . Namespace ,
}
}
p := pods [ podUID ]
converted , err := m . sandboxToKubeContainer ( s )
if err != nil {
2021-03-18 22:40:29 +00:00
klog . V ( 4 ) . InfoS ( "Convert sandbox of pod failed" , "runtimeName" , m . runtimeName , "sandbox" , s , "podUID" , podUID , "err" , err )
2019-01-12 04:58:27 +00:00
continue
}
p . Sandboxes = append ( p . Sandboxes , converted )
}
containers , err := m . getKubeletContainers ( all )
if err != nil {
return nil , err
}
for i := range containers {
c := containers [ i ]
if c . Metadata == nil {
2021-03-18 22:40:29 +00:00
klog . V ( 4 ) . InfoS ( "Container does not have metadata" , "container" , c )
2019-01-12 04:58:27 +00:00
continue
}
labelledInfo := getContainerInfoFromLabels ( c . Labels )
pod , found := pods [ labelledInfo . PodUID ]
if ! found {
pod = & kubecontainer . Pod {
ID : labelledInfo . PodUID ,
Name : labelledInfo . PodName ,
Namespace : labelledInfo . PodNamespace ,
}
pods [ labelledInfo . PodUID ] = pod
}
converted , err := m . toKubeContainer ( c )
if err != nil {
2021-03-18 22:40:29 +00:00
klog . V ( 4 ) . InfoS ( "Convert container of pod failed" , "runtimeName" , m . runtimeName , "container" , c , "podUID" , labelledInfo . PodUID , "err" , err )
2019-01-12 04:58:27 +00:00
continue
}
pod . Containers = append ( pod . Containers , converted )
}
// Convert map to list.
var result [ ] * kubecontainer . Pod
for _ , pod := range pods {
result = append ( result , pod )
}
return result , nil
}
2021-03-18 22:40:29 +00:00
// containerKillReason explains what killed a given container
type containerKillReason string
// Known kill reasons recorded in containerToKillInfo.reason.
const (
// reasonStartupProbe: the container failed its startup probe.
reasonStartupProbe containerKillReason = "StartupProbe"
// reasonLivenessProbe: the container failed its liveness probe.
reasonLivenessProbe containerKillReason = "LivenessProbe"
// reasonFailedPostStartHook: presumably used when the post-start hook
// fails; it is not referenced in the visible part of this file.
reasonFailedPostStartHook containerKillReason = "FailedPostStartHook"
// reasonUnknown: used when a container in the unknown state is killed before restart.
reasonUnknown containerKillReason = "Unknown"
)
2019-01-12 04:58:27 +00:00
// containerToKillInfo contains necessary information to kill a container.
// Instances are stored in podActions.ContainersToKill, keyed by container ID.
type containerToKillInfo struct {
// The spec of the container.
container * v1 . Container
// The name of the container.
name string
// The message indicates why the container will be killed.
message string
2021-03-18 22:40:29 +00:00
// The reason is a clearer source of info on why a container will be killed
// TODO: replace message with reason?
reason containerKillReason
2019-01-12 04:58:27 +00:00
}
// podActions describes what needs to be done for a pod. It is produced by
// computePodActions and consumed by SyncPod.
type podActions struct {
2019-09-27 21:51:53 +00:00
// Stop all running (regular, init and ephemeral) containers and the sandbox for the pod.
2019-01-12 04:58:27 +00:00
KillPod bool
2019-08-30 18:33:25 +00:00
// Whether a new sandbox needs to be created. If the pod must be killed and
2019-01-12 04:58:27 +00:00
// a new pod sandbox created, all init containers need to be purged (i.e., removed).
CreateSandbox bool
// The id of existing sandbox. It is used for starting containers in ContainersToStart.
SandboxID string
// The attempt number of creating sandboxes for the pod.
Attempt uint32
// The next init container to start.
NextInitContainerToStart * v1 . Container
// ContainersToStart keeps a list of indexes for the containers to start,
// where the index is the index of the specific container in the pod spec
// (pod.Spec.Containers).
ContainersToStart [ ] int
// ContainersToKill keeps a map of containers that need to be killed, note that
// the key is the container ID of the container, while
// the value contains necessary information to kill a container.
ContainersToKill map [ kubecontainer . ContainerID ] containerToKillInfo
2019-09-27 21:51:53 +00:00
// EphemeralContainersToStart is a list of indexes for the ephemeral containers to start,
// where the index is the index of the specific container in pod.Spec.EphemeralContainers.
EphemeralContainersToStart [ ] int
2019-01-12 04:58:27 +00:00
}
// podSandboxChanged checks whether the spec of the pod is changed and returns
// (changed, new attempt, original sandboxID if exist).
func ( m * kubeGenericRuntimeManager ) podSandboxChanged ( pod * v1 . Pod , podStatus * kubecontainer . PodStatus ) ( bool , uint32 , string ) {
if len ( podStatus . SandboxStatuses ) == 0 {
2021-03-18 22:40:29 +00:00
klog . V ( 2 ) . InfoS ( "No sandbox for pod can be found. Need to start a new one" , "pod" , klog . KObj ( pod ) )
2019-01-12 04:58:27 +00:00
return true , 0 , ""
}
readySandboxCount := 0
for _ , s := range podStatus . SandboxStatuses {
if s . State == runtimeapi . PodSandboxState_SANDBOX_READY {
readySandboxCount ++
}
}
// Needs to create a new sandbox when readySandboxCount > 1 or the ready sandbox is not the latest one.
sandboxStatus := podStatus . SandboxStatuses [ 0 ]
if readySandboxCount > 1 {
2021-03-18 22:40:29 +00:00
klog . V ( 2 ) . InfoS ( "Multiple sandboxes are ready for Pod. Need to reconcile them" , "pod" , klog . KObj ( pod ) )
2019-01-12 04:58:27 +00:00
return true , sandboxStatus . Metadata . Attempt + 1 , sandboxStatus . Id
}
if sandboxStatus . State != runtimeapi . PodSandboxState_SANDBOX_READY {
2021-03-18 22:40:29 +00:00
klog . V ( 2 ) . InfoS ( "No ready sandbox for pod can be found. Need to start a new one" , "pod" , klog . KObj ( pod ) )
2019-01-12 04:58:27 +00:00
return true , sandboxStatus . Metadata . Attempt + 1 , sandboxStatus . Id
}
// Needs to create a new sandbox when network namespace changed.
if sandboxStatus . GetLinux ( ) . GetNamespaces ( ) . GetOptions ( ) . GetNetwork ( ) != networkNamespaceForPod ( pod ) {
2021-03-18 22:40:29 +00:00
klog . V ( 2 ) . InfoS ( "Sandbox for pod has changed. Need to start a new one" , "pod" , klog . KObj ( pod ) )
2019-01-12 04:58:27 +00:00
return true , sandboxStatus . Metadata . Attempt + 1 , ""
}
// Needs to create a new sandbox when the sandbox does not have an IP address.
if ! kubecontainer . IsHostNetworkPod ( pod ) && sandboxStatus . Network . Ip == "" {
2021-03-18 22:40:29 +00:00
klog . V ( 2 ) . InfoS ( "Sandbox for pod has no IP address. Need to start a new one" , "pod" , klog . KObj ( pod ) )
2019-01-12 04:58:27 +00:00
return true , sandboxStatus . Metadata . Attempt + 1 , sandboxStatus . Id
}
return false , sandboxStatus . Metadata . Attempt , sandboxStatus . Id
}
2020-08-10 17:43:49 +00:00
func containerChanged ( container * v1 . Container , containerStatus * kubecontainer . Status ) ( uint64 , uint64 , bool ) {
2019-01-12 04:58:27 +00:00
expectedHash := kubecontainer . HashContainer ( container )
return expectedHash , containerStatus . Hash , containerStatus . Hash != expectedHash
}
// shouldRestartOnFailure reports whether the pod's restart policy allows
// restarting, i.e. the policy is anything other than Never.
func shouldRestartOnFailure(pod *v1.Pod) bool {
	if pod.Spec.RestartPolicy == v1.RestartPolicyNever {
		return false
	}
	return true
}
// containerSucceeded reports whether the named container has a status in
// podStatus, is not running, and exited with code 0.
func containerSucceeded(c *v1.Container, podStatus *kubecontainer.PodStatus) bool {
	status := podStatus.FindContainerStatusByName(c.Name)
	if status == nil {
		return false
	}
	if status.State == kubecontainer.ContainerStateRunning {
		return false
	}
	return status.ExitCode == 0
}
// computePodActions compares the desired pod spec with the observed podStatus
// and returns the podActions needed to reconcile them: whether to kill the pod
// and/or create a sandbox, which init, regular and ephemeral containers to
// start, and which containers to kill.
//
// The decision proceeds in stages, each of which may return early:
//  1. sandbox (re-)creation, via podSandboxChanged;
//  2. ephemeral containers that have no status yet;
//  3. init container progress;
//  4. per-container inspection of the regular containers.
//
// Side effect: for every regular container that has a status but is not
// running, the internal PostStopContainer lifecycle hook is invoked.
func ( m * kubeGenericRuntimeManager ) computePodActions ( pod * v1 . Pod , podStatus * kubecontainer . PodStatus ) podActions {
2021-03-18 22:40:29 +00:00
klog . V ( 5 ) . InfoS ( "Syncing Pod" , "pod" , klog . KObj ( pod ) )
2019-01-12 04:58:27 +00:00
createPodSandbox , attempt , sandboxID := m . podSandboxChanged ( pod , podStatus )
changes := podActions {
KillPod : createPodSandbox ,
CreateSandbox : createPodSandbox ,
SandboxID : sandboxID ,
Attempt : attempt ,
ContainersToStart : [ ] int { } ,
ContainersToKill : make ( map [ kubecontainer . ContainerID ] containerToKillInfo ) ,
}
// If we need to (re-)create the pod sandbox, everything will need to be
// killed and recreated, and init containers should be purged.
if createPodSandbox {
2019-07-14 07:58:54 +00:00
if ! shouldRestartOnFailure ( pod ) && attempt != 0 && len ( podStatus . ContainerStatuses ) != 0 {
2019-01-12 04:58:27 +00:00
// Should not restart the pod, just return.
// we should not create a sandbox for a pod if it is already done.
// if all containers are done and should not be started, there is no need to create a new sandbox.
// this stops confusing logs on pods whose containers all have exit codes, but we recreate a sandbox before terminating it.
2019-07-14 07:58:54 +00:00
//
// If ContainerStatuses is empty, we assume that we've never
// successfully created any containers. In this case, we should
// retry creating the sandbox.
2019-01-12 04:58:27 +00:00
changes . CreateSandbox = false
return changes
}
2020-11-14 08:06:46 +00:00
// Get the containers to start, excluding the ones that succeeded if RestartPolicy is OnFailure.
var containersToStart [ ] int
for idx , c := range pod . Spec . Containers {
if pod . Spec . RestartPolicy == v1 . RestartPolicyOnFailure && containerSucceeded ( & c , podStatus ) {
continue
}
containersToStart = append ( containersToStart , idx )
}
// We should not create a sandbox for a Pod if initialization is done and there is no container to start.
if len ( containersToStart ) == 0 {
_ , _ , done := findNextInitContainerToRun ( pod , podStatus )
if done {
changes . CreateSandbox = false
return changes
}
}
2019-01-12 04:58:27 +00:00
if len ( pod . Spec . InitContainers ) != 0 {
// Pod has init containers, return the first one.
changes . NextInitContainerToStart = & pod . Spec . InitContainers [ 0 ]
return changes
}
2020-11-14 08:06:46 +00:00
changes . ContainersToStart = containersToStart
2019-01-12 04:58:27 +00:00
return changes
}
2019-09-27 21:51:53 +00:00
// Ephemeral containers may be started even if initialization is not yet complete.
if utilfeature . DefaultFeatureGate . Enabled ( features . EphemeralContainers ) {
for i := range pod . Spec . EphemeralContainers {
c := ( * v1 . Container ) ( & pod . Spec . EphemeralContainers [ i ] . EphemeralContainerCommon )
// Ephemeral Containers are never restarted
if podStatus . FindContainerStatusByName ( c . Name ) == nil {
changes . EphemeralContainersToStart = append ( changes . EphemeralContainersToStart , i )
}
}
}
2019-01-12 04:58:27 +00:00
// Check initialization progress.
initLastStatus , next , done := findNextInitContainerToRun ( pod , podStatus )
if ! done {
2019-04-07 17:07:55 +00:00
if next != nil {
2019-02-08 04:04:22 +00:00
initFailed := initLastStatus != nil && isInitContainerFailed ( initLastStatus )
2019-01-12 04:58:27 +00:00
if initFailed && ! shouldRestartOnFailure ( pod ) {
changes . KillPod = true
} else {
2019-02-08 04:04:22 +00:00
// Always try to stop containers in unknown state first.
if initLastStatus != nil && initLastStatus . State == kubecontainer . ContainerStateUnknown {
changes . ContainersToKill [ initLastStatus . ID ] = containerToKillInfo {
2019-04-07 17:07:55 +00:00
name : next . Name ,
container : next ,
2019-02-08 04:04:22 +00:00
message : fmt . Sprintf ( "Init container is in %q state, try killing it before restart" ,
initLastStatus . State ) ,
2021-03-18 22:40:29 +00:00
reason : reasonUnknown ,
2019-02-08 04:04:22 +00:00
}
}
2019-04-07 17:07:55 +00:00
changes . NextInitContainerToStart = next
2019-01-12 04:58:27 +00:00
}
}
// Initialization failed or still in progress. Skip inspecting non-init
// containers.
return changes
}
// Number of running containers to keep.
keepCount := 0
// check the status of containers.
for idx , container := range pod . Spec . Containers {
containerStatus := podStatus . FindContainerStatusByName ( container . Name )
// Call internal container post-stop lifecycle hook for any non-running container so that any
// allocated cpus are released immediately. If the container is restarted, cpus will be re-allocated
// to it.
if containerStatus != nil && containerStatus . State != kubecontainer . ContainerStateRunning {
if err := m . internalLifecycle . PostStopContainer ( containerStatus . ID . ID ) ; err != nil {
2021-03-18 22:40:29 +00:00
klog . ErrorS ( err , "Internal container post-stop lifecycle hook failed for container in pod with error" ,
"containerName" , container . Name , "pod" , klog . KObj ( pod ) )
2019-01-12 04:58:27 +00:00
}
}
// If container does not exist, or is not running, check whether we
// need to restart it.
if containerStatus == nil || containerStatus . State != kubecontainer . ContainerStateRunning {
if kubecontainer . ShouldContainerBeRestarted ( & container , pod , podStatus ) {
2021-03-18 22:40:29 +00:00
klog . V ( 3 ) . InfoS ( "Container of pod is not in the desired state and shall be started" , "containerName" , container . Name , "pod" , klog . KObj ( pod ) )
2019-01-12 04:58:27 +00:00
changes . ContainersToStart = append ( changes . ContainersToStart , idx )
2019-02-08 04:04:22 +00:00
if containerStatus != nil && containerStatus . State == kubecontainer . ContainerStateUnknown {
// If container is in unknown state, we don't know whether it
// is actually running or not, always try killing it before
// restart to avoid having 2 running instances of the same container.
changes . ContainersToKill [ containerStatus . ID ] = containerToKillInfo {
name : containerStatus . Name ,
container : & pod . Spec . Containers [ idx ] ,
message : fmt . Sprintf ( "Container is in %q state, try killing it before restart" ,
containerStatus . State ) ,
2021-03-18 22:40:29 +00:00
reason : reasonUnknown ,
2019-02-08 04:04:22 +00:00
}
}
2019-01-12 04:58:27 +00:00
}
continue
}
// The container is running, but kill the container if any of the following condition is met.
2019-04-07 17:07:55 +00:00
var message string
2021-03-18 22:40:29 +00:00
// reason stays at its zero value when the kill is caused by a spec change.
var reason containerKillReason
2019-01-12 04:58:27 +00:00
restart := shouldRestartOnFailure ( pod )
2019-04-07 17:07:55 +00:00
if _ , _ , changed := containerChanged ( & container , containerStatus ) ; changed {
message = fmt . Sprintf ( "Container %s definition changed" , container . Name )
2019-01-12 04:58:27 +00:00
// Restart regardless of the restart policy because the container
// spec changed.
restart = true
} else if liveness , found := m . livenessManager . Get ( containerStatus . ID ) ; found && liveness == proberesults . Failure {
// If the container failed the liveness probe, we should kill it.
2019-04-07 17:07:55 +00:00
message = fmt . Sprintf ( "Container %s failed liveness probe" , container . Name )
2021-03-18 22:40:29 +00:00
reason = reasonLivenessProbe
2019-12-12 01:27:03 +00:00
} else if startup , found := m . startupManager . Get ( containerStatus . ID ) ; found && startup == proberesults . Failure {
// If the container failed the startup probe, we should kill it.
message = fmt . Sprintf ( "Container %s failed startup probe" , container . Name )
2021-03-18 22:40:29 +00:00
reason = reasonStartupProbe
2019-01-12 04:58:27 +00:00
} else {
// Keep the container.
keepCount ++
continue
}
// We need to kill the container, but if we also want to restart the
// container afterwards, make the intent clear in the message. Also do
// not kill the entire pod since we expect container to be running eventually.
if restart {
2019-04-07 17:07:55 +00:00
message = fmt . Sprintf ( "%s, will be restarted" , message )
2019-01-12 04:58:27 +00:00
changes . ContainersToStart = append ( changes . ContainersToStart , idx )
}
changes . ContainersToKill [ containerStatus . ID ] = containerToKillInfo {
name : containerStatus . Name ,
container : & pod . Spec . Containers [ idx ] ,
message : message ,
2021-03-18 22:40:29 +00:00
reason : reason ,
2019-01-12 04:58:27 +00:00
}
2021-03-18 22:40:29 +00:00
klog . V ( 2 ) . InfoS ( "Message for Container of pod" , "containerName" , container . Name , "containerStatusID" , containerStatus . ID , "pod" , klog . KObj ( pod ) , "containerMessage" , message )
2019-01-12 04:58:27 +00:00
}
// Nothing is kept running and nothing is scheduled to start: the whole pod
// (including its sandbox) can be stopped.
if keepCount == 0 && len ( changes . ContainersToStart ) == 0 {
changes . KillPod = true
}
return changes
}
// SyncPod syncs the running pod into the desired pod by executing following steps:
//
// 1. Compute sandbox and container changes.
// 2. Kill pod sandbox if necessary.
// 3. Kill any containers that should not be running.
// 4. Create sandbox if necessary.
2019-09-27 21:51:53 +00:00
// 5. Create ephemeral containers.
// 6. Create init containers.
// 7. Create normal containers.
2019-04-07 17:07:55 +00:00
func ( m * kubeGenericRuntimeManager ) SyncPod ( pod * v1 . Pod , podStatus * kubecontainer . PodStatus , pullSecrets [ ] v1 . Secret , backOff * flowcontrol . Backoff ) ( result kubecontainer . PodSyncResult ) {
2019-01-12 04:58:27 +00:00
// Step 1: Compute sandbox and container changes.
podContainerChanges := m . computePodActions ( pod , podStatus )
2021-03-18 22:40:29 +00:00
klog . V ( 3 ) . InfoS ( "computePodActions got for pod" , "podActions" , podContainerChanges , "pod" , klog . KObj ( pod ) )
2019-01-12 04:58:27 +00:00
if podContainerChanges . CreateSandbox {
ref , err := ref . GetReference ( legacyscheme . Scheme , pod )
if err != nil {
2021-03-18 22:40:29 +00:00
klog . ErrorS ( err , "Couldn't make a ref to pod" , "pod" , klog . KObj ( pod ) )
2019-01-12 04:58:27 +00:00
}
if podContainerChanges . SandboxID != "" {
m . recorder . Eventf ( ref , v1 . EventTypeNormal , events . SandboxChanged , "Pod sandbox changed, it will be killed and re-created." )
} else {
2021-03-18 22:40:29 +00:00
klog . V ( 4 ) . InfoS ( "SyncPod received new pod, will create a sandbox for it" , "pod" , klog . KObj ( pod ) )
2019-01-12 04:58:27 +00:00
}
}
// Step 2: Kill the pod if the sandbox has changed.
if podContainerChanges . KillPod {
2019-04-07 17:07:55 +00:00
if podContainerChanges . CreateSandbox {
2021-03-18 22:40:29 +00:00
klog . V ( 4 ) . InfoS ( "Stopping PodSandbox for pod, will start new one" , "pod" , klog . KObj ( pod ) )
2019-04-07 17:07:55 +00:00
} else {
2021-03-18 22:40:29 +00:00
klog . V ( 4 ) . InfoS ( "Stopping PodSandbox for pod, because all other containers are dead" , "pod" , klog . KObj ( pod ) )
2019-01-12 04:58:27 +00:00
}
killResult := m . killPodWithSyncResult ( pod , kubecontainer . ConvertPodStatusToRunningPod ( m . runtimeName , podStatus ) , nil )
result . AddPodSyncResult ( killResult )
if killResult . Error ( ) != nil {
2021-03-18 22:40:29 +00:00
klog . ErrorS ( killResult . Error ( ) , "killPodWithSyncResult failed" )
2019-01-12 04:58:27 +00:00
return
}
if podContainerChanges . CreateSandbox {
m . purgeInitContainers ( pod , podStatus )
}
} else {
// Step 3: kill any running containers in this pod which are not to keep.
for containerID , containerInfo := range podContainerChanges . ContainersToKill {
2021-03-18 22:40:29 +00:00
klog . V ( 3 ) . InfoS ( "Killing unwanted container for pod" , "containerName" , containerInfo . name , "containerID" , containerID , "pod" , klog . KObj ( pod ) )
2019-01-12 04:58:27 +00:00
killContainerResult := kubecontainer . NewSyncResult ( kubecontainer . KillContainer , containerInfo . name )
result . AddSyncResult ( killContainerResult )
2021-03-18 22:40:29 +00:00
if err := m . killContainer ( pod , containerID , containerInfo . name , containerInfo . message , containerInfo . reason , nil ) ; err != nil {
2019-01-12 04:58:27 +00:00
killContainerResult . Fail ( kubecontainer . ErrKillContainer , err . Error ( ) )
2021-03-18 22:40:29 +00:00
klog . ErrorS ( err , "killContainer for pod failed" , "containerName" , containerInfo . name , "containerID" , containerID , "pod" , klog . KObj ( pod ) )
2019-01-12 04:58:27 +00:00
return
}
}
}
// Keep terminated init containers fairly aggressively controlled
// This is an optimization because container removals are typically handled
// by container garbage collector.
m . pruneInitContainersBeforeStart ( pod , podStatus )
2019-12-12 01:27:03 +00:00
// We pass the value of the PRIMARY podIP and list of podIPs down to
// generatePodSandboxConfig and generateContainerConfig, which in turn
// passes it to various other functions, in order to facilitate functionality
// that requires this value (hosts file and downward API) and avoid races determining
2019-01-12 04:58:27 +00:00
// the pod IP in cases where a container requires restart but the
2019-12-12 01:27:03 +00:00
// podIP isn't in the status manager yet. The list of podIPs is used to
// generate the hosts file.
2019-01-12 04:58:27 +00:00
//
2019-09-27 21:51:53 +00:00
// We default to the IPs in the passed-in pod status, and overwrite them if the
2019-01-12 04:58:27 +00:00
// sandbox needs to be (re)started.
2019-09-27 21:51:53 +00:00
var podIPs [ ] string
2019-01-12 04:58:27 +00:00
if podStatus != nil {
2019-09-27 21:51:53 +00:00
podIPs = podStatus . IPs
2019-01-12 04:58:27 +00:00
}
// Step 4: Create a sandbox for the pod if necessary.
podSandboxID := podContainerChanges . SandboxID
if podContainerChanges . CreateSandbox {
var msg string
var err error
2021-03-18 22:40:29 +00:00
klog . V ( 4 ) . InfoS ( "Creating PodSandbox for pod" , "pod" , klog . KObj ( pod ) )
2021-07-02 08:43:15 +00:00
metrics . StartedPodsTotal . Inc ( )
2019-01-12 04:58:27 +00:00
createSandboxResult := kubecontainer . NewSyncResult ( kubecontainer . CreatePodSandbox , format . Pod ( pod ) )
result . AddSyncResult ( createSandboxResult )
podSandboxID , msg , err = m . createPodSandbox ( pod , podContainerChanges . Attempt )
if err != nil {
2021-03-18 22:40:29 +00:00
// createPodSandbox can return an error from CNI, CSI,
// or CRI if the Pod has been deleted while the POD is
// being created. If the pod has been deleted then it's
// not a real error.
2021-10-29 15:14:24 +00:00
//
// SyncPod can still be running when we get here, which
// means the PodWorker has not acked the deletion.
if m . podStateProvider . IsPodTerminationRequested ( pod . UID ) {
2021-03-18 22:40:29 +00:00
klog . V ( 4 ) . InfoS ( "Pod was deleted and sandbox failed to be created" , "pod" , klog . KObj ( pod ) , "podUID" , pod . UID )
return
}
2021-10-29 15:14:24 +00:00
metrics . StartedPodsErrorsTotal . Inc ( )
2019-01-12 04:58:27 +00:00
createSandboxResult . Fail ( kubecontainer . ErrCreatePodSandbox , msg )
2021-03-18 22:40:29 +00:00
klog . ErrorS ( err , "CreatePodSandbox for pod failed" , "pod" , klog . KObj ( pod ) )
2019-01-12 04:58:27 +00:00
ref , referr := ref . GetReference ( legacyscheme . Scheme , pod )
if referr != nil {
2021-03-18 22:40:29 +00:00
klog . ErrorS ( referr , "Couldn't make a ref to pod" , "pod" , klog . KObj ( pod ) )
2019-01-12 04:58:27 +00:00
}
2019-12-12 01:27:03 +00:00
m . recorder . Eventf ( ref , v1 . EventTypeWarning , events . FailedCreatePodSandBox , "Failed to create pod sandbox: %v" , err )
2019-01-12 04:58:27 +00:00
return
}
2021-03-18 22:40:29 +00:00
klog . V ( 4 ) . InfoS ( "Created PodSandbox for pod" , "podSandboxID" , podSandboxID , "pod" , klog . KObj ( pod ) )
2019-01-12 04:58:27 +00:00
podSandboxStatus , err := m . runtimeService . PodSandboxStatus ( podSandboxID )
if err != nil {
ref , referr := ref . GetReference ( legacyscheme . Scheme , pod )
if referr != nil {
2021-03-18 22:40:29 +00:00
klog . ErrorS ( referr , "Couldn't make a ref to pod" , "pod" , klog . KObj ( pod ) )
2019-01-12 04:58:27 +00:00
}
m . recorder . Eventf ( ref , v1 . EventTypeWarning , events . FailedStatusPodSandBox , "Unable to get pod sandbox status: %v" , err )
2021-03-18 22:40:29 +00:00
klog . ErrorS ( err , "Failed to get pod sandbox status; Skipping pod" , "pod" , klog . KObj ( pod ) )
2019-01-12 04:58:27 +00:00
result . Fail ( err )
return
}
// If we ever allow updating a pod from non-host-network to
// host-network, we may use a stale IP.
if ! kubecontainer . IsHostNetworkPod ( pod ) {
2019-09-27 21:51:53 +00:00
// Overwrite the podIPs passed in the pod status, since we just started the pod sandbox.
podIPs = m . determinePodSandboxIPs ( pod . Namespace , pod . Name , podSandboxStatus )
2021-03-18 22:40:29 +00:00
klog . V ( 4 ) . InfoS ( "Determined the ip for pod after sandbox changed" , "IPs" , podIPs , "pod" , klog . KObj ( pod ) )
2019-01-12 04:58:27 +00:00
}
}
2019-09-27 21:51:53 +00:00
// the start containers routines depend on pod ip(as in primary pod ip)
// instead of trying to figure out if we have 0 < len(podIPs)
// everytime, we short circuit it here
podIP := ""
if len ( podIPs ) != 0 {
podIP = podIPs [ 0 ]
}
2019-01-12 04:58:27 +00:00
// Get podSandboxConfig for containers to start.
configPodSandboxResult := kubecontainer . NewSyncResult ( kubecontainer . ConfigPodSandbox , podSandboxID )
result . AddSyncResult ( configPodSandboxResult )
podSandboxConfig , err := m . generatePodSandboxConfig ( pod , podContainerChanges . Attempt )
if err != nil {
message := fmt . Sprintf ( "GeneratePodSandboxConfig for pod %q failed: %v" , format . Pod ( pod ) , err )
2021-03-18 22:40:29 +00:00
klog . ErrorS ( err , "GeneratePodSandboxConfig for pod failed" , "pod" , klog . KObj ( pod ) )
2019-01-12 04:58:27 +00:00
configPodSandboxResult . Fail ( kubecontainer . ErrConfigPodSandbox , message )
return
}
2019-09-27 21:51:53 +00:00
// Helper containing boilerplate common to starting all types of containers.
2021-07-02 08:43:15 +00:00
// typeName is a description used to describe this type of container in log messages,
2019-09-27 21:51:53 +00:00
// currently: "container", "init container" or "ephemeral container"
2021-07-02 08:43:15 +00:00
// metricLabel is the label used to describe this type of container in monitoring metrics.
// currently: "container", "init_container" or "ephemeral_container"
start := func ( typeName , metricLabel string , spec * startSpec ) error {
2020-03-26 21:07:15 +00:00
startContainerResult := kubecontainer . NewSyncResult ( kubecontainer . StartContainer , spec . container . Name )
2019-01-12 04:58:27 +00:00
result . AddSyncResult ( startContainerResult )
2020-03-26 21:07:15 +00:00
isInBackOff , msg , err := m . doBackOff ( pod , spec . container , podStatus , backOff )
2019-01-12 04:58:27 +00:00
if isInBackOff {
startContainerResult . Fail ( err , msg )
2021-03-18 22:40:29 +00:00
klog . V ( 4 ) . InfoS ( "Backing Off restarting container in pod" , "containerType" , typeName , "container" , spec . container , "pod" , klog . KObj ( pod ) )
2019-09-27 21:51:53 +00:00
return err
2019-01-12 04:58:27 +00:00
}
2021-07-02 08:43:15 +00:00
metrics . StartedContainersTotal . WithLabelValues ( metricLabel ) . Inc ( )
2021-03-18 22:40:29 +00:00
klog . V ( 4 ) . InfoS ( "Creating container in pod" , "containerType" , typeName , "container" , spec . container , "pod" , klog . KObj ( pod ) )
2019-12-12 01:27:03 +00:00
// NOTE (aramase) podIPs are populated for single stack and dual stack clusters. Send only podIPs.
2020-03-26 21:07:15 +00:00
if msg , err := m . startContainer ( podSandboxID , podSandboxConfig , spec , pod , podStatus , pullSecrets , podIP , podIPs ) ; err != nil {
2021-07-02 08:43:15 +00:00
// startContainer() returns well-defined error codes that have reasonable cardinality for metrics and are
// useful to cluster administrators to distinguish "server errors" from "user errors".
metrics . StartedContainersErrorsTotal . WithLabelValues ( metricLabel , err . Error ( ) ) . Inc ( )
2019-01-12 04:58:27 +00:00
startContainerResult . Fail ( err , msg )
// known errors that are logged in other places are logged at higher levels here to avoid
// repetitive log spam
switch {
case err == images . ErrImagePullBackOff :
2021-03-18 22:40:29 +00:00
klog . V ( 3 ) . InfoS ( "Container start failed in pod" , "containerType" , typeName , "container" , spec . container , "pod" , klog . KObj ( pod ) , "containerMessage" , msg , "err" , err )
2019-01-12 04:58:27 +00:00
default :
2020-08-10 17:43:49 +00:00
utilruntime . HandleError ( fmt . Errorf ( "%v %+v start failed in pod %v: %v: %s" , typeName , spec . container , format . Pod ( pod ) , err , msg ) )
2019-01-12 04:58:27 +00:00
}
2019-09-27 21:51:53 +00:00
return err
}
return nil
}
// Step 5: start ephemeral containers
// These are started "prior" to init containers to allow running ephemeral containers even when there
// are errors starting an init container. In practice init containers will start first since ephemeral
// containers cannot be specified on pod creation.
if utilfeature . DefaultFeatureGate . Enabled ( features . EphemeralContainers ) {
for _ , idx := range podContainerChanges . EphemeralContainersToStart {
2021-07-02 08:43:15 +00:00
start ( "ephemeral container" , metrics . EphemeralContainer , ephemeralContainerStartSpec ( & pod . Spec . EphemeralContainers [ idx ] ) )
2019-09-27 21:51:53 +00:00
}
}
// Step 6: start the init container.
if container := podContainerChanges . NextInitContainerToStart ; container != nil {
// Start the next init container.
2021-07-02 08:43:15 +00:00
if err := start ( "init container" , metrics . InitContainer , containerStartSpec ( container ) ) ; err != nil {
2019-09-27 21:51:53 +00:00
return
2019-01-12 04:58:27 +00:00
}
2019-09-27 21:51:53 +00:00
// Successfully started the container; clear the entry in the failure
2021-03-18 22:40:29 +00:00
klog . V ( 4 ) . InfoS ( "Completed init container for pod" , "containerName" , container . Name , "pod" , klog . KObj ( pod ) )
2019-09-27 21:51:53 +00:00
}
// Step 7: start containers in podContainerChanges.ContainersToStart.
for _ , idx := range podContainerChanges . ContainersToStart {
2021-07-02 08:43:15 +00:00
start ( "container" , metrics . Container , containerStartSpec ( & pod . Spec . Containers [ idx ] ) )
2019-01-12 04:58:27 +00:00
}
return
}
// If a container is still in backoff, the function will return a brief backoff error and
// a detailed error message.
func ( m * kubeGenericRuntimeManager ) doBackOff ( pod * v1 . Pod , container * v1 . Container , podStatus * kubecontainer . PodStatus , backOff * flowcontrol . Backoff ) ( bool , string , error ) {
2020-08-10 17:43:49 +00:00
var cStatus * kubecontainer . Status
2019-01-12 04:58:27 +00:00
for _ , c := range podStatus . ContainerStatuses {
if c . Name == container . Name && c . State == kubecontainer . ContainerStateExited {
cStatus = c
break
}
}
if cStatus == nil {
return false , "" , nil
}
2021-03-18 22:40:29 +00:00
klog . V ( 3 ) . InfoS ( "Checking backoff for container in pod" , "containerName" , container . Name , "pod" , klog . KObj ( pod ) )
2019-01-12 04:58:27 +00:00
// Use the finished time of the latest exited container as the start point to calculate whether to do back-off.
ts := cStatus . FinishedAt
// backOff requires a unique key to identify the container.
key := getStableKey ( pod , container )
if backOff . IsInBackOffSince ( key , ts ) {
2021-03-18 22:40:29 +00:00
if containerRef , err := kubecontainer . GenerateContainerRef ( pod , container ) ; err == nil {
m . recorder . Eventf ( containerRef , v1 . EventTypeWarning , events . BackOffStartContainer , "Back-off restarting failed container" )
2019-01-12 04:58:27 +00:00
}
2019-09-27 21:51:53 +00:00
err := fmt . Errorf ( "back-off %s restarting failed container=%s pod=%s" , backOff . Get ( key ) , container . Name , format . Pod ( pod ) )
2021-03-18 22:40:29 +00:00
klog . V ( 3 ) . InfoS ( "Back-off restarting failed container" , "err" , err . Error ( ) )
2019-01-12 04:58:27 +00:00
return true , err . Error ( ) , kubecontainer . ErrCrashLoopBackOff
}
backOff . Next ( key , ts )
return false , "" , nil
}
// KillPod kills all the containers of a pod. Pod may be nil, running pod must not be.
// gracePeriodOverride, if specified, allows the caller to override the pod default grace period.
// Only hard kill paths are allowed to specify a gracePeriodOverride in the kubelet in order to
// not corrupt user data. It is useful when doing SIGKILL for hard eviction scenarios, or max
// grace period during soft eviction scenarios.
//
// The work is delegated to killPodWithSyncResult; the error reported by the
// resulting sync result is returned.
func (m *kubeGenericRuntimeManager) KillPod(pod *v1.Pod, runningPod kubecontainer.Pod, gracePeriodOverride *int64) error {
	killResult := m.killPodWithSyncResult(pod, runningPod, gracePeriodOverride)
	return killResult.Error()
}
// killPodWithSyncResult kills a runningPod and returns SyncResult.
// Note: The pod passed in could be *nil* when kubelet restarted.
func ( m * kubeGenericRuntimeManager ) killPodWithSyncResult ( pod * v1 . Pod , runningPod kubecontainer . Pod , gracePeriodOverride * int64 ) ( result kubecontainer . PodSyncResult ) {
killContainerResults := m . killContainersWithSyncResult ( pod , runningPod , gracePeriodOverride )
for _ , containerResult := range killContainerResults {
result . AddSyncResult ( containerResult )
}
// stop sandbox, the sandbox will be removed in GarbageCollect
killSandboxResult := kubecontainer . NewSyncResult ( kubecontainer . KillPodSandbox , runningPod . ID )
result . AddSyncResult ( killSandboxResult )
// Stop all sandboxes belongs to same pod
for _ , podSandbox := range runningPod . Sandboxes {
if err := m . runtimeService . StopPodSandbox ( podSandbox . ID . ID ) ; err != nil {
killSandboxResult . Fail ( kubecontainer . ErrKillPodSandbox , err . Error ( ) )
2021-03-18 22:40:29 +00:00
klog . ErrorS ( nil , "Failed to stop sandbox" , "podSandboxID" , podSandbox . ID )
2019-01-12 04:58:27 +00:00
}
}
return
}
// GetPodStatus retrieves the status of the pod, including the
// information of all containers in the pod that are visible in Runtime.
func ( m * kubeGenericRuntimeManager ) GetPodStatus ( uid kubetypes . UID , name , namespace string ) ( * kubecontainer . PodStatus , error ) {
// Now we retain restart count of container as a container label. Each time a container
// restarts, pod will read the restart count from the registered dead container, increment
// it to get the new restart count, and then add a label with the new restart count on
// the newly started container.
// However, there are some limitations of this method:
// 1. When all dead containers were garbage collected, the container status could
// not get the historical value and would be *inaccurate*. Fortunately, the chance
// is really slim.
// 2. When working with old version containers which have no restart count label,
// we can only assume their restart count is 0.
// Anyhow, we only promised "best-effort" restart count reporting, we can just ignore
// these limitations now.
// TODO: move this comment to SyncPod.
podSandboxIDs , err := m . getSandboxIDByPodUID ( uid , nil )
if err != nil {
return nil , err
}
2021-03-18 22:40:29 +00:00
pod := & v1 . Pod {
2019-01-12 04:58:27 +00:00
ObjectMeta : metav1 . ObjectMeta {
Name : name ,
Namespace : namespace ,
UID : uid ,
} ,
2021-03-18 22:40:29 +00:00
}
podFullName := format . Pod ( pod )
klog . V ( 4 ) . InfoS ( "getSandboxIDByPodUID got sandbox IDs for pod" , "podSandboxID" , podSandboxIDs , "pod" , klog . KObj ( pod ) )
2019-01-12 04:58:27 +00:00
sandboxStatuses := make ( [ ] * runtimeapi . PodSandboxStatus , len ( podSandboxIDs ) )
2019-09-27 21:51:53 +00:00
podIPs := [ ] string { }
2019-01-12 04:58:27 +00:00
for idx , podSandboxID := range podSandboxIDs {
podSandboxStatus , err := m . runtimeService . PodSandboxStatus ( podSandboxID )
if err != nil {
2021-03-18 22:40:29 +00:00
klog . ErrorS ( err , "PodSandboxStatus of sandbox for pod" , "podSandboxID" , podSandboxID , "pod" , klog . KObj ( pod ) )
2019-01-12 04:58:27 +00:00
return nil , err
}
sandboxStatuses [ idx ] = podSandboxStatus
// Only get pod IP from latest sandbox
if idx == 0 && podSandboxStatus . State == runtimeapi . PodSandboxState_SANDBOX_READY {
2019-09-27 21:51:53 +00:00
podIPs = m . determinePodSandboxIPs ( namespace , name , podSandboxStatus )
2019-01-12 04:58:27 +00:00
}
}
// Get statuses of all containers visible in the pod.
containerStatuses , err := m . getPodContainerStatuses ( uid , name , namespace )
if err != nil {
2019-08-30 18:33:25 +00:00
if m . logReduction . ShouldMessageBePrinted ( err . Error ( ) , podFullName ) {
2021-03-18 22:40:29 +00:00
klog . ErrorS ( err , "getPodContainerStatuses for pod failed" , "pod" , klog . KObj ( pod ) )
2019-04-07 17:07:55 +00:00
}
2019-01-12 04:58:27 +00:00
return nil , err
}
2019-08-30 18:33:25 +00:00
m . logReduction . ClearID ( podFullName )
2019-01-12 04:58:27 +00:00
return & kubecontainer . PodStatus {
ID : uid ,
Name : name ,
Namespace : namespace ,
2019-09-27 21:51:53 +00:00
IPs : podIPs ,
2019-01-12 04:58:27 +00:00
SandboxStatuses : sandboxStatuses ,
ContainerStatuses : containerStatuses ,
} , nil
}
// GarbageCollect removes dead containers using the specified container gc policy.
2020-08-10 17:43:49 +00:00
func ( m * kubeGenericRuntimeManager ) GarbageCollect ( gcPolicy kubecontainer . GCPolicy , allSourcesReady bool , evictNonDeletedPods bool ) error {
2019-01-12 04:58:27 +00:00
return m . containerGC . GarbageCollect ( gcPolicy , allSourcesReady , evictNonDeletedPods )
}
// UpdatePodCIDR is just a passthrough method to update the runtimeConfig of the shim
// with the podCIDR supplied by the kubelet.
func ( m * kubeGenericRuntimeManager ) UpdatePodCIDR ( podCIDR string ) error {
// TODO(#35531): do we really want to write a method on this manager for each
// field of the config?
2021-03-18 22:40:29 +00:00
klog . InfoS ( "Updating runtime config through cri with podcidr" , "CIDR" , podCIDR )
2019-01-12 04:58:27 +00:00
return m . runtimeService . UpdateRuntimeConfig (
& runtimeapi . RuntimeConfig {
NetworkConfig : & runtimeapi . NetworkConfig {
PodCidr : podCIDR ,
} ,
} )
}