reclaim terminated pod volumes

pull/6/head
Seth Jennings 2016-11-21 10:48:50 -06:00
parent 4f8f6006cf
commit e2750a305a
13 changed files with 36 additions and 15 deletions

View File

@ -251,6 +251,7 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
fs.BoolVar(&s.ExperimentalKernelMemcgNotification, "experimental-kernel-memcg-notification", s.ExperimentalKernelMemcgNotification, "If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.")
fs.Int32Var(&s.PodsPerCore, "pods-per-core", s.PodsPerCore, "Number of Pods per core that can run on this Kubelet. The total number of Pods on this Kubelet cannot exceed max-pods, so max-pods will be used if this calculation results in a larger number of Pods allowed on the Kubelet. A value of 0 disables this limit.")
fs.BoolVar(&s.ProtectKernelDefaults, "protect-kernel-defaults", s.ProtectKernelDefaults, "Default kubelet behaviour for kernel tuning. If set, kubelet errors if any of kernel tunables is different than kubelet defaults.")
fs.BoolVar(&s.KeepTerminatedPodVolumes, "keep-terminated-pod-volumes", s.KeepTerminatedPodVolumes, "Keep terminated pod volumes mounted to the node after the pod terminates. Can be useful for debugging volume related issues.")
// CRI flags.
fs.BoolVar(&s.EnableCRI, "experimental-cri", s.EnableCRI, "[Experimental] Enable the Container Runtime Interface (CRI) integration. If --container-runtime is set to \"remote\", Kubelet will communicate with the runtime/image CRI server listening on the endpoint specified by --remote-runtime-endpoint/--remote-image-endpoint. If --container-runtime is set to \"docker\", Kubelet will launch a in-process CRI server on behalf of docker, and communicate over a default endpoint.")

View File

@ -536,6 +536,7 @@ function start_kubelet {
--experimental-cgroups-per-qos=${EXPERIMENTAL_CGROUPS_PER_QOS} \
--cgroup-driver=${CGROUP_DRIVER} \
--cgroup-root=${CGROUP_ROOT} \
--keep-terminated-pod-volumes=true \
${auth_args} \
${dns_args} \
${net_plugin_dir_args} \

View File

@ -308,6 +308,7 @@ junit-file-number
k8s-bin-dir
k8s-build-output
keep-gogoproto
keep-terminated-pod-volumes
km-path
kops-admin-access
kops-cluster

View File

@ -481,6 +481,9 @@ type KubeletConfiguration struct {
// (binaries, etc.) to mount the volume are available on the underlying node. If the check is enabled
// and fails the mount operation fails.
ExperimentalCheckNodeCapabilitiesBeforeMount bool
// This flag, if set, instructs the kubelet to keep volumes from terminated pods mounted to the node.
// This can be useful for debugging volume related issues.
KeepTerminatedPodVolumes bool
}
type KubeletAuthorizationMode string

View File

@ -521,6 +521,9 @@ type KubeletConfiguration struct {
// (binaries, etc.) to mount the volume are available on the underlying node. If the check is enabled
// and fails the mount operation fails.
ExperimentalCheckNodeCapabilitiesBeforeMount bool `json:"experimentalCheckNodeCapabilitiesBeforeMount,omitempty"`
// This flag, if set, instructs the kubelet to keep volumes from terminated pods mounted to the node.
// This can be useful for debugging volume related issues.
KeepTerminatedPodVolumes bool `json:"keepTerminatedPodVolumes,omitempty"`
}
type KubeletAuthorizationMode string

View File

@ -481,6 +481,7 @@ func autoConvert_v1alpha1_KubeletConfiguration_To_componentconfig_KubeletConfigu
out.EnableCRI = in.EnableCRI
out.ExperimentalFailSwapOn = in.ExperimentalFailSwapOn
out.ExperimentalCheckNodeCapabilitiesBeforeMount = in.ExperimentalCheckNodeCapabilitiesBeforeMount
out.KeepTerminatedPodVolumes = in.KeepTerminatedPodVolumes
return nil
}
@ -652,6 +653,7 @@ func autoConvert_componentconfig_KubeletConfiguration_To_v1alpha1_KubeletConfigu
out.EnableCRI = in.EnableCRI
out.ExperimentalFailSwapOn = in.ExperimentalFailSwapOn
out.ExperimentalCheckNodeCapabilitiesBeforeMount = in.ExperimentalCheckNodeCapabilitiesBeforeMount
out.KeepTerminatedPodVolumes = in.KeepTerminatedPodVolumes
return nil
}

View File

@ -10431,6 +10431,13 @@ var OpenAPIDefinitions *openapi.OpenAPIDefinitions = &openapi.OpenAPIDefinitions
Format: "",
},
},
"keepTerminatedPodVolumes": {
SchemaProps: spec.SchemaProps{
Description: "This flag, if set, instructs the kubelet to keep volumes from terminated pods mounted to the node. This can be useful for debugging volume related issues.",
Type: []string{"boolean"},
Format: "",
},
},
},
Required: []string{"podManifestPath", "syncFrequency", "fileCheckFrequency", "httpCheckFrequency", "manifestURL", "manifestURLHeader", "enableServer", "address", "port", "readOnlyPort", "tlsCertFile", "tlsPrivateKeyFile", "certDirectory", "authentication", "authorization", "hostnameOverride", "podInfraContainerImage", "dockerEndpoint", "rootDirectory", "seccompProfileRoot", "allowPrivileged", "hostNetworkSources", "hostPIDSources", "hostIPCSources", "registryPullQPS", "registryBurst", "eventRecordQPS", "eventBurst", "enableDebuggingHandlers", "minimumGCAge", "maxPerPodContainerCount", "maxContainerCount", "cAdvisorPort", "healthzPort", "healthzBindAddress", "oomScoreAdj", "registerNode", "clusterDomain", "masterServiceNamespace", "clusterDNS", "streamingConnectionIdleTimeout", "nodeStatusUpdateFrequency", "imageMinimumGCAge", "imageGCHighThresholdPercent", "imageGCLowThresholdPercent", "lowDiskSpaceThresholdMB", "volumeStatsAggPeriod", "networkPluginName", "networkPluginDir", "cniConfDir", "cniBinDir", "networkPluginMTU", "volumePluginDir", "cloudProvider", "cloudConfigFile", "kubeletCgroups", "runtimeCgroups", "systemCgroups", "cgroupRoot", "containerRuntime", "remoteRuntimeEndpoint", "remoteImageEndpoint", "runtimeRequestTimeout", "rktPath", "rktAPIEndpoint", "rktStage1Image", "lockFilePath", "exitOnLockContention", "hairpinMode", "babysitDaemons", "maxPods", "nvidiaGPUs", "dockerExecHandlerName", "podCIDR", "resolvConf", "cpuCFSQuota", "containerized", "maxOpenFiles", "registerSchedulable", "registerWithTaints", "contentType", "kubeAPIQPS", "kubeAPIBurst", "serializeImagePulls", "outOfDiskTransitionFrequency", "nodeIP", "nodeLabels", "nonMasqueradeCIDR", "enableCustomMetrics", "evictionHard", "evictionSoft", "evictionSoftGracePeriod", "evictionPressureTransitionPeriod", "evictionMaxPodGracePeriod", "evictionMinimumReclaim", "experimentalKernelMemcgNotification", "podsPerCore", "enableControllerAttachDetach", "systemReserved", "kubeReserved", "protectKernelDefaults", "makeIPTablesUtilChains", "iptablesMasqueradeBit", "iptablesDropBit"},
},

View File

@ -747,7 +747,8 @@ func NewMainKubelet(kubeCfg *componentconfig.KubeletConfiguration, kubeDeps *Kub
kubeDeps.Mounter,
klet.getPodsDir(),
kubeDeps.Recorder,
kubeCfg.ExperimentalCheckNodeCapabilitiesBeforeMount)
kubeCfg.ExperimentalCheckNodeCapabilitiesBeforeMount,
kubeCfg.KeepTerminatedPodVolumes)
runtimeCache, err := kubecontainer.NewRuntimeCache(klet.containerRuntime)
if err != nil {

View File

@ -257,7 +257,8 @@ func newTestKubeletWithImageList(
kubelet.mounter,
kubelet.getPodsDir(),
kubelet.recorder,
false /* experimentalCheckNodeCapabilitiesBeforeMount*/)
false, /* experimentalCheckNodeCapabilitiesBeforeMount*/
false /* keepTerminatedPodVolumes */)
require.NoError(t, err, "Failed to initialize volume manager")
// enable active deadline handler

View File

@ -106,7 +106,8 @@ func TestRunOnce(t *testing.T) {
kb.mounter,
kb.getPodsDir(),
kb.recorder,
false /* experimentalCheckNodeCapabilitiesBeforeMount */)
false, /* experimentalCheckNodeCapabilitiesBeforeMount */
false /* keepTerminatedPodVolumes */)
kb.networkPlugin, _ = network.InitNetworkPlugin([]network.NetworkPlugin{}, "", nettest.NewFakeHost(nil), componentconfig.HairpinNone, kb.nonMasqueradeCIDR, network.UseDefaultMTU)
// TODO: Factor out "StatsProvider" from Kubelet so we don't have a cyclic dependency

View File

@ -71,7 +71,8 @@ func NewDesiredStateOfWorldPopulator(
getPodStatusRetryDuration time.Duration,
podManager pod.Manager,
desiredStateOfWorld cache.DesiredStateOfWorld,
kubeContainerRuntime kubecontainer.Runtime) DesiredStateOfWorldPopulator {
kubeContainerRuntime kubecontainer.Runtime,
keepTerminatedPodVolumes bool) DesiredStateOfWorldPopulator {
return &desiredStateOfWorldPopulator{
kubeClient: kubeClient,
loopSleepDuration: loopSleepDuration,
@ -80,7 +81,8 @@ func NewDesiredStateOfWorldPopulator(
desiredStateOfWorld: desiredStateOfWorld,
pods: processedPods{
processedPods: make(map[volumetypes.UniquePodName]bool)},
kubeContainerRuntime: kubeContainerRuntime,
kubeContainerRuntime: kubeContainerRuntime,
keepTerminatedPodVolumes: keepTerminatedPodVolumes,
}
}
@ -93,6 +95,7 @@ type desiredStateOfWorldPopulator struct {
pods processedPods
kubeContainerRuntime kubecontainer.Runtime
timeOfLastGetPodStatus time.Time
keepTerminatedPodVolumes bool
}
type processedPods struct {
@ -160,13 +163,7 @@ func (dswp *desiredStateOfWorldPopulator) findAndRemoveDeletedPods() {
if !isPodTerminated(pod) {
continue
}
// Skip non-memory backed volumes belonging to terminated pods
volume := volumeToMount.VolumeSpec.Volume
if volume == nil {
continue
}
if (volume.EmptyDir == nil || volume.EmptyDir.Medium != v1.StorageMediumMemory) &&
volume.ConfigMap == nil && volume.Secret == nil {
if dswp.keepTerminatedPodVolumes {
continue
}
}

View File

@ -157,7 +157,8 @@ func NewVolumeManager(
mounter mount.Interface,
kubeletPodsDir string,
recorder record.EventRecorder,
checkNodeCapabilitiesBeforeMount bool) (VolumeManager, error) {
checkNodeCapabilitiesBeforeMount bool,
keepTerminatedPodVolumes bool) (VolumeManager, error) {
vm := &volumeManager{
kubeClient: kubeClient,
@ -191,7 +192,8 @@ func NewVolumeManager(
desiredStateOfWorldPopulatorGetPodStatusRetryDuration,
podManager,
vm.desiredStateOfWorld,
kubeContainerRuntime)
kubeContainerRuntime,
keepTerminatedPodVolumes)
return vm, nil
}

View File

@ -198,7 +198,8 @@ func newTestVolumeManager(
&mount.FakeMounter{},
"",
fakeRecorder,
false /* experimentalCheckNodeCapabilitiesBeforeMount */)
false, /* experimentalCheckNodeCapabilitiesBeforeMount */
false /* keepTerminatedPodVolumes */)
return vm, err
}