mirror of https://github.com/k3s-io/k3s
Merge pull request #35038 from sjenning/nfs-nonblock-reader2
Automatic merge from submit-queue kubelet: storage: don't hang kubelet on unresponsive nfs Fixes #31272 Currently, due to the nature of nfs, an unresponsive nfs volume in a pod can wedge the kubelet such that additional pods can not be run. The discussion thus far surrounding this issue was to wrap the `lstat`, the syscall that ends up hanging in uninterruptible sleep, in a goroutine and limiting the number of goroutines that hang to one per-pod per-volume. However, in my investigation, I found that the callsites that request a listing of the volumes from a particular volume plugin directory don't care anything about the properties provided by the `lstat` call. They only care about whether or not a directory exists. Given that constraint, this PR just avoids the `lstat` call by using `Readdirnames()` instead of `ReadDir()` or `ReadDirNoExit()` ### More detail for reviewers Consider the pod mounted nfs volume at `/var/lib/kubelet/pods/881341b5-9551-11e6-af4c-fa163e815edd/volumes/kubernetes.io~nfs/myvol`. The kubelet wedges because when we do a `ReadDir()` or `ReadDirNoExit()` it calls `syscall.Lstat` on `myvol` which requires communication with the nfs server. If the nfs server is unreachable, this call hangs forever. However, for our code, we only care what about the names of files/directory contained in `kubernetes.io~nfs` directory, not any of the more detailed information the `Lstat` call provides. Getting the names can be done with `Readdirnames()`, which doesn't need to involve the nfs server. @pmorie @eparis @ncdc @derekwaynecarr @saad-ali @thockin @vishh @kubernetes/rh-cluster-infrapull/6/head
commit
84aa5f695f
|
@ -252,18 +252,11 @@ func (kl *Kubelet) getPodVolumeNameListFromDisk(podUID types.UID) ([]string, err
|
|||
for _, volumePluginDir := range volumePluginDirs {
|
||||
volumePluginName := volumePluginDir.Name()
|
||||
volumePluginPath := path.Join(podVolDir, volumePluginName)
|
||||
volumeDirs, volumeDirsStatErrs, err := util.ReadDirNoExit(volumePluginPath)
|
||||
volumeDirs, err := util.ReadDirNoStat(volumePluginPath)
|
||||
if err != nil {
|
||||
return volumes, fmt.Errorf("Could not read directory %s: %v", volumePluginPath, err)
|
||||
}
|
||||
for i, volumeDir := range volumeDirs {
|
||||
if volumeDir != nil {
|
||||
volumes = append(volumes, volumeDir.Name())
|
||||
continue
|
||||
}
|
||||
glog.Errorf("Could not read directory %s: %v", podVolDir, volumeDirsStatErrs[i])
|
||||
|
||||
return volumes, err
|
||||
}
|
||||
volumes = append(volumes, volumeDirs...)
|
||||
}
|
||||
return volumes, nil
|
||||
}
|
||||
|
|
|
@ -32,6 +32,7 @@ import (
|
|||
"k8s.io/kubernetes/pkg/kubelet/config"
|
||||
"k8s.io/kubernetes/pkg/kubelet/volumemanager/cache"
|
||||
"k8s.io/kubernetes/pkg/types"
|
||||
"k8s.io/kubernetes/pkg/util"
|
||||
"k8s.io/kubernetes/pkg/util/goroutinemap/exponentialbackoff"
|
||||
"k8s.io/kubernetes/pkg/util/mount"
|
||||
"k8s.io/kubernetes/pkg/util/strings"
|
||||
|
@ -531,24 +532,21 @@ func getVolumesFromPodDir(podDir string) ([]podVolume, error) {
|
|||
pluginName := volumeDir.Name()
|
||||
volumePluginPath := path.Join(volumesDir, pluginName)
|
||||
|
||||
volumePluginDirs, err := ioutil.ReadDir(volumePluginPath)
|
||||
volumePluginDirs, err := util.ReadDirNoStat(volumePluginPath)
|
||||
if err != nil {
|
||||
glog.Errorf("Could not read volume plugin directory %q: %v", volumePluginPath, err)
|
||||
continue
|
||||
}
|
||||
|
||||
unescapePluginName := strings.UnescapeQualifiedNameForDisk(pluginName)
|
||||
for _, volumeNameDir := range volumePluginDirs {
|
||||
if volumeNameDir != nil {
|
||||
volumeName := volumeNameDir.Name()
|
||||
mountPath := path.Join(volumePluginPath, volumeName)
|
||||
volumes = append(volumes, podVolume{
|
||||
podName: volumetypes.UniquePodName(podName),
|
||||
volumeSpecName: volumeName,
|
||||
mountPath: mountPath,
|
||||
pluginName: unescapePluginName,
|
||||
})
|
||||
}
|
||||
for _, volumeName := range volumePluginDirs {
|
||||
mountPath := path.Join(volumePluginPath, volumeName)
|
||||
volumes = append(volumes, podVolume{
|
||||
podName: volumetypes.UniquePodName(podName),
|
||||
volumeSpecName: volumeName,
|
||||
mountPath: mountPath,
|
||||
pluginName: unescapePluginName,
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -84,36 +84,20 @@ func FileExists(filename string) (bool, error) {
|
|||
return true, nil
|
||||
}
|
||||
|
||||
// borrowed from ioutil.ReadDir
|
||||
// ReadDir reads the directory named by dirname and returns
|
||||
// a list of directory entries, minus those with lstat errors
|
||||
func ReadDirNoExit(dirname string) ([]os.FileInfo, []error, error) {
|
||||
// ReadDirNoStat returns a string of files/directories contained
|
||||
// in dirname without calling lstat on them.
|
||||
func ReadDirNoStat(dirname string) ([]string, error) {
|
||||
if dirname == "" {
|
||||
dirname = "."
|
||||
}
|
||||
|
||||
f, err := os.Open(dirname)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
names, err := f.Readdirnames(-1)
|
||||
list := make([]os.FileInfo, 0, len(names))
|
||||
errs := make([]error, 0, len(names))
|
||||
for _, filename := range names {
|
||||
fip, lerr := os.Lstat(dirname + "/" + filename)
|
||||
if os.IsNotExist(lerr) {
|
||||
// File disappeared between readdir + stat.
|
||||
// Just treat it as if it didn't exist.
|
||||
continue
|
||||
}
|
||||
|
||||
list = append(list, fip)
|
||||
errs = append(errs, lerr)
|
||||
}
|
||||
|
||||
return list, errs, nil
|
||||
return f.Readdirnames(-1)
|
||||
}
|
||||
|
||||
// IntPtr returns a pointer to an int
|
||||
|
|
Loading…
Reference in New Issue