mirror of https://github.com/k3s-io/k3s
Merge pull request #66832 from msau42/udev
Automatic merge from submit-queue (batch tested with PRs 64645, 66832). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>. Detect if GCE PD udev link is wrong and try to correct it **What this PR does / why we need it**: udev can miss scsi events and not update device links properly when disks are detached and reattached to the same node in a different order. This PR workarounds the issue by comparing the udev link name, which is the PD disk name, with the scsi disk serial number returned from scsi_id, and prevents mounting of the disk if the link is wrong. It will also run `udevadm trigger` on the disk to try to correct the bad link. **Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*: Fixes # **Special notes for your reviewer**: **Release note**: ```release-note This fix prevents a GCE PD volume from being mounted if the udev device link is stale and tries to correct the link. ```pull/8/head
commit
d3c09656d7
|
@ -21,6 +21,7 @@ go_library(
|
||||||
"//pkg/cloudprovider/providers/gce:go_default_library",
|
"//pkg/cloudprovider/providers/gce:go_default_library",
|
||||||
"//pkg/features:go_default_library",
|
"//pkg/features:go_default_library",
|
||||||
"//pkg/kubelet/apis:go_default_library",
|
"//pkg/kubelet/apis:go_default_library",
|
||||||
|
"//pkg/util/file:go_default_library",
|
||||||
"//pkg/util/mount:go_default_library",
|
"//pkg/util/mount:go_default_library",
|
||||||
"//pkg/util/strings:go_default_library",
|
"//pkg/util/strings:go_default_library",
|
||||||
"//pkg/volume:go_default_library",
|
"//pkg/volume:go_default_library",
|
||||||
|
@ -43,6 +44,7 @@ go_test(
|
||||||
"attacher_test.go",
|
"attacher_test.go",
|
||||||
"gce_pd_block_test.go",
|
"gce_pd_block_test.go",
|
||||||
"gce_pd_test.go",
|
"gce_pd_test.go",
|
||||||
|
"gce_util_test.go",
|
||||||
],
|
],
|
||||||
embed = [":go_default_library"],
|
embed = [":go_default_library"],
|
||||||
deps = [
|
deps = [
|
||||||
|
|
|
@ -157,7 +157,7 @@ func (attacher *gcePersistentDiskAttacher) WaitForAttach(spec *volume.Spec, devi
|
||||||
select {
|
select {
|
||||||
case <-ticker.C:
|
case <-ticker.C:
|
||||||
glog.V(5).Infof("Checking GCE PD %q is attached.", pdName)
|
glog.V(5).Infof("Checking GCE PD %q is attached.", pdName)
|
||||||
path, err := verifyDevicePath(devicePaths, sdBeforeSet)
|
path, err := verifyDevicePath(devicePaths, sdBeforeSet, pdName)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// Log error, if any, and continue checking periodically. See issue #11321
|
// Log error, if any, and continue checking periodically. See issue #11321
|
||||||
glog.Errorf("Error verifying GCE PD (%q) is attached: %v", pdName, err)
|
glog.Errorf("Error verifying GCE PD (%q) is attached: %v", pdName, err)
|
||||||
|
|
|
@ -20,6 +20,7 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"path"
|
"path"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
@ -31,6 +32,7 @@ import (
|
||||||
gcecloud "k8s.io/kubernetes/pkg/cloudprovider/providers/gce"
|
gcecloud "k8s.io/kubernetes/pkg/cloudprovider/providers/gce"
|
||||||
"k8s.io/kubernetes/pkg/features"
|
"k8s.io/kubernetes/pkg/features"
|
||||||
kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
|
kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
|
||||||
|
utilfile "k8s.io/kubernetes/pkg/util/file"
|
||||||
"k8s.io/kubernetes/pkg/volume"
|
"k8s.io/kubernetes/pkg/volume"
|
||||||
volumeutil "k8s.io/kubernetes/pkg/volume/util"
|
volumeutil "k8s.io/kubernetes/pkg/volume/util"
|
||||||
"k8s.io/utils/exec"
|
"k8s.io/utils/exec"
|
||||||
|
@ -50,12 +52,19 @@ const (
|
||||||
// Replication type constants must be lower case.
|
// Replication type constants must be lower case.
|
||||||
replicationTypeNone = "none"
|
replicationTypeNone = "none"
|
||||||
replicationTypeRegionalPD = "regional-pd"
|
replicationTypeRegionalPD = "regional-pd"
|
||||||
|
|
||||||
|
// scsi_id output should be in the form of:
|
||||||
|
// 0Google PersistentDisk <disk name>
|
||||||
|
scsiPattern = `^0Google\s+PersistentDisk\s+([\S]+)\s*$`
|
||||||
)
|
)
|
||||||
|
|
||||||
// These variables are modified only in unit tests and should be constant
|
|
||||||
// otherwise.
|
|
||||||
var (
|
var (
|
||||||
|
// errorSleepDuration is modified only in unit tests and should be constant
|
||||||
|
// otherwise.
|
||||||
errorSleepDuration time.Duration = 5 * time.Second
|
errorSleepDuration time.Duration = 5 * time.Second
|
||||||
|
|
||||||
|
// regex to parse scsi_id output and extract the serial
|
||||||
|
scsiRegex = regexp.MustCompile(scsiPattern)
|
||||||
)
|
)
|
||||||
|
|
||||||
type GCEDiskUtil struct{}
|
type GCEDiskUtil struct{}
|
||||||
|
@ -261,9 +270,11 @@ func createRegionalPD(
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the first path that exists, or empty string if none exist.
|
// Returns the first path that exists, or empty string if none exist.
|
||||||
func verifyDevicePath(devicePaths []string, sdBeforeSet sets.String) (string, error) {
|
func verifyDevicePath(devicePaths []string, sdBeforeSet sets.String, diskName string) (string, error) {
|
||||||
if err := udevadmChangeToNewDrives(sdBeforeSet); err != nil {
|
if err := udevadmChangeToNewDrives(sdBeforeSet); err != nil {
|
||||||
// udevadm errors should not block disk detachment, log and continue
|
// It's possible udevadm was called on other disks so it should not block this
|
||||||
|
// call. If it did fail on this disk, then the devicePath will either
|
||||||
|
// not exist or be wrong. If it's wrong, then the scsi_id check below will fail.
|
||||||
glog.Errorf("udevadmChangeToNewDrives failed with: %v", err)
|
glog.Errorf("udevadmChangeToNewDrives failed with: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -271,6 +282,22 @@ func verifyDevicePath(devicePaths []string, sdBeforeSet sets.String) (string, er
|
||||||
if pathExists, err := volumeutil.PathExists(path); err != nil {
|
if pathExists, err := volumeutil.PathExists(path); err != nil {
|
||||||
return "", fmt.Errorf("Error checking if path exists: %v", err)
|
return "", fmt.Errorf("Error checking if path exists: %v", err)
|
||||||
} else if pathExists {
|
} else if pathExists {
|
||||||
|
// validate that the path actually resolves to the correct disk
|
||||||
|
serial, err := getScsiSerial(path, diskName)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("failed to get scsi serial %v", err)
|
||||||
|
}
|
||||||
|
if serial != diskName {
|
||||||
|
// The device link is not pointing to the correct device
|
||||||
|
// Trigger udev on this device to try to fix the link
|
||||||
|
if udevErr := udevadmChangeToDrive(path); udevErr != nil {
|
||||||
|
glog.Errorf("udevadmChangeToDrive %q failed with: %v", path, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return error to retry WaitForAttach and verifyDevicePath
|
||||||
|
return "", fmt.Errorf("scsi_id serial %q for device %q doesn't match disk %q", serial, path, diskName)
|
||||||
|
}
|
||||||
|
// The device link is correct
|
||||||
return path, nil
|
return path, nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -278,22 +305,38 @@ func verifyDevicePath(devicePaths []string, sdBeforeSet sets.String) (string, er
|
||||||
return "", nil
|
return "", nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the first path that exists, or empty string if none exist.
|
// Calls scsi_id on the given devicePath to get the serial number reported by that device.
|
||||||
func verifyAllPathsRemoved(devicePaths []string) (bool, error) {
|
func getScsiSerial(devicePath, diskName string) (string, error) {
|
||||||
allPathsRemoved := true
|
exists, err := utilfile.FileExists("/lib/udev/scsi_id")
|
||||||
for _, path := range devicePaths {
|
if err != nil {
|
||||||
if err := udevadmChangeToDrive(path); err != nil {
|
return "", fmt.Errorf("failed to check scsi_id existence: %v", err)
|
||||||
// udevadm errors should not block disk detachment, log and continue
|
|
||||||
glog.Errorf("%v", err)
|
|
||||||
}
|
|
||||||
if exists, err := volumeutil.PathExists(path); err != nil {
|
|
||||||
return false, fmt.Errorf("Error checking if path exists: %v", err)
|
|
||||||
} else {
|
|
||||||
allPathsRemoved = allPathsRemoved && !exists
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return allPathsRemoved, nil
|
if !exists {
|
||||||
|
glog.V(6).Infof("scsi_id doesn't exist; skipping check for %v", devicePath)
|
||||||
|
return diskName, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
out, err := exec.New().Command(
|
||||||
|
"/lib/udev/scsi_id",
|
||||||
|
"--page=0x83",
|
||||||
|
"--whitelisted",
|
||||||
|
fmt.Sprintf("--device=%v", devicePath)).CombinedOutput()
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("scsi_id failed for device %q with %v.", devicePath, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return parseScsiSerial(string(out))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse the output returned by scsi_id and extract the serial number
|
||||||
|
func parseScsiSerial(output string) (string, error) {
|
||||||
|
substrings := scsiRegex.FindStringSubmatch(output)
|
||||||
|
if substrings == nil {
|
||||||
|
return "", fmt.Errorf("scsi_id output cannot be parsed: %q", output)
|
||||||
|
}
|
||||||
|
|
||||||
|
return substrings[1], nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns list of all /dev/disk/by-id/* paths for given PD.
|
// Returns list of all /dev/disk/by-id/* paths for given PD.
|
||||||
|
|
|
@ -0,0 +1,62 @@
|
||||||
|
/*
|
||||||
|
Copyright 2018 The Kubernetes Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package gce_pd
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestParseScsiSerial(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
output string
|
||||||
|
diskName string
|
||||||
|
expectErr bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "valid",
|
||||||
|
output: "0Google PersistentDisk test-disk",
|
||||||
|
diskName: "test-disk",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "valid with newline",
|
||||||
|
output: "0Google PersistentDisk test-disk\n",
|
||||||
|
diskName: "test-disk",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "invalid prefix",
|
||||||
|
output: "00Google PersistentDisk test-disk",
|
||||||
|
expectErr: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "invalid suffix",
|
||||||
|
output: "0Google PersistentDisk test-disk more",
|
||||||
|
expectErr: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range cases {
|
||||||
|
serial, err := parseScsiSerial(test.output)
|
||||||
|
if err != nil && !test.expectErr {
|
||||||
|
t.Errorf("test %v failed: %v", test.name, err)
|
||||||
|
}
|
||||||
|
if err == nil && test.expectErr {
|
||||||
|
t.Errorf("test %q failed: got success", test.name)
|
||||||
|
}
|
||||||
|
if serial != test.diskName {
|
||||||
|
t.Errorf("test %v failed: expected serial %q, got %q", test.name, test.diskName, serial)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -300,4 +300,53 @@ var _ = utils.SIGDescribe("PersistentVolumes", func() {
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
Describe("Default StorageClass", func() {
|
||||||
|
Context("pods that use multiple volumes", func() {
|
||||||
|
It("should be reschedulable", func() {
|
||||||
|
// Only run on providers with default storageclass
|
||||||
|
framework.SkipUnlessProviderIs("openstack", "gce", "gke", "vsphere", "azure")
|
||||||
|
|
||||||
|
numVols := 4
|
||||||
|
pvcs := []*v1.PersistentVolumeClaim{}
|
||||||
|
|
||||||
|
By("Creating PVCs")
|
||||||
|
for i := 0; i < numVols; i++ {
|
||||||
|
pvc = framework.MakePersistentVolumeClaim(framework.PersistentVolumeClaimConfig{}, ns)
|
||||||
|
pvc, err = framework.CreatePVC(c, ns, pvc)
|
||||||
|
Expect(err).NotTo(HaveOccurred())
|
||||||
|
pvcs = append(pvcs, pvc)
|
||||||
|
}
|
||||||
|
|
||||||
|
By("Waiting for PVCs to be bound")
|
||||||
|
for _, pvc := range pvcs {
|
||||||
|
framework.Logf("Created PVC %q", pvc.Name)
|
||||||
|
framework.ExpectNoError(framework.WaitForPersistentVolumeClaimPhase(v1.ClaimBound, c, ns, pvc.Name, framework.Poll, framework.ClaimProvisionTimeout))
|
||||||
|
}
|
||||||
|
|
||||||
|
By("Creating a pod and initializing data")
|
||||||
|
writeCmd := "true"
|
||||||
|
for i, pvc := range pvcs {
|
||||||
|
// mountPath is /mnt/volume<i+1>
|
||||||
|
writeCmd += fmt.Sprintf("&& touch /mnt/volume%v/%v", i+1, pvc.Name)
|
||||||
|
}
|
||||||
|
pod := framework.MakePod(ns, nil, pvcs, false, writeCmd)
|
||||||
|
pod, err = c.CoreV1().Pods(ns).Create(pod)
|
||||||
|
Expect(err).NotTo(HaveOccurred())
|
||||||
|
framework.ExpectNoError(framework.WaitForPodSuccessInNamespace(c, pod.Name, ns))
|
||||||
|
|
||||||
|
By("Recreating the pod and validating the data")
|
||||||
|
framework.ExpectNoError(framework.DeletePodWithWait(f, c, pod))
|
||||||
|
validateCmd := "true"
|
||||||
|
for i, pvc := range pvcs {
|
||||||
|
// mountPath is /mnt/volume<i+1>
|
||||||
|
validateCmd += fmt.Sprintf("&& test -f /mnt/volume%v/%v", i+1, pvc.Name)
|
||||||
|
}
|
||||||
|
pod = framework.MakePod(ns, nil, pvcs, false, validateCmd)
|
||||||
|
pod, err = c.CoreV1().Pods(ns).Create(pod)
|
||||||
|
Expect(err).NotTo(HaveOccurred())
|
||||||
|
framework.ExpectNoError(framework.WaitForPodSuccessInNamespace(c, pod.Name, ns))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|
Loading…
Reference in New Issue