mirror of https://github.com/k3s-io/k3s
Retry attaching multipath iSCSI volumes
Don't mount single path instead of multipath volumes and always wait until at least 2 paths are available. Try up to 3 times to get all paths. Try 5 times to get at last 2 paths.pull/58/head
parent
a29b093a56
commit
362be3a58b
|
@ -36,6 +36,23 @@ import (
|
||||||
volumeutil "k8s.io/kubernetes/pkg/volume/util"
|
volumeutil "k8s.io/kubernetes/pkg/volume/util"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
// Minimum number of paths that the volume plugin considers enough when a multipath volume is requested.
|
||||||
|
minMultipathCount = 2
|
||||||
|
|
||||||
|
// Minimal number of attempts to attach all paths of a multipath volumes. If at least minMultipathCount paths
|
||||||
|
// are available after this nr. of attempts, the volume plugin continues with mounting the volume.
|
||||||
|
minAttachAttempts = 2
|
||||||
|
|
||||||
|
// Total number of attempts to attach at least minMultipathCount paths. If there are less than minMultipathCount,
|
||||||
|
// the volume plugin tries to attach the remaining paths at least this number of times in total. After
|
||||||
|
// maxAttachAttempts attempts, it mounts even a single path.
|
||||||
|
maxAttachAttempts = 5
|
||||||
|
|
||||||
|
// How many seconds to wait for a multipath device if at least two paths are available.
|
||||||
|
multipathDeviceTimeout = 10
|
||||||
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
chap_st = []string{
|
chap_st = []string{
|
||||||
"discovery.sendtargets.auth.username",
|
"discovery.sendtargets.auth.username",
|
||||||
|
@ -268,7 +285,7 @@ func waitForMultiPathToExist(devicePaths []string, maxRetries int, deviceUtil vo
|
||||||
// AttachDisk returns devicePath of volume if attach succeeded otherwise returns error
|
// AttachDisk returns devicePath of volume if attach succeeded otherwise returns error
|
||||||
func (util *ISCSIUtil) AttachDisk(b iscsiDiskMounter) (string, error) {
|
func (util *ISCSIUtil) AttachDisk(b iscsiDiskMounter) (string, error) {
|
||||||
var devicePath string
|
var devicePath string
|
||||||
var devicePaths []string
|
devicePaths := map[string]string{}
|
||||||
var iscsiTransport string
|
var iscsiTransport string
|
||||||
var lastErr error
|
var lastErr error
|
||||||
|
|
||||||
|
@ -308,19 +325,27 @@ func (util *ISCSIUtil) AttachDisk(b iscsiDiskMounter) (string, error) {
|
||||||
}
|
}
|
||||||
glog.V(4).Infof("AttachDisk portal->host map for %s is %v", b.Iqn, portalHostMap)
|
glog.V(4).Infof("AttachDisk portal->host map for %s is %v", b.Iqn, portalHostMap)
|
||||||
|
|
||||||
|
for i := 1; i <= maxAttachAttempts; i++ {
|
||||||
for _, tp := range bkpPortal {
|
for _, tp := range bkpPortal {
|
||||||
|
if _, found := devicePaths[tp]; found {
|
||||||
|
glog.V(4).Infof("Device for portal %q already known", tp)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
hostNumber, loggedIn := portalHostMap[tp]
|
hostNumber, loggedIn := portalHostMap[tp]
|
||||||
if !loggedIn {
|
if !loggedIn {
|
||||||
glog.V(4).Infof("Could not get SCSI host number for portal %s, will attempt login", tp)
|
glog.V(4).Infof("Could not get SCSI host number for portal %s, will attempt login", tp)
|
||||||
|
|
||||||
// build discoverydb and discover iscsi target
|
// build discoverydb and discover iscsi target
|
||||||
b.exec.Run("iscsiadm", "-m", "discoverydb", "-t", "sendtargets", "-p", tp, "-I", b.Iface, "-o", "new")
|
b.exec.Run("iscsiadm", "-m", "discoverydb", "-t", "sendtargets", "-p", tp, "-I", b.Iface, "-o", "new")
|
||||||
|
|
||||||
// update discoverydb with CHAP secret
|
// update discoverydb with CHAP secret
|
||||||
err = updateISCSIDiscoverydb(b, tp)
|
err = updateISCSIDiscoverydb(b, tp)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
lastErr = fmt.Errorf("iscsi: failed to update discoverydb to portal %s error: %v", tp, err)
|
lastErr = fmt.Errorf("iscsi: failed to update discoverydb to portal %s error: %v", tp, err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
out, err = b.exec.Run("iscsiadm", "-m", "discoverydb", "-t", "sendtargets", "-p", tp, "-I", b.Iface, "--discover")
|
out, err = b.exec.Run("iscsiadm", "-m", "discoverydb", "-t", "sendtargets", "-p", tp, "-I", b.Iface, "--discover")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// delete discoverydb record
|
// delete discoverydb record
|
||||||
|
@ -328,12 +353,14 @@ func (util *ISCSIUtil) AttachDisk(b iscsiDiskMounter) (string, error) {
|
||||||
lastErr = fmt.Errorf("iscsi: failed to sendtargets to portal %s output: %s, err %v", tp, string(out), err)
|
lastErr = fmt.Errorf("iscsi: failed to sendtargets to portal %s output: %s, err %v", tp, string(out), err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
err = updateISCSINode(b, tp)
|
err = updateISCSINode(b, tp)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// failure to update node db is rare. But deleting record will likely impact those who already start using it.
|
// failure to update node db is rare. But deleting record will likely impact those who already start using it.
|
||||||
lastErr = fmt.Errorf("iscsi: failed to update iscsi node to portal %s error: %v", tp, err)
|
lastErr = fmt.Errorf("iscsi: failed to update iscsi node to portal %s error: %v", tp, err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// login to iscsi target
|
// login to iscsi target
|
||||||
out, err = b.exec.Run("iscsiadm", "-m", "node", "-p", tp, "-T", b.Iqn, "-I", b.Iface, "--login")
|
out, err = b.exec.Run("iscsiadm", "-m", "node", "-p", tp, "-T", b.Iqn, "-I", b.Iface, "--login")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -342,6 +369,7 @@ func (util *ISCSIUtil) AttachDisk(b iscsiDiskMounter) (string, error) {
|
||||||
lastErr = fmt.Errorf("iscsi: failed to attach disk: Error: %s (%v)", string(out), err)
|
lastErr = fmt.Errorf("iscsi: failed to attach disk: Error: %s (%v)", string(out), err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// in case of node failure/restart, explicitly set to manual login so it doesn't hang on boot
|
// in case of node failure/restart, explicitly set to manual login so it doesn't hang on boot
|
||||||
out, err = b.exec.Run("iscsiadm", "-m", "node", "-p", tp, "-T", b.Iqn, "-o", "update", "-n", "node.startup", "-v", "manual")
|
out, err = b.exec.Run("iscsiadm", "-m", "node", "-p", tp, "-T", b.Iqn, "-o", "update", "-n", "node.startup", "-v", "manual")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -385,41 +413,57 @@ func (util *ISCSIUtil) AttachDisk(b iscsiDiskMounter) (string, error) {
|
||||||
devicePath = strings.Join([]string{"/dev/disk/by-path/pci", "*", "ip", tp, "iscsi", b.Iqn, "lun", b.Lun}, "-")
|
devicePath = strings.Join([]string{"/dev/disk/by-path/pci", "*", "ip", tp, "iscsi", b.Iqn, "lun", b.Lun}, "-")
|
||||||
}
|
}
|
||||||
|
|
||||||
if exist := waitForPathToExist(&devicePath, 10, iscsiTransport); !exist {
|
if exist := waitForPathToExist(&devicePath, multipathDeviceTimeout, iscsiTransport); !exist {
|
||||||
glog.Errorf("Could not attach disk: Timeout after 10s")
|
glog.Errorf("Could not attach disk: Timeout after 10s")
|
||||||
// update last error
|
// update last error
|
||||||
lastErr = fmt.Errorf("Could not attach disk: Timeout after 10s")
|
lastErr = fmt.Errorf("Could not attach disk: Timeout after 10s")
|
||||||
continue
|
continue
|
||||||
} else {
|
} else {
|
||||||
devicePaths = append(devicePaths, devicePath)
|
devicePaths[tp] = devicePath
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
glog.V(4).Infof("iscsi: tried all devices for %q %d times, %d paths found", b.Iqn, i, len(devicePaths))
|
||||||
if len(devicePaths) == 0 {
|
if len(devicePaths) == 0 {
|
||||||
|
// No path attached, report error and stop trying. kubelet will try again in a short while
|
||||||
// delete cloned iface
|
// delete cloned iface
|
||||||
b.exec.Run("iscsiadm", "-m", "iface", "-I", b.Iface, "-o", "delete")
|
b.exec.Run("iscsiadm", "-m", "iface", "-I", b.Iface, "-o", "delete")
|
||||||
glog.Errorf("iscsi: failed to get any path for iscsi disk, last err seen:\n%v", lastErr)
|
glog.Errorf("iscsi: failed to get any path for iscsi disk, last err seen:\n%v", lastErr)
|
||||||
return "", fmt.Errorf("failed to get any path for iscsi disk, last err seen:\n%v", lastErr)
|
return "", fmt.Errorf("failed to get any path for iscsi disk, last err seen:\n%v", lastErr)
|
||||||
}
|
}
|
||||||
|
if len(devicePaths) == len(bkpPortal) {
|
||||||
|
// We have all paths
|
||||||
|
glog.V(4).Infof("iscsi: all devices for %q found", b.Iqn)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if len(devicePaths) >= minMultipathCount && i >= minAttachAttempts {
|
||||||
|
// We have at least two paths for multipath and we tried the other paths long enough
|
||||||
|
glog.V(4).Infof("%d devices found for %q", len(devicePaths), b.Iqn)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if lastErr != nil {
|
if lastErr != nil {
|
||||||
glog.Errorf("iscsi: last error occurred during iscsi init:\n%v", lastErr)
|
glog.Errorf("iscsi: last error occurred during iscsi init:\n%v", lastErr)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
devicePathList := []string{}
|
||||||
|
for _, path := range devicePaths {
|
||||||
|
devicePathList = append(devicePathList, path)
|
||||||
|
}
|
||||||
// Try to find a multipath device for the volume
|
// Try to find a multipath device for the volume
|
||||||
if 1 < len(bkpPortal) {
|
if len(bkpPortal) > 1 {
|
||||||
// If the PV has 2 or more portals, wait up to 10 seconds for the multipath
|
// Multipath volume was requested. Wait up to 10 seconds for the multipath device to appear.
|
||||||
// device to appear
|
devicePath = waitForMultiPathToExist(devicePathList, 10, b.deviceUtil)
|
||||||
devicePath = waitForMultiPathToExist(devicePaths, 10, b.deviceUtil)
|
|
||||||
} else {
|
} else {
|
||||||
// For PVs with 1 portal, just try one time to find the multipath device. This
|
// For PVs with 1 portal, just try one time to find the multipath device. This
|
||||||
// avoids a long pause when the multipath device will never get created, and
|
// avoids a long pause when the multipath device will never get created, and
|
||||||
// matches legacy behavior.
|
// matches legacy behavior.
|
||||||
devicePath = waitForMultiPathToExist(devicePaths, 1, b.deviceUtil)
|
devicePath = waitForMultiPathToExist(devicePathList, 1, b.deviceUtil)
|
||||||
}
|
}
|
||||||
|
|
||||||
// When no multipath device is found, just use the first (and presumably only) device
|
// When no multipath device is found, just use the first (and presumably only) device
|
||||||
if devicePath == "" {
|
if devicePath == "" {
|
||||||
devicePath = devicePaths[0]
|
devicePath = devicePathList[0]
|
||||||
}
|
}
|
||||||
|
|
||||||
glog.V(5).Infof("iscsi: AttachDisk devicePath: %s", devicePath)
|
glog.V(5).Infof("iscsi: AttachDisk devicePath: %s", devicePath)
|
||||||
|
|
Loading…
Reference in New Issue