2015-03-13 21:31:13 +00:00
/*
2016-06-03 00:25:58 +00:00
Copyright 2015 The Kubernetes Authors.
2015-03-13 21:31:13 +00:00
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package iscsi
import (
2017-05-31 19:52:00 +00:00
"encoding/json"
2015-10-13 18:50:49 +00:00
"fmt"
2015-03-13 21:31:13 +00:00
"os"
"path"
2015-11-05 19:06:20 +00:00
"path/filepath"
2016-07-25 04:18:38 +00:00
"regexp"
2018-09-27 08:24:59 +00:00
"strconv"
2015-03-13 21:31:13 +00:00
"strings"
"time"
2017-10-28 19:28:52 +00:00
"k8s.io/api/core/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
2018-11-09 18:49:10 +00:00
"k8s.io/klog"
2017-10-28 19:28:52 +00:00
"k8s.io/kubernetes/pkg/features"
2015-08-05 22:03:47 +00:00
"k8s.io/kubernetes/pkg/util/mount"
"k8s.io/kubernetes/pkg/volume"
2017-08-09 15:13:58 +00:00
volumeutil "k8s.io/kubernetes/pkg/volume/util"
2015-03-13 21:31:13 +00:00
)
2018-10-08 11:00:24 +00:00
// Tunables for attaching the paths of a multipath iSCSI volume.
const (
// minMultipathCount is the minimum number of paths that the volume plugin
// considers enough when a multipath volume is requested.
minMultipathCount = 2
// minAttachAttempts is the minimal number of attempts to attach all paths of
// a multipath volume. If at least minMultipathCount paths are available after
// this number of attempts, the volume plugin continues with mounting the volume.
minAttachAttempts = 2
// maxAttachAttempts is the total number of attempts to attach at least
// minMultipathCount paths. If there are fewer than minMultipathCount, the
// volume plugin tries to attach the remaining paths at least this number of
// times in total. After maxAttachAttempts attempts, it mounts even a single path.
maxAttachAttempts = 5
// multipathDeviceTimeout is how many seconds to wait for a multipath device
// if at least two paths are available.
multipathDeviceTimeout = 10
)
2017-03-17 20:42:15 +00:00
var (
	// chapSt lists the discoverydb setting names used for CHAP authentication
	// during sendtargets discovery. Each name is also the key under which the
	// corresponding value is looked up in the mounter's secret.
	chapSt = []string{
		"discovery.sendtargets.auth.username",
		"discovery.sendtargets.auth.password",
		"discovery.sendtargets.auth.username_in",
		"discovery.sendtargets.auth.password_in",
	}
	// chapSess lists the node setting names used for CHAP authentication of a
	// session. Each name is also the key under which the corresponding value
	// is looked up in the mounter's secret.
	chapSess = []string{
		"node.session.auth.username",
		"node.session.auth.password",
		"node.session.auth.username_in",
		"node.session.auth.password_in",
	}
	// ifaceTransportNameRe captures the value of an
	// "iface.transport_name = <value>" line. The pattern must not be padded
	// with spaces: the setting starts at the beginning of a line.
	ifaceTransportNameRe = regexp.MustCompile(`iface.transport_name = (.*)\n`)
	// ifaceRe captures <name> from a path that contains an "iface-<name>"
	// directory component.
	ifaceRe = regexp.MustCompile(`.+/iface-([^/]+)/.+`)
)
func updateISCSIDiscoverydb ( b iscsiDiskMounter , tp string ) error {
2018-11-17 04:50:47 +00:00
if ! b . chapDiscovery {
2017-08-15 13:53:46 +00:00
return nil
}
out , err := b . exec . Run ( "iscsiadm" , "-m" , "discoverydb" , "-t" , "sendtargets" , "-p" , tp , "-I" , b . Iface , "-o" , "update" , "-n" , "discovery.sendtargets.auth.authmethod" , "-v" , "CHAP" )
if err != nil {
return fmt . Errorf ( "iscsi: failed to update discoverydb with CHAP, output: %v" , string ( out ) )
}
2017-03-17 20:42:15 +00:00
2018-11-17 04:50:47 +00:00
for _ , k := range chapSt {
2017-08-15 13:53:46 +00:00
v := b . secret [ k ]
if len ( v ) > 0 {
out , err := b . exec . Run ( "iscsiadm" , "-m" , "discoverydb" , "-t" , "sendtargets" , "-p" , tp , "-I" , b . Iface , "-o" , "update" , "-n" , k , "-v" , v )
if err != nil {
return fmt . Errorf ( "iscsi: failed to update discoverydb key %q with value %q error: %v" , k , v , string ( out ) )
2017-03-17 20:42:15 +00:00
}
}
}
return nil
}
func updateISCSINode ( b iscsiDiskMounter , tp string ) error {
2018-11-17 04:50:47 +00:00
if ! b . chapSession {
2017-08-15 13:53:46 +00:00
return nil
}
2017-03-17 20:42:15 +00:00
2017-08-15 13:53:46 +00:00
out , err := b . exec . Run ( "iscsiadm" , "-m" , "node" , "-p" , tp , "-T" , b . Iqn , "-I" , b . Iface , "-o" , "update" , "-n" , "node.session.auth.authmethod" , "-v" , "CHAP" )
if err != nil {
return fmt . Errorf ( "iscsi: failed to update node with CHAP, output: %v" , string ( out ) )
}
2018-11-17 04:50:47 +00:00
for _ , k := range chapSess {
2017-08-15 13:53:46 +00:00
v := b . secret [ k ]
if len ( v ) > 0 {
out , err := b . exec . Run ( "iscsiadm" , "-m" , "node" , "-p" , tp , "-T" , b . Iqn , "-I" , b . Iface , "-o" , "update" , "-n" , k , "-v" , v )
if err != nil {
return fmt . Errorf ( "iscsi: failed to update node session key %q with value %q error: %v" , k , v , string ( out ) )
2017-03-17 20:42:15 +00:00
}
}
}
return nil
}
2015-03-13 21:31:13 +00:00
// StatFunc has the signature of os.Stat; it lets tests replace the stat call.
type StatFunc func(string) (os.FileInfo, error)

// GlobFunc has the signature of filepath.Glob; it lets tests replace the glob call.
type GlobFunc func(string) ([]string, error)

// waitForPathToExist waits for the device at *devicePath to appear, checking
// up to maxRetries times with a one second pause between checks.
//
// For the "tcp" transport the path is stat'ed directly. For any other
// transport the path is treated as a glob pattern, since the PCI id of the
// device is unknown, and *devicePath is overwritten with the first match.
// It returns true as soon as the device is found.
func waitForPathToExist(devicePath *string, maxRetries int, deviceTransport string) bool {
	// Delegating to a variant with injectable stat/glob functions makes unit
	// testing a lot easier.
	return waitForPathToExistInternal(devicePath, maxRetries, deviceTransport, os.Stat, filepath.Glob)
}

// waitForPathToExistInternal implements waitForPathToExist with the stat and
// glob functions supplied by the caller.
//
// It returns false when devicePath is nil, when maxRetries is not positive,
// when the stat call fails with anything other than a "not exist" error, or
// when the device has not appeared after maxRetries checks.
func waitForPathToExistInternal(devicePath *string, maxRetries int, deviceTransport string, osStat StatFunc, filepathGlob GlobFunc) bool {
	if devicePath == nil {
		return false
	}

	for i := 0; i < maxRetries; i++ {
		var err error
		if deviceTransport == "tcp" {
			_, err = osStat(*devicePath)
		} else {
			// The glob error is deliberately ignored: only the returned
			// matches decide whether the device is present yet.
			fpath, _ := filepathGlob(*devicePath)
			// Use len rather than a nil comparison so that an empty, non-nil
			// result is also treated as "no match" instead of panicking below.
			if len(fpath) == 0 {
				err = os.ErrNotExist
			} else {
				// There might be a case that fpath contains multiple device paths if
				// multiple PCI devices connect to same iscsi target. We handle this
				// case at subsequent logic. Pick up only first path here.
				*devicePath = fpath[0]
			}
		}
		if err == nil {
			return true
		}
		if !os.IsNotExist(err) {
			return false
		}
		// Do not sleep after the final attempt.
		if i == maxRetries-1 {
			break
		}
		time.Sleep(time.Second)
	}
	return false
}
// getDevicePrefixRefCount: given a prefix of device path, find its reference count from /proc/mounts
// returns the reference count to the device and error code
// for services like iscsi construct multiple device paths with the same prefix pattern.
// this function aggregates all references to a service based on the prefix pattern
// More specifically, this prefix semantics is to aggregate disk paths that belong to the same iSCSI target/iqn pair.
// an iSCSI target could expose multiple LUNs through the same IQN, and Linux iSCSI initiator creates disk paths that start the same prefix but end with different LUN number
// When we decide whether it is time to logout a target, we have to see if none of the LUNs are used any more.
// That's where the prefix based ref count kicks in. If we only count the disks using exact match, we could log other disks out.
func getDevicePrefixRefCount ( mounter mount . Interface , deviceNamePrefix string ) ( int , error ) {
mps , err := mounter . List ( )
if err != nil {
return - 1 , err
}
// Find the number of references to the device.
refCount := 0
for i := range mps {
2015-10-13 18:50:49 +00:00
if strings . HasPrefix ( mps [ i ] . Path , deviceNamePrefix ) {
2015-03-13 21:31:13 +00:00
refCount ++
}
}
return refCount , nil
}
2016-12-23 14:42:13 +00:00
// make a directory like /var/lib/kubelet/plugins/kubernetes.io/iscsi/iface_name/portal-some_iqn-lun-lun_id
func makePDNameInternal ( host volume . VolumeHost , portal string , iqn string , lun string , iface string ) string {
2017-02-14 20:50:26 +00:00
return path . Join ( host . GetPluginDir ( iscsiPluginName ) , "iface-" + iface , portal + "-" + iqn + "-lun-" + lun )
2015-03-13 21:31:13 +00:00
}
2017-10-28 19:28:52 +00:00
// makeVDPDNameInternal builds a path of the form
// <volume device plugin dir>/iface-<iface>/<portal>-<iqn>-lun-<lun>
// where the base is host.GetVolumeDevicePluginDir(iscsiPluginName).
func makeVDPDNameInternal(host volume.VolumeHost, portal string, iqn string, lun string, iface string) string {
	volumeDeviceDir := host.GetVolumeDevicePluginDir(iscsiPluginName)
	ifaceDir := "iface-" + iface
	lunDir := strings.Join([]string{portal, iqn, "lun", lun}, "-")
	return path.Join(volumeDeviceDir, ifaceDir, lunDir)
}
2015-03-13 21:31:13 +00:00
// ISCSIUtil is a field-less type whose methods implement the iSCSI disk
// operations defined in this file.
type ISCSIUtil struct { }
2017-10-28 19:28:52 +00:00
// MakeGlobalPDName returns path of global plugin dir
2015-03-13 21:31:13 +00:00
func ( util * ISCSIUtil ) MakeGlobalPDName ( iscsi iscsiDisk ) string {
2017-10-28 19:28:52 +00:00
return makePDNameInternal ( iscsi . plugin . host , iscsi . Portals [ 0 ] , iscsi . Iqn , iscsi . Lun , iscsi . Iface )
}
// MakeGlobalVDPDName returns path of global volume device plugin dir
func ( util * ISCSIUtil ) MakeGlobalVDPDName ( iscsi iscsiDisk ) string {
return makeVDPDNameInternal ( iscsi . plugin . host , iscsi . Portals [ 0 ] , iscsi . Iqn , iscsi . Lun , iscsi . Iface )
2017-05-31 19:52:00 +00:00
}
func ( util * ISCSIUtil ) persistISCSI ( conf iscsiDisk , mnt string ) error {
file := path . Join ( mnt , "iscsi.json" )
fp , err := os . Create ( file )
if err != nil {
return fmt . Errorf ( "iscsi: create %s err %s" , file , err )
}
defer fp . Close ( )
encoder := json . NewEncoder ( fp )
if err = encoder . Encode ( conf ) ; err != nil {
2018-11-17 04:50:47 +00:00
return fmt . Errorf ( "iscsi: encode err: %v" , err )
2017-05-31 19:52:00 +00:00
}
return nil
}
func ( util * ISCSIUtil ) loadISCSI ( conf * iscsiDisk , mnt string ) error {
file := path . Join ( mnt , "iscsi.json" )
fp , err := os . Open ( file )
if err != nil {
return fmt . Errorf ( "iscsi: open %s err %s" , file , err )
}
defer fp . Close ( )
decoder := json . NewDecoder ( fp )
if err = decoder . Decode ( conf ) ; err != nil {
2018-11-17 04:50:47 +00:00
return fmt . Errorf ( "iscsi: decode err: %v" , err )
2017-05-31 19:52:00 +00:00
}
return nil
2015-03-13 21:31:13 +00:00
}
Avoid deleted iSCSI LUNs in the kernel
This change ensures that iSCSI block devices are deleted after
unmounting, and implements scanning of individual LUNs rather
than scanning the whole iSCSI bus.
In cases where an iSCSI bus is in use by more than one attachment,
detaching used to leave behind phantom block devices, which could
cause I/O errors, long timeouts, or even corruption in the case
when the underlying LUN number was recycled. This change makes
sure to flush references to the block devices after unmounting.
The original iSCSI code scanned the whole target every time a LUN
was attached. On storage controllers that export multiple LUNs on
the same target IQN, this led to a situation where nodes would
see SCSI disks that they weren't supposed to -- possibly dozens or
hundreds of extra SCSI disks. This caused 3 significant problems:
1) The large number of disks wasted resources on the node and
caused a minor drag on performance.
2) The scanning of all the devices caused a huge number of uevents
from the kernel, causing udev to bog down for multiple minutes in
some cases, triggering timeouts and other transient failures.
3) Because Kubernetes was not tracking all the "extra" LUNs that
got discovered, they would not get cleaned up until the last LUN
on a particular target was detached, causing a logout. This led
to significant complications:
In the time window between when a LUN was unintentionally scanned,
and when it was removed due to a logout, if it was deleted on the
backend, a phantom reference remained on the node. In the best
case, the phantom LUN would cause I/O errors and timeouts in the
udev system. In the worst case, the backend could reuse the LUN
number for a new volume, and if that new volume were to be
scheduled to a pod with a phantom reference to the old LUN by the
same number, the initiator could get confused and possibly corrupt
data on that volume.
To avoid these problems, the new implementation only scans for
the specific LUN number it expects to see. It's worth noting that
the default behavior of iscsiadm is to automatically scan the
whole bus on login. That behavior can be disabled by setting
node.session.scan = manual
in iscsid.conf, and for the reasons mentioned above, it is
strongly recommended to set that option. This change still works
regardless of the setting in iscsid.conf, and while automatic
scanning will cause some problems, this change doesn't make the
problems any worse, and can make things better in some cases.
2018-07-25 03:58:19 +00:00
// scanOneLun scans a single LUN on one SCSI bus
// Use this to avoid scanning the whole SCSI bus for all of the LUNs, which
// would result in the kernel on this node discovering LUNs that it shouldn't
// know about. Extraneous LUNs cause problems because they may get deleted
// without us getting notified, since we were never supposed to know about
// them. When LUNs are deleted without proper cleanup in the kernel, I/O errors
// and timeouts result, which can noticeably degrade performance of future
// operations.
func scanOneLun ( hostNumber int , lunNumber int ) error {
filename := fmt . Sprintf ( "/sys/class/scsi_host/host%d/scan" , hostNumber )
fd , err := os . OpenFile ( filename , os . O_WRONLY , 0 )
if err != nil {
return err
}
defer fd . Close ( )
// Channel/Target are always 0 for iSCSI
scanCmd := fmt . Sprintf ( "0 0 %d" , lunNumber )
if written , err := fd . WriteString ( scanCmd ) ; err != nil {
return err
} else if 0 == written {
return fmt . Errorf ( "No data written to file: %s" , filename )
}
2018-11-09 18:49:10 +00:00
klog . V ( 3 ) . Infof ( "Scanned SCSI host %d LUN %d" , hostNumber , lunNumber )
Avoid deleted iSCSI LUNs in the kernel
This change ensures that iSCSI block devices are deleted after
unmounting, and implements scanning of individual LUNs rather
than scanning the whole iSCSI bus.
In cases where an iSCSI bus is in use by more than one attachment,
detaching used to leave behind phantom block devices, which could
cause I/O errors, long timeouts, or even corruption in the case
when the underlying LUN number was recycled. This change makes
sure to flush references to the block devices after unmounting.
The original iSCSI code scanned the whole target every time a LUN
was attached. On storage controllers that export multiple LUNs on
the same target IQN, this led to a situation where nodes would
see SCSI disks that they weren't supposed to -- possibly dozens or
hundreds of extra SCSI disks. This caused 3 significant problems:
1) The large number of disks wasted resources on the node and
caused a minor drag on performance.
2) The scanning of all the devices caused a huge number of uevents
from the kernel, causing udev to bog down for multiple minutes in
some cases, triggering timeouts and other transient failures.
3) Because Kubernetes was not tracking all the "extra" LUNs that
got discovered, they would not get cleaned up until the last LUN
on a particular target was detached, causing a logout. This led
to significant complications:
In the time window between when a LUN was unintentially scanned,
and when it was removed due to a logout, if it was deleted on the
backend, a phantom reference remained on the node. In the best
case, the phantom LUN would cause I/O errors and timeouts in the
udev system. In the worst case, the backend could reuse the LUN
number for a new volume, and if that new volume were to be
scheduled to a pod with a phantom reference to the old LUN by the
same number, the initiator could get confused and possibly corrupt
data on that volume.
To avoid these problems, the new implementation only scans for
the specific LUN number it expects to see. It's worth noting that
the default behavior of iscsiadm is to automatically scan the
whole bus on login. That behavior can be disabled by setting
node.session.scan = manual
in iscsid.conf, and for the reasons mentioned above, it is
strongly recommended to set that option. This change still works
regardless of the setting in iscsid.conf, and while automatic
scanning will cause some problems, this change doesn't make the
problems any worse, and can make things better in some cases.
2018-07-25 03:58:19 +00:00
return nil
}
2018-07-06 20:04:27 +00:00
// waitForMultiPathToExist looks for a multipath device that maps any of
// devicePaths, checking up to maxRetries times with a one second pause
// between rounds. It returns the first non-empty result of
// deviceUtil.FindMultipathDeviceForDevice, or "" when devicePaths is empty or
// no mapping is found within maxRetries rounds.
func waitForMultiPathToExist(devicePaths []string, maxRetries int, deviceUtil volumeutil.DeviceUtil) string {
	if len(devicePaths) == 0 {
		return ""
	}

	for attempt := 1; attempt <= maxRetries; attempt++ {
		for _, dev := range devicePaths {
			// Empty entries are not expected, but skip them to be safe.
			if dev == "" {
				continue
			}
			// If the device is under multipath control, use the mapped
			// (dm-XX) device instead.
			mapped := deviceUtil.FindMultipathDeviceForDevice(dev)
			if mapped != "" {
				return mapped
			}
		}
		// Pause between rounds, but not after the last one.
		if attempt < maxRetries {
			time.Sleep(time.Second)
		}
	}
	return ""
}
2017-10-28 19:28:52 +00:00
// AttachDisk returns devicePath of volume if attach succeeded otherwise returns error
2017-08-09 15:13:58 +00:00
func ( util * ISCSIUtil ) AttachDisk ( b iscsiDiskMounter ) ( string , error ) {
2015-11-05 19:06:20 +00:00
var devicePath string
2018-10-08 11:00:24 +00:00
devicePaths := map [ string ] string { }
2016-07-25 04:18:38 +00:00
var iscsiTransport string
2017-03-17 20:42:15 +00:00
var lastErr error
2016-12-23 14:42:13 +00:00
2017-08-23 12:56:51 +00:00
out , err := b . exec . Run ( "iscsiadm" , "-m" , "iface" , "-I" , b . Iface , "-o" , "show" )
2016-07-25 04:18:38 +00:00
if err != nil {
2018-11-09 18:49:10 +00:00
klog . Errorf ( "iscsi: could not read iface %s error: %s" , b . Iface , string ( out ) )
2017-08-09 15:13:58 +00:00
return "" , err
2016-07-25 04:18:38 +00:00
}
iscsiTransport = extractTransportname ( string ( out ) )
2016-12-23 14:42:13 +00:00
2017-05-31 19:52:00 +00:00
bkpPortal := b . Portals
2017-07-12 03:37:48 +00:00
// create new iface and copy parameters from pre-configured iface to the created iface
if b . InitiatorName != "" {
// new iface name is <target portal>:<volume name>
2017-08-09 15:13:58 +00:00
newIface := bkpPortal [ 0 ] + ":" + b . VolName
2017-07-12 03:37:48 +00:00
err = cloneIface ( b , newIface )
if err != nil {
2018-11-09 18:49:10 +00:00
klog . Errorf ( "iscsi: failed to clone iface: %s error: %v" , b . Iface , err )
2017-08-09 15:13:58 +00:00
return "" , err
2017-07-12 03:37:48 +00:00
}
// update iface name
b . Iface = newIface
}
Avoid deleted iSCSI LUNs in the kernel
This change ensures that iSCSI block devices are deleted after
unmounting, and implements scanning of individual LUNs rather
than scanning the whole iSCSI bus.
In cases where an iSCSI bus is in use by more than one attachment,
detaching used to leave behind phantom block devices, which could
cause I/O errors, long timeouts, or even corruption in the case
when the underlying LUN number was recycled. This change makes
sure to flush references to the block devices after unmounting.
The original iSCSI code scanned the whole target every time a LUN
was attached. On storage controllers that export multiple LUNs on
the same target IQN, this led to a situation where nodes would
see SCSI disks that they weren't supposed to -- possibly dozens or
hundreds of extra SCSI disks. This caused 3 significant problems:
1) The large number of disks wasted resources on the node and
caused a minor drag on performance.
2) The scanning of all the devices caused a huge number of uevents
from the kernel, causing udev to bog down for multiple minutes in
some cases, triggering timeouts and other transient failures.
3) Because Kubernetes was not tracking all the "extra" LUNs that
got discovered, they would not get cleaned up until the last LUN
on a particular target was detached, causing a logout. This led
to significant complications:
In the time window between when a LUN was unintentionally scanned,
and when it was removed due to a logout, if it was deleted on the
backend, a phantom reference remained on the node. In the best
case, the phantom LUN would cause I/O errors and timeouts in the
udev system. In the worst case, the backend could reuse the LUN
number for a new volume, and if that new volume were to be
scheduled to a pod with a phantom reference to the old LUN by the
same number, the initiator could get confused and possibly corrupt
data on that volume.
To avoid these problems, the new implementation only scans for
the specific LUN number it expects to see. It's worth noting that
the default behavior of iscsiadm is to automatically scan the
whole bus on login. That behavior can be disabled by setting
node.session.scan = manual
in iscsid.conf, and for the reasons mentioned above, it is
strongly recommended to set that option. This change still works
regardless of the setting in iscsid.conf, and while automatic
scanning will cause some problems, this change doesn't make the
problems any worse, and can make things better in some cases.
2018-07-25 03:58:19 +00:00
// Lock the target while we login to avoid races between 2 volumes that share the same
// target both logging in or one logging out while another logs in.
b . plugin . targetLocks . LockKey ( b . Iqn )
defer b . plugin . targetLocks . UnlockKey ( b . Iqn )
// Build a map of SCSI hosts for each target portal. We will need this to
// issue the bus rescans.
portalHostMap , err := b . deviceUtil . GetISCSIPortalHostMapForTarget ( b . Iqn )
if err != nil {
return "" , err
}
2018-11-09 18:49:10 +00:00
klog . V ( 4 ) . Infof ( "AttachDisk portal->host map for %s is %v" , b . Iqn , portalHostMap )
Avoid deleted iSCSI LUNs in the kernel
This change ensures that iSCSI block devices are deleted after
unmounting, and implements scanning of individual LUNs rather
than scanning the whole iSCSI bus.
In cases where an iSCSI bus is in use by more than one attachment,
detaching used to leave behind phantom block devices, which could
cause I/O errors, long timeouts, or even corruption in the case
when the underlying LUN number was recycled. This change makes
sure to flush references to the block devices after unmounting.
The original iSCSI code scanned the whole target every time a LUN
was attached. On storage controllers that export multiple LUNs on
the same target IQN, this led to a situation where nodes would
see SCSI disks that they weren't supposed to -- possibly dozens or
hundreds of extra SCSI disks. This caused 3 significant problems:
1) The large number of disks wasted resources on the node and
caused a minor drag on performance.
2) The scanning of all the devices caused a huge number of uevents
from the kernel, causing udev to bog down for multiple minutes in
some cases, triggering timeouts and other transient failures.
3) Because Kubernetes was not tracking all the "extra" LUNs that
got discovered, they would not get cleaned up until the last LUN
on a particular target was detached, causing a logout. This led
to significant complications:
In the time window between when a LUN was unintentionally scanned,
and when it was removed due to a logout, if it was deleted on the
backend, a phantom reference remained on the node. In the best
case, the phantom LUN would cause I/O errors and timeouts in the
udev system. In the worst case, the backend could reuse the LUN
number for a new volume, and if that new volume were to be
scheduled to a pod with a phantom reference to the old LUN by the
same number, the initiator could get confused and possibly corrupt
data on that volume.
To avoid these problems, the new implementation only scans for
the specific LUN number it expects to see. It's worth noting that
the default behavior of iscsiadm is to automatically scan the
whole bus on login. That behavior can be disabled by setting
node.session.scan = manual
in iscsid.conf, and for the reasons mentioned above, it is
strongly recommended to set that option. This change still works
regardless of the setting in iscsid.conf, and while automatic
scanning will cause some problems, this change doesn't make the
problems any worse, and can make things better in some cases.
2018-07-25 03:58:19 +00:00
2018-10-08 11:00:24 +00:00
for i := 1 ; i <= maxAttachAttempts ; i ++ {
for _ , tp := range bkpPortal {
if _ , found := devicePaths [ tp ] ; found {
2018-11-09 18:49:10 +00:00
klog . V ( 4 ) . Infof ( "Device for portal %q already known" , tp )
Avoid deleted iSCSI LUNs in the kernel
This change ensures that iSCSI block devices are deleted after
unmounting, and implements scanning of individual LUNs rather
than scanning the whole iSCSI bus.
In cases where an iSCSI bus is in use by more than one attachment,
detaching used to leave behind phantom block devices, which could
cause I/O errors, long timeouts, or even corruption in the case
when the underlying LUN number was recycled. This change makes
sure to flush references to the block devices after unmounting.
The original iSCSI code scanned the whole target every time a LUN
was attached. On storage controllers that export multiple LUNs on
the same target IQN, this led to a situation where nodes would
see SCSI disks that they weren't supposed to -- possibly dozens or
hundreds of extra SCSI disks. This caused 3 significant problems:
1) The large number of disks wasted resources on the node and
caused a minor drag on performance.
2) The scanning of all the devices caused a huge number of uevents
from the kernel, causing udev to bog down for multiple minutes in
some cases, triggering timeouts and other transient failures.
3) Because Kubernetes was not tracking all the "extra" LUNs that
got discovered, they would not get cleaned up until the last LUN
on a particular target was detached, causing a logout. This led
to significant complications:
In the time window between when a LUN was unintentionally scanned,
and when it was removed due to a logout, if it was deleted on the
backend, a phantom reference remained on the node. In the best
case, the phantom LUN would cause I/O errors and timeouts in the
udev system. In the worst case, the backend could reuse the LUN
number for a new volume, and if that new volume were to be
scheduled to a pod with a phantom reference to the old LUN by the
same number, the initiator could get confused and possibly corrupt
data on that volume.
To avoid these problems, the new implementation only scans for
the specific LUN number it expects to see. It's worth noting that
the default behavior of iscsiadm is to automatically scan the
whole bus on login. That behavior can be disabled by setting
node.session.scan = manual
in iscsid.conf, and for the reasons mentioned above, it is
strongly recommended to set that option. This change still works
regardless of the setting in iscsid.conf, and while automatic
scanning will cause some problems, this change doesn't make the
problems any worse, and can make things better in some cases.
2018-07-25 03:58:19 +00:00
continue
}
2018-10-08 11:00:24 +00:00
hostNumber , loggedIn := portalHostMap [ tp ]
if ! loggedIn {
2018-11-09 18:49:10 +00:00
klog . V ( 4 ) . Infof ( "Could not get SCSI host number for portal %s, will attempt login" , tp )
2018-10-08 11:00:24 +00:00
// build discoverydb and discover iscsi target
b . exec . Run ( "iscsiadm" , "-m" , "discoverydb" , "-t" , "sendtargets" , "-p" , tp , "-I" , b . Iface , "-o" , "new" )
// update discoverydb with CHAP secret
err = updateISCSIDiscoverydb ( b , tp )
if err != nil {
lastErr = fmt . Errorf ( "iscsi: failed to update discoverydb to portal %s error: %v" , tp , err )
continue
}
out , err = b . exec . Run ( "iscsiadm" , "-m" , "discoverydb" , "-t" , "sendtargets" , "-p" , tp , "-I" , b . Iface , "--discover" )
if err != nil {
// delete discoverydb record
b . exec . Run ( "iscsiadm" , "-m" , "discoverydb" , "-t" , "sendtargets" , "-p" , tp , "-I" , b . Iface , "-o" , "delete" )
lastErr = fmt . Errorf ( "iscsi: failed to sendtargets to portal %s output: %s, err %v" , tp , string ( out ) , err )
continue
}
err = updateISCSINode ( b , tp )
if err != nil {
// failure to update node db is rare. But deleting record will likely impact those who already start using it.
lastErr = fmt . Errorf ( "iscsi: failed to update iscsi node to portal %s error: %v" , tp , err )
continue
}
// login to iscsi target
out , err = b . exec . Run ( "iscsiadm" , "-m" , "node" , "-p" , tp , "-T" , b . Iqn , "-I" , b . Iface , "--login" )
if err != nil {
// delete the node record from database
b . exec . Run ( "iscsiadm" , "-m" , "node" , "-p" , tp , "-I" , b . Iface , "-T" , b . Iqn , "-o" , "delete" )
lastErr = fmt . Errorf ( "iscsi: failed to attach disk: Error: %s (%v)" , string ( out ) , err )
continue
}
// in case of node failure/restart, explicitly set to manual login so it doesn't hang on boot
out , err = b . exec . Run ( "iscsiadm" , "-m" , "node" , "-p" , tp , "-T" , b . Iqn , "-o" , "update" , "-n" , "node.startup" , "-v" , "manual" )
if err != nil {
// don't fail if we can't set startup mode, but log warning so there is a clue
2018-11-09 18:49:10 +00:00
klog . Warningf ( "Warning: Failed to set iSCSI login mode to manual. Error: %v" , err )
2018-10-08 11:00:24 +00:00
}
// Rebuild the host map after logging in
portalHostMap , err := b . deviceUtil . GetISCSIPortalHostMapForTarget ( b . Iqn )
if err != nil {
return "" , err
}
2018-11-09 18:49:10 +00:00
klog . V ( 6 ) . Infof ( "AttachDisk portal->host map for %s is %v" , b . Iqn , portalHostMap )
2018-10-08 11:00:24 +00:00
hostNumber , loggedIn = portalHostMap [ tp ]
if ! loggedIn {
2018-11-09 18:49:10 +00:00
klog . Warningf ( "Could not get SCSI host number for portal %s after logging in" , tp )
2018-10-08 11:00:24 +00:00
continue
}
Avoid deleted iSCSI LUNs in the kernel
This change ensures that iSCSI block devices are deleted after
unmounting, and implements scanning of individual LUNs rather
than scanning the whole iSCSI bus.
In cases where an iSCSI bus is in use by more than one attachment,
detaching used to leave behind phantom block devices, which could
cause I/O errors, long timeouts, or even corruption in the case
when the underlying LUN number was recycled. This change makes
sure to flush references to the block devices after unmounting.
The original iSCSI code scanned the whole target every time a LUN
was attached. On storage controllers that export multiple LUNs on
the same target IQN, this led to a situation where nodes would
see SCSI disks that they weren't supposed to -- possibly dozens or
hundreds of extra SCSI disks. This caused 3 significant problems:
1) The large number of disks wasted resources on the node and
caused a minor drag on performance.
2) The scanning of all the devices caused a huge number of uevents
from the kernel, causing udev to bog down for multiple minutes in
some cases, triggering timeouts and other transient failures.
3) Because Kubernetes was not tracking all the "extra" LUNs that
got discovered, they would not get cleaned up until the last LUN
on a particular target was detached, causing a logout. This led
to significant complications:
In the time window between when a LUN was unintentionally scanned,
and when it was removed due to a logout, if it was deleted on the
backend, a phantom reference remained on the node. In the best
case, the phantom LUN would cause I/O errors and timeouts in the
udev system. In the worst case, the backend could reuse the LUN
number for a new volume, and if that new volume were to be
scheduled to a pod with a phantom reference to the old LUN by the
same number, the initiator could get confused and possibly corrupt
data on that volume.
To avoid these problems, the new implementation only scans for
the specific LUN number it expects to see. It's worth noting that
the default behavior of iscsiadm is to automatically scan the
whole bus on login. That behavior can be disabled by setting
node.session.scan = manual
in iscsid.conf, and for the reasons mentioned above, it is
strongly recommended to set that option. This change still works
regardless of the setting in iscsid.conf, and while automatic
scanning will cause some problems, this change doesn't make the
problems any worse, and can make things better in some cases.
2018-07-25 03:58:19 +00:00
}
2018-10-08 11:00:24 +00:00
2018-11-09 18:49:10 +00:00
klog . V ( 5 ) . Infof ( "AttachDisk: scanning SCSI host %d LUN %s" , hostNumber , b . Lun )
2018-10-08 11:00:24 +00:00
lunNumber , err := strconv . Atoi ( b . Lun )
Avoid deleted iSCSI LUNs in the kernel
This change ensures that iSCSI block devices are deleted after
unmounting, and implements scanning of individual LUNs rather
than scanning the whole iSCSI bus.
In cases where an iSCSI bus is in use by more than one attachment,
detaching used to leave behind phantom block devices, which could
cause I/O errors, long timeouts, or even corruption in the case
when the underlying LUN number was recycled. This change makes
sure to flush references to the block devices after unmounting.
The original iSCSI code scanned the whole target every time a LUN
was attached. On storage controllers that export multiple LUNs on
the same target IQN, this led to a situation where nodes would
see SCSI disks that they weren't supposed to -- possibly dozens or
hundreds of extra SCSI disks. This caused 3 significant problems:
1) The large number of disks wasted resources on the node and
caused a minor drag on performance.
2) The scanning of all the devices caused a huge number of uevents
from the kernel, causing udev to bog down for multiple minutes in
some cases, triggering timeouts and other transient failures.
3) Because Kubernetes was not tracking all the "extra" LUNs that
got discovered, they would not get cleaned up until the last LUN
on a particular target was detached, causing a logout. This led
to significant complications:
In the time window between when a LUN was unintentionally scanned,
and when it was removed due to a logout, if it was deleted on the
backend, a phantom reference remained on the node. In the best
case, the phantom LUN would cause I/O errors and timeouts in the
udev system. In the worst case, the backend could reuse the LUN
number for a new volume, and if that new volume were to be
scheduled to a pod with a phantom reference to the old LUN by the
same number, the initiator could get confused and possibly corrupt
data on that volume.
To avoid these problems, the new implementation only scans for
the specific LUN number it expects to see. It's worth noting that
the default behavior of iscsiadm is to automatically scan the
whole bus on login. That behavior can be disabled by setting
node.session.scan = manual
in iscsid.conf, and for the reasons mentioned above, it is
strongly recommended to set that option. This change still works
regardless of the setting in iscsid.conf, and while automatic
scanning will cause some problems, this change doesn't make the
problems any worse, and can make things better in some cases.
2018-07-25 03:58:19 +00:00
if err != nil {
2018-10-08 11:00:24 +00:00
return "" , fmt . Errorf ( "AttachDisk: lun is not a number: %s\nError: %v" , b . Lun , err )
Avoid deleted iSCSI LUNs in the kernel
This change ensures that iSCSI block devices are deleted after
unmounting, and implements scanning of individual LUNs rather
than scanning the whole iSCSI bus.
In cases where an iSCSI bus is in use by more than one attachment,
detaching used to leave behind phantom block devices, which could
cause I/O errors, long timeouts, or even corruption in the case
when the underlying LUN number was recycled. This change makes
sure to flush references to the block devices after unmounting.
The original iSCSI code scanned the whole target every time a LUN
was attached. On storage controllers that export multiple LUNs on
the same target IQN, this led to a situation where nodes would
see SCSI disks that they weren't supposed to -- possibly dozens or
hundreds of extra SCSI disks. This caused 3 significant problems:
1) The large number of disks wasted resources on the node and
caused a minor drag on performance.
2) The scanning of all the devices caused a huge number of uevents
from the kernel, causing udev to bog down for multiple minutes in
some cases, triggering timeouts and other transient failures.
3) Because Kubernetes was not tracking all the "extra" LUNs that
got discovered, they would not get cleaned up until the last LUN
on a particular target was detached, causing a logout. This led
to significant complications:
In the time window between when a LUN was unintentionally scanned,
and when it was removed due to a logout, if it was deleted on the
backend, a phantom reference remained on the node. In the best
case, the phantom LUN would cause I/O errors and timeouts in the
udev system. In the worst case, the backend could reuse the LUN
number for a new volume, and if that new volume were to be
scheduled to a pod with a phantom reference to the old LUN by the
same number, the initiator could get confused and possibly corrupt
data on that volume.
To avoid these problems, the new implementation only scans for
the specific LUN number it expects to see. It's worth noting that
the default behavior of iscsiadm is to automatically scan the
whole bus on login. That behavior can be disabled by setting
node.session.scan = manual
in iscsid.conf, and for the reasons mentioned above, it is
strongly recommended to set that option. This change still works
regardless of the setting in iscsid.conf, and while automatic
scanning will cause some problems, this change doesn't make the
problems any worse, and can make things better in some cases.
2018-07-25 03:58:19 +00:00
}
2018-10-08 11:00:24 +00:00
// Scan the iSCSI bus for the LUN
err = scanOneLun ( hostNumber , lunNumber )
Avoid deleted iSCSI LUNs in the kernel
This change ensures that iSCSI block devices are deleted after
unmounting, and implements scanning of individual LUNs rather
than scanning the whole iSCSI bus.
In cases where an iSCSI bus is in use by more than one attachment,
detaching used to leave behind phantom block devices, which could
cause I/O errors, long timeouts, or even corruption in the case
when the underlying LUN number was recycled. This change makes
sure to flush references to the block devices after unmounting.
The original iSCSI code scanned the whole target every time a LUN
was attached. On storage controllers that export multiple LUNs on
the same target IQN, this led to a situation where nodes would
see SCSI disks that they weren't supposed to -- possibly dozens or
hundreds of extra SCSI disks. This caused 3 significant problems:
1) The large number of disks wasted resources on the node and
caused a minor drag on performance.
2) The scanning of all the devices caused a huge number of uevents
from the kernel, causing udev to bog down for multiple minutes in
some cases, triggering timeouts and other transient failures.
3) Because Kubernetes was not tracking all the "extra" LUNs that
got discovered, they would not get cleaned up until the last LUN
on a particular target was detached, causing a logout. This led
to significant complications:
In the time window between when a LUN was unintentionally scanned,
and when it was removed due to a logout, if it was deleted on the
backend, a phantom reference remained on the node. In the best
case, the phantom LUN would cause I/O errors and timeouts in the
udev system. In the worst case, the backend could reuse the LUN
number for a new volume, and if that new volume were to be
scheduled to a pod with a phantom reference to the old LUN by the
same number, the initiator could get confused and possibly corrupt
data on that volume.
To avoid these problems, the new implementation only scans for
the specific LUN number it expects to see. It's worth noting that
the default behavior of iscsiadm is to automatically scan the
whole bus on login. That behavior can be disabled by setting
node.session.scan = manual
in iscsid.conf, and for the reasons mentioned above, it is
strongly recommended to set that option. This change still works
regardless of the setting in iscsid.conf, and while automatic
scanning will cause some problems, this change doesn't make the
problems any worse, and can make things better in some cases.
2018-07-25 03:58:19 +00:00
if err != nil {
return "" , err
}
2018-10-08 11:00:24 +00:00
if iscsiTransport == "" {
2018-11-09 18:49:10 +00:00
klog . Errorf ( "iscsi: could not find transport name in iface %s" , b . Iface )
2018-10-08 11:00:24 +00:00
return "" , fmt . Errorf ( "Could not parse iface file for %s" , b . Iface )
}
if iscsiTransport == "tcp" {
devicePath = strings . Join ( [ ] string { "/dev/disk/by-path/ip" , tp , "iscsi" , b . Iqn , "lun" , b . Lun } , "-" )
} else {
devicePath = strings . Join ( [ ] string { "/dev/disk/by-path/pci" , "*" , "ip" , tp , "iscsi" , b . Iqn , "lun" , b . Lun } , "-" )
Avoid deleted iSCSI LUNs in the kernel
This change ensures that iSCSI block devices are deleted after
unmounting, and implements scanning of individual LUNs rather
than scanning the whole iSCSI bus.
In cases where an iSCSI bus is in use by more than one attachment,
detaching used to leave behind phantom block devices, which could
cause I/O errors, long timeouts, or even corruption in the case
when the underlying LUN number was recycled. This change makes
sure to flush references to the block devices after unmounting.
The original iSCSI code scanned the whole target every time a LUN
was attached. On storage controllers that export multiple LUNs on
the same target IQN, this led to a situation where nodes would
see SCSI disks that they weren't supposed to -- possibly dozens or
hundreds of extra SCSI disks. This caused 3 significant problems:
1) The large number of disks wasted resources on the node and
caused a minor drag on performance.
2) The scanning of all the devices caused a huge number of uevents
from the kernel, causing udev to bog down for multiple minutes in
some cases, triggering timeouts and other transient failures.
3) Because Kubernetes was not tracking all the "extra" LUNs that
got discovered, they would not get cleaned up until the last LUN
on a particular target was detached, causing a logout. This led
to significant complications:
In the time window between when a LUN was unintentionally scanned,
and when it was removed due to a logout, if it was deleted on the
backend, a phantom reference remained on the node. In the best
case, the phantom LUN would cause I/O errors and timeouts in the
udev system. In the worst case, the backend could reuse the LUN
number for a new volume, and if that new volume were to be
scheduled to a pod with a phantom reference to the old LUN by the
same number, the initiator could get confused and possibly corrupt
data on that volume.
To avoid these problems, the new implementation only scans for
the specific LUN number it expects to see. It's worth noting that
the default behavior of iscsiadm is to automatically scan the
whole bus on login. That behavior can be disabled by setting
node.session.scan = manual
in iscsid.conf, and for the reasons mentioned above, it is
strongly recommended to set that option. This change still works
regardless of the setting in iscsid.conf, and while automatic
scanning will cause some problems, this change doesn't make the
problems any worse, and can make things better in some cases.
2018-07-25 03:58:19 +00:00
}
2018-10-08 11:00:24 +00:00
if exist := waitForPathToExist ( & devicePath , multipathDeviceTimeout , iscsiTransport ) ; ! exist {
2018-11-09 18:49:10 +00:00
klog . Errorf ( "Could not attach disk: Timeout after 10s" )
2018-10-08 11:00:24 +00:00
// update last error
lastErr = fmt . Errorf ( "Could not attach disk: Timeout after 10s" )
continue
} else {
devicePaths [ tp ] = devicePath
}
2016-12-23 14:42:13 +00:00
}
2018-11-09 18:49:10 +00:00
klog . V ( 4 ) . Infof ( "iscsi: tried all devices for %q %d times, %d paths found" , b . Iqn , i , len ( devicePaths ) )
2018-10-08 11:00:24 +00:00
if len ( devicePaths ) == 0 {
// No path attached, report error and stop trying. kubelet will try again in a short while
// delete cloned iface
b . exec . Run ( "iscsiadm" , "-m" , "iface" , "-I" , b . Iface , "-o" , "delete" )
2018-11-09 18:49:10 +00:00
klog . Errorf ( "iscsi: failed to get any path for iscsi disk, last err seen:\n%v" , lastErr )
2018-10-08 11:00:24 +00:00
return "" , fmt . Errorf ( "failed to get any path for iscsi disk, last err seen:\n%v" , lastErr )
2017-08-15 13:53:46 +00:00
}
2018-10-08 11:00:24 +00:00
if len ( devicePaths ) == len ( bkpPortal ) {
// We have all paths
2018-11-09 18:49:10 +00:00
klog . V ( 4 ) . Infof ( "iscsi: all devices for %q found" , b . Iqn )
2018-10-08 11:00:24 +00:00
break
2015-03-13 21:31:13 +00:00
}
2018-10-08 11:00:24 +00:00
if len ( devicePaths ) >= minMultipathCount && i >= minAttachAttempts {
// We have at least two paths for multipath and we tried the other paths long enough
2018-11-09 18:49:10 +00:00
klog . V ( 4 ) . Infof ( "%d devices found for %q" , len ( devicePaths ) , b . Iqn )
2018-10-08 11:00:24 +00:00
break
2015-03-13 21:31:13 +00:00
}
}
2017-02-06 11:36:33 +00:00
2017-09-18 23:29:32 +00:00
if lastErr != nil {
2018-11-09 18:49:10 +00:00
klog . Errorf ( "iscsi: last error occurred during iscsi init:\n%v" , lastErr )
2017-09-18 23:29:32 +00:00
}
2017-02-06 11:36:33 +00:00
2018-10-08 11:00:24 +00:00
devicePathList := [ ] string { }
for _ , path := range devicePaths {
devicePathList = append ( devicePathList , path )
}
2018-07-06 20:04:27 +00:00
// Try to find a multipath device for the volume
2018-10-08 11:00:24 +00:00
if len ( bkpPortal ) > 1 {
// Multipath volume was requested. Wait up to 10 seconds for the multipath device to appear.
devicePath = waitForMultiPathToExist ( devicePathList , 10 , b . deviceUtil )
2018-07-06 20:04:27 +00:00
} else {
// For PVs with 1 portal, just try one time to find the multipath device. This
// avoids a long pause when the multipath device will never get created, and
// matches legacy behavior.
2018-10-08 11:00:24 +00:00
devicePath = waitForMultiPathToExist ( devicePathList , 1 , b . deviceUtil )
2018-07-06 20:04:27 +00:00
}
// When no multipath device is found, just use the first (and presumably only) device
if devicePath == "" {
2018-10-08 11:00:24 +00:00
devicePath = devicePathList [ 0 ]
2016-04-19 05:10:00 +00:00
}
Avoid deleted iSCSI LUNs in the kernel
This change ensures that iSCSI block devices are deleted after
unmounting, and implements scanning of individual LUNs rather
than scanning the whole iSCSI bus.
In cases where an iSCSI bus is in use by more than one attachment,
detaching used to leave behind phantom block devices, which could
cause I/O errors, long timeouts, or even corruption in the case
when the underlying LUN number was recycled. This change makes
sure to flush references to the block devices after unmounting.
The original iSCSI code scanned the whole target every time a LUN
was attached. On storage controllers that export multiple LUNs on
the same target IQN, this led to a situation where nodes would
see SCSI disks that they weren't supposed to -- possibly dozens or
hundreds of extra SCSI disks. This caused 3 significant problems:
1) The large number of disks wasted resources on the node and
caused a minor drag on performance.
2) The scanning of all the devices caused a huge number of uevents
from the kernel, causing udev to bog down for multiple minutes in
some cases, triggering timeouts and other transient failures.
3) Because Kubernetes was not tracking all the "extra" LUNs that
got discovered, they would not get cleaned up until the last LUN
on a particular target was detached, causing a logout. This led
to significant complications:
In the time window between when a LUN was unintentionally scanned,
and when it was removed due to a logout, if it was deleted on the
backend, a phantom reference remained on the node. In the best
case, the phantom LUN would cause I/O errors and timeouts in the
udev system. In the worst case, the backend could reuse the LUN
number for a new volume, and if that new volume were to be
scheduled to a pod with a phantom reference to the old LUN by the
same number, the initiator could get confused and possibly corrupt
data on that volume.
To avoid these problems, the new implementation only scans for
the specific LUN number it expects to see. It's worth noting that
the default behavior of iscsiadm is to automatically scan the
whole bus on login. That behavior can be disabled by setting
node.session.scan = manual
in iscsid.conf, and for the reasons mentioned above, it is
strongly recommended to set that option. This change still works
regardless of the setting in iscsid.conf, and while automatic
scanning will cause some problems, this change doesn't make the
problems any worse, and can make things better in some cases.
2018-07-25 03:58:19 +00:00
2018-11-09 18:49:10 +00:00
klog . V ( 5 ) . Infof ( "iscsi: AttachDisk devicePath: %s" , devicePath )
2017-10-28 19:28:52 +00:00
// run global mount path related operations based on volumeMode
return globalPDPathOperation ( b ) ( b , devicePath , util )
}
// globalPDPathOperation returns global mount path related operations based on volumeMode.
// If the volumeMode is 'Filesystem' or not defined, plugin needs to create a dir, persist
2018-02-09 06:53:53 +00:00
// iscsi configurations, and then format/mount the volume.
// If the volumeMode is 'Block', plugin creates a dir and persists iscsi configurations.
2017-10-28 19:28:52 +00:00
// Since volume type is block, plugin doesn't need to format/mount the volume.
func globalPDPathOperation ( b iscsiDiskMounter ) func ( iscsiDiskMounter , string , * ISCSIUtil ) ( string , error ) {
// TODO: remove feature gate check after no longer needed
if utilfeature . DefaultFeatureGate . Enabled ( features . BlockVolume ) {
2018-11-09 18:49:10 +00:00
klog . V ( 5 ) . Infof ( "iscsi: AttachDisk volumeMode: %s" , b . volumeMode )
2017-10-28 19:28:52 +00:00
if b . volumeMode == v1 . PersistentVolumeBlock {
// If the volumeMode is 'Block', plugin don't need to format the volume.
return func ( b iscsiDiskMounter , devicePath string , util * ISCSIUtil ) ( string , error ) {
globalPDPath := b . manager . MakeGlobalVDPDName ( * b . iscsiDisk )
// Create dir like /var/lib/kubelet/plugins/kubernetes.io/iscsi/volumeDevices/{ifaceName}/{portal-some_iqn-lun-lun_id}
if err := os . MkdirAll ( globalPDPath , 0750 ) ; err != nil {
2018-11-09 18:49:10 +00:00
klog . Errorf ( "iscsi: failed to mkdir %s, error" , globalPDPath )
2017-10-28 19:28:52 +00:00
return "" , err
}
// Persist iscsi disk config to json file for DetachDisk path
util . persistISCSI ( * ( b . iscsiDisk ) , globalPDPath )
return devicePath , nil
}
}
2015-03-13 21:31:13 +00:00
}
2017-10-28 19:28:52 +00:00
// If the volumeMode is 'Filesystem', plugin needs to format the volume
// and mount it to globalPDPath.
return func ( b iscsiDiskMounter , devicePath string , util * ISCSIUtil ) ( string , error ) {
globalPDPath := b . manager . MakeGlobalPDName ( * b . iscsiDisk )
notMnt , err := b . mounter . IsLikelyNotMountPoint ( globalPDPath )
if err != nil && ! os . IsNotExist ( err ) {
return "" , fmt . Errorf ( "Heuristic determination of mount point failed:%v" , err )
}
// Return confirmed devicePath to caller
if ! notMnt {
2018-11-09 18:49:10 +00:00
klog . Infof ( "iscsi: %s already mounted" , globalPDPath )
2017-10-28 19:28:52 +00:00
return devicePath , nil
}
// Create dir like /var/lib/kubelet/plugins/kubernetes.io/iscsi/{ifaceName}/{portal-some_iqn-lun-lun_id}
if err := os . MkdirAll ( globalPDPath , 0750 ) ; err != nil {
2018-11-09 18:49:10 +00:00
klog . Errorf ( "iscsi: failed to mkdir %s, error" , globalPDPath )
2017-10-28 19:28:52 +00:00
return "" , err
}
// Persist iscsi disk config to json file for DetachDisk path
util . persistISCSI ( * ( b . iscsiDisk ) , globalPDPath )
2015-03-13 21:31:13 +00:00
2017-10-28 19:28:52 +00:00
err = b . mounter . FormatAndMount ( devicePath , globalPDPath , b . fsType , nil )
if err != nil {
2018-11-09 18:49:10 +00:00
klog . Errorf ( "iscsi: failed to mount iscsi volume %s [%s] to %s, error %v" , devicePath , b . fsType , globalPDPath , err )
2017-10-28 19:28:52 +00:00
}
return devicePath , nil
}
2015-03-13 21:31:13 +00:00
}
Avoid deleted iSCSI LUNs in the kernel
This change ensures that iSCSI block devices are deleted after
unmounting, and implements scanning of individual LUNs rather
than scanning the whole iSCSI bus.
In cases where an iSCSI bus is in use by more than one attachment,
detaching used to leave behind phantom block devices, which could
cause I/O errors, long timeouts, or even corruption in the case
when the underlying LUN number was recycled. This change makes
sure to flush references to the block devices after unmounting.
The original iSCSI code scanned the whole target every time a LUN
was attached. On storage controllers that export multiple LUNs on
the same target IQN, this led to a situation where nodes would
see SCSI disks that they weren't supposed to -- possibly dozens or
hundreds of extra SCSI disks. This caused 3 significant problems:
1) The large number of disks wasted resources on the node and
caused a minor drag on performance.
2) The scanning of all the devices caused a huge number of uevents
from the kernel, causing udev to bog down for multiple minutes in
some cases, triggering timeouts and other transient failures.
3) Because Kubernetes was not tracking all the "extra" LUNs that
got discovered, they would not get cleaned up until the last LUN
on a particular target was detached, causing a logout. This led
to significant complications:
In the time window between when a LUN was unintentionally scanned,
and when it was removed due to a logout, if it was deleted on the
backend, a phantom reference remained on the node. In the best
case, the phantom LUN would cause I/O errors and timeouts in the
udev system. In the worst case, the backend could reuse the LUN
number for a new volume, and if that new volume were to be
scheduled to a pod with a phantom reference to the old LUN by the
same number, the initiator could get confused and possibly corrupt
data on that volume.
To avoid these problems, the new implementation only scans for
the specific LUN number it expects to see. It's worth noting that
the default behavior of iscsiadm is to automatically scan the
whole bus on login. That behavior can be disabled by setting
node.session.scan = manual
in iscsid.conf, and for the reasons mentioned above, it is
strongly recommended to set that option. This change still works
regardless of the setting in iscsid.conf, and while automatic
scanning will cause some problems, this change doesn't make the
problems any worse, and can make things better in some cases.
2018-07-25 03:58:19 +00:00
// Delete 1 block device of the form "sd*"
func deleteDevice ( deviceName string ) error {
filename := fmt . Sprintf ( "/sys/block/%s/device/delete" , deviceName )
fd , err := os . OpenFile ( filename , os . O_WRONLY , 0 )
if err != nil {
// The file was not present, so just return without error
return nil
}
defer fd . Close ( )
if written , err := fd . WriteString ( "1" ) ; err != nil {
return err
} else if 0 == written {
return fmt . Errorf ( "No data written to file: %s" , filename )
}
2018-11-09 18:49:10 +00:00
klog . V ( 4 ) . Infof ( "Deleted block device: %s" , deviceName )
Avoid deleted iSCSI LUNs in the kernel
This change ensures that iSCSI block devices are deleted after
unmounting, and implements scanning of individual LUNs rather
than scanning the whole iSCSI bus.
In cases where an iSCSI bus is in use by more than one attachment,
detaching used to leave behind phantom block devices, which could
cause I/O errors, long timeouts, or even corruption in the case
when the underlying LUN number was recycled. This change makes
sure to flush references to the block devices after unmounting.
The original iSCSI code scanned the whole target every time a LUN
was attached. On storage controllers that export multiple LUNs on
the same target IQN, this led to a situation where nodes would
see SCSI disks that they weren't supposed to -- possibly dozens or
hundreds of extra SCSI disks. This caused 3 significant problems:
1) The large number of disks wasted resources on the node and
caused a minor drag on performance.
2) The scanning of all the devices caused a huge number of uevents
from the kernel, causing udev to bog down for multiple minutes in
some cases, triggering timeouts and other transient failures.
3) Because Kubernetes was not tracking all the "extra" LUNs that
got discovered, they would not get cleaned up until the last LUN
on a particular target was detached, causing a logout. This led
to significant complications:
In the time window between when a LUN was unintentially scanned,
and when it was removed due to a logout, if it was deleted on the
backend, a phantom reference remained on the node. In the best
case, the phantom LUN would cause I/O errors and timeouts in the
udev system. In the worst case, the backend could reuse the LUN
number for a new volume, and if that new volume were to be
scheduled to a pod with a phantom reference to the old LUN by the
same number, the initiator could get confused and possibly corrupt
data on that volume.
To avoid these problems, the new implementation only scans for
the specific LUN number it expects to see. It's worth noting that
the default behavior of iscsiadm is to automatically scan the
whole bus on login. That behavior can be disabled by setting
node.session.scan = manual
in iscsid.conf, and for the reasons mentioned above, it is
strongly recommended to set that option. This change still works
regardless of the setting in iscsid.conf, and while automatic
scanning will cause some problems, this change doesn't make the
problems any worse, and can make things better in some cases.
2018-07-25 03:58:19 +00:00
return nil
}
// deleteDevices tries to remove all the block devices and multipath map devices
// associated with a given iscsi device
func deleteDevices ( c iscsiDiskUnmounter ) error {
lunNumber , err := strconv . Atoi ( c . iscsiDisk . Lun )
if err != nil {
2018-11-09 18:49:10 +00:00
klog . Errorf ( "iscsi delete devices: lun is not a number: %s\nError: %v" , c . iscsiDisk . Lun , err )
Avoid deleted iSCSI LUNs in the kernel
This change ensures that iSCSI block devices are deleted after
unmounting, and implements scanning of individual LUNs rather
than scanning the whole iSCSI bus.
In cases where an iSCSI bus is in use by more than one attachment,
detaching used to leave behind phantom block devices, which could
cause I/O errors, long timeouts, or even corruption in the case
when the underlying LUN number was recycled. This change makes
sure to flush references to the block devices after unmounting.
The original iSCSI code scanned the whole target every time a LUN
was attached. On storage controllers that export multiple LUNs on
the same target IQN, this led to a situation where nodes would
see SCSI disks that they weren't supposed to -- possibly dozens or
hundreds of extra SCSI disks. This caused 3 significant problems:
1) The large number of disks wasted resources on the node and
caused a minor drag on performance.
2) The scanning of all the devices caused a huge number of uevents
from the kernel, causing udev to bog down for multiple minutes in
some cases, triggering timeouts and other transient failures.
3) Because Kubernetes was not tracking all the "extra" LUNs that
got discovered, they would not get cleaned up until the last LUN
on a particular target was detached, causing a logout. This led
to significant complications:
In the time window between when a LUN was unintentionally scanned,
and when it was removed due to a logout, if it was deleted on the
backend, a phantom reference remained on the node. In the best
case, the phantom LUN would cause I/O errors and timeouts in the
udev system. In the worst case, the backend could reuse the LUN
number for a new volume, and if that new volume were to be
scheduled to a pod with a phantom reference to the old LUN by the
same number, the initiator could get confused and possibly corrupt
data on that volume.
To avoid these problems, the new implementation only scans for
the specific LUN number it expects to see. It's worth noting that
the default behavior of iscsiadm is to automatically scan the
whole bus on login. That behavior can be disabled by setting
node.session.scan = manual
in iscsid.conf, and for the reasons mentioned above, it is
strongly recommended to set that option. This change still works
regardless of the setting in iscsid.conf, and while automatic
scanning will cause some problems, this change doesn't make the
problems any worse, and can make things better in some cases.
2018-07-25 03:58:19 +00:00
return err
}
// Enumerate the devices so we can delete them
deviceNames , err := c . deviceUtil . FindDevicesForISCSILun ( c . iscsiDisk . Iqn , lunNumber )
if err != nil {
2018-11-09 18:49:10 +00:00
klog . Errorf ( "iscsi delete devices: could not get devices associated with LUN %d on target %s\nError: %v" ,
Avoid deleted iSCSI LUNs in the kernel
This change ensures that iSCSI block devices are deleted after
unmounting, and implements scanning of individual LUNs rather
than scanning the whole iSCSI bus.
In cases where an iSCSI bus is in use by more than one attachment,
detaching used to leave behind phantom block devices, which could
cause I/O errors, long timeouts, or even corruption in the case
when the underlying LUN number was recycled. This change makes
sure to flush references to the block devices after unmounting.
The original iSCSI code scanned the whole target every time a LUN
was attached. On storage controllers that export multiple LUNs on
the same target IQN, this led to a situation where nodes would
see SCSI disks that they weren't supposed to -- possibly dozens or
hundreds of extra SCSI disks. This caused 3 significant problems:
1) The large number of disks wasted resources on the node and
caused a minor drag on performance.
2) The scanning of all the devices caused a huge number of uevents
from the kernel, causing udev to bog down for multiple minutes in
some cases, triggering timeouts and other transient failures.
3) Because Kubernetes was not tracking all the "extra" LUNs that
got discovered, they would not get cleaned up until the last LUN
on a particular target was detached, causing a logout. This led
to significant complications:
In the time window between when a LUN was unintentionally scanned,
and when it was removed due to a logout, if it was deleted on the
backend, a phantom reference remained on the node. In the best
case, the phantom LUN would cause I/O errors and timeouts in the
udev system. In the worst case, the backend could reuse the LUN
number for a new volume, and if that new volume were to be
scheduled to a pod with a phantom reference to the old LUN by the
same number, the initiator could get confused and possibly corrupt
data on that volume.
To avoid these problems, the new implementation only scans for
the specific LUN number it expects to see. It's worth noting that
the default behavior of iscsiadm is to automatically scan the
whole bus on login. That behavior can be disabled by setting
node.session.scan = manual
in iscsid.conf, and for the reasons mentioned above, it is
strongly recommended to set that option. This change still works
regardless of the setting in iscsid.conf, and while automatic
scanning will cause some problems, this change doesn't make the
problems any worse, and can make things better in some cases.
2018-07-25 03:58:19 +00:00
lunNumber , c . iscsiDisk . Iqn , err )
return err
}
// Find the multipath device path(s)
mpathDevices := make ( map [ string ] bool )
for _ , deviceName := range deviceNames {
path := "/dev/" + deviceName
// check if the dev is using mpio and if so mount it via the dm-XX device
if mappedDevicePath := c . deviceUtil . FindMultipathDeviceForDevice ( path ) ; mappedDevicePath != "" {
mpathDevices [ mappedDevicePath ] = true
}
}
// Flush any multipath device maps
for mpathDevice := range mpathDevices {
_ , err = c . exec . Run ( "multipath" , "-f" , mpathDevice )
if err != nil {
2018-11-09 18:49:10 +00:00
klog . Warningf ( "Warning: Failed to flush multipath device map: %s\nError: %v" , mpathDevice , err )
Avoid deleted iSCSI LUNs in the kernel
This change ensures that iSCSI block devices are deleted after
unmounting, and implements scanning of individual LUNs rather
than scanning the whole iSCSI bus.
In cases where an iSCSI bus is in use by more than one attachment,
detaching used to leave behind phantom block devices, which could
cause I/O errors, long timeouts, or even corruption in the case
when the underlying LUN number was recycled. This change makes
sure to flush references to the block devices after unmounting.
The original iSCSI code scanned the whole target every time a LUN
was attached. On storage controllers that export multiple LUNs on
the same target IQN, this led to a situation where nodes would
see SCSI disks that they weren't supposed to -- possibly dozens or
hundreds of extra SCSI disks. This caused 3 significant problems:
1) The large number of disks wasted resources on the node and
caused a minor drag on performance.
2) The scanning of all the devices caused a huge number of uevents
from the kernel, causing udev to bog down for multiple minutes in
some cases, triggering timeouts and other transient failures.
3) Because Kubernetes was not tracking all the "extra" LUNs that
got discovered, they would not get cleaned up until the last LUN
on a particular target was detached, causing a logout. This led
to significant complications:
In the time window between when a LUN was unintentionally scanned,
and when it was removed due to a logout, if it was deleted on the
backend, a phantom reference remained on the node. In the best
case, the phantom LUN would cause I/O errors and timeouts in the
udev system. In the worst case, the backend could reuse the LUN
number for a new volume, and if that new volume were to be
scheduled to a pod with a phantom reference to the old LUN by the
same number, the initiator could get confused and possibly corrupt
data on that volume.
To avoid these problems, the new implementation only scans for
the specific LUN number it expects to see. It's worth noting that
the default behavior of iscsiadm is to automatically scan the
whole bus on login. That behavior can be disabled by setting
node.session.scan = manual
in iscsid.conf, and for the reasons mentioned above, it is
strongly recommended to set that option. This change still works
regardless of the setting in iscsid.conf, and while automatic
scanning will cause some problems, this change doesn't make the
problems any worse, and can make things better in some cases.
2018-07-25 03:58:19 +00:00
// Fall through -- keep deleting the block devices
}
2018-11-09 18:49:10 +00:00
klog . V ( 4 ) . Infof ( "Flushed multipath device: %s" , mpathDevice )
Avoid deleted iSCSI LUNs in the kernel
This change ensures that iSCSI block devices are deleted after
unmounting, and implements scanning of individual LUNs rather
than scanning the whole iSCSI bus.
In cases where an iSCSI bus is in use by more than one attachment,
detaching used to leave behind phantom block devices, which could
cause I/O errors, long timeouts, or even corruption in the case
when the underlying LUN number was recycled. This change makes
sure to flush references to the block devices after unmounting.
The original iSCSI code scanned the whole target every time a LUN
was attached. On storage controllers that export multiple LUNs on
the same target IQN, this led to a situation where nodes would
see SCSI disks that they weren't supposed to -- possibly dozens or
hundreds of extra SCSI disks. This caused 3 significant problems:
1) The large number of disks wasted resources on the node and
caused a minor drag on performance.
2) The scanning of all the devices caused a huge number of uevents
from the kernel, causing udev to bog down for multiple minutes in
some cases, triggering timeouts and other transient failures.
3) Because Kubernetes was not tracking all the "extra" LUNs that
got discovered, they would not get cleaned up until the last LUN
on a particular target was detached, causing a logout. This led
to significant complications:
In the time window between when a LUN was unintentionally scanned,
and when it was removed due to a logout, if it was deleted on the
backend, a phantom reference remained on the node. In the best
case, the phantom LUN would cause I/O errors and timeouts in the
udev system. In the worst case, the backend could reuse the LUN
number for a new volume, and if that new volume were to be
scheduled to a pod with a phantom reference to the old LUN by the
same number, the initiator could get confused and possibly corrupt
data on that volume.
To avoid these problems, the new implementation only scans for
the specific LUN number it expects to see. It's worth noting that
the default behavior of iscsiadm is to automatically scan the
whole bus on login. That behavior can be disabled by setting
node.session.scan = manual
in iscsid.conf, and for the reasons mentioned above, it is
strongly recommended to set that option. This change still works
regardless of the setting in iscsid.conf, and while automatic
scanning will cause some problems, this change doesn't make the
problems any worse, and can make things better in some cases.
2018-07-25 03:58:19 +00:00
}
for _ , deviceName := range deviceNames {
err = deleteDevice ( deviceName )
if err != nil {
2018-11-09 18:49:10 +00:00
klog . Warningf ( "Warning: Failed to delete block device: %s\nError: %v" , deviceName , err )
Avoid deleted iSCSI LUNs in the kernel
This change ensures that iSCSI block devices are deleted after
unmounting, and implements scanning of individual LUNs rather
than scanning the whole iSCSI bus.
In cases where an iSCSI bus is in use by more than one attachment,
detaching used to leave behind phantom block devices, which could
cause I/O errors, long timeouts, or even corruption in the case
when the underlying LUN number was recycled. This change makes
sure to flush references to the block devices after unmounting.
The original iSCSI code scanned the whole target every time a LUN
was attached. On storage controllers that export multiple LUNs on
the same target IQN, this led to a situation where nodes would
see SCSI disks that they weren't supposed to -- possibly dozens or
hundreds of extra SCSI disks. This caused 3 significant problems:
1) The large number of disks wasted resources on the node and
caused a minor drag on performance.
2) The scanning of all the devices caused a huge number of uevents
from the kernel, causing udev to bog down for multiple minutes in
some cases, triggering timeouts and other transient failures.
3) Because Kubernetes was not tracking all the "extra" LUNs that
got discovered, they would not get cleaned up until the last LUN
on a particular target was detached, causing a logout. This led
to significant complications:
In the time window between when a LUN was unintentionally scanned,
and when it was removed due to a logout, if it was deleted on the
backend, a phantom reference remained on the node. In the best
case, the phantom LUN would cause I/O errors and timeouts in the
udev system. In the worst case, the backend could reuse the LUN
number for a new volume, and if that new volume were to be
scheduled to a pod with a phantom reference to the old LUN by the
same number, the initiator could get confused and possibly corrupt
data on that volume.
To avoid these problems, the new implementation only scans for
the specific LUN number it expects to see. It's worth noting that
the default behavior of iscsiadm is to automatically scan the
whole bus on login. That behavior can be disabled by setting
node.session.scan = manual
in iscsid.conf, and for the reasons mentioned above, it is
strongly recommended to set that option. This change still works
regardless of the setting in iscsid.conf, and while automatic
scanning will cause some problems, this change doesn't make the
problems any worse, and can make things better in some cases.
2018-07-25 03:58:19 +00:00
// Fall through -- keep deleting other block devices
}
}
return nil
}
2017-10-28 19:28:52 +00:00
// DetachDisk unmounts and detaches a volume from node
2016-03-23 05:12:21 +00:00
func ( util * ISCSIUtil ) DetachDisk ( c iscsiDiskUnmounter , mntPath string ) error {
2017-08-09 15:13:58 +00:00
if pathExists , pathErr := volumeutil . PathExists ( mntPath ) ; pathErr != nil {
return fmt . Errorf ( "Error checking if path exists: %v" , pathErr )
} else if ! pathExists {
2018-11-09 18:49:10 +00:00
klog . Warningf ( "Warning: Unmount skipped because path does not exist: %v" , mntPath )
2017-08-09 15:13:58 +00:00
return nil
}
2018-09-27 08:24:59 +00:00
notMnt , err := c . mounter . IsLikelyNotMountPoint ( mntPath )
if err != nil {
2015-03-13 21:31:13 +00:00
return err
}
2018-09-27 08:24:59 +00:00
if ! notMnt {
if err := c . mounter . Unmount ( mntPath ) ; err != nil {
2018-11-09 18:49:10 +00:00
klog . Errorf ( "iscsi detach disk: failed to unmount: %s\nError: %v" , mntPath , err )
2018-09-27 08:24:59 +00:00
return err
}
}
2015-03-13 21:31:13 +00:00
// if device is no longer used, see if need to logout the target
2017-08-15 13:53:46 +00:00
device , prefix , err := extractDeviceAndPrefix ( mntPath )
if err != nil {
return err
}
var bkpPortal [ ] string
var volName , iqn , iface , initiatorName string
found := true
// load iscsi disk config from json file
if err := util . loadISCSI ( c . iscsiDisk , mntPath ) ; err == nil {
bkpPortal , iqn , iface , volName = c . iscsiDisk . Portals , c . iscsiDisk . Iqn , c . iscsiDisk . Iface , c . iscsiDisk . VolName
initiatorName = c . iscsiDisk . InitiatorName
} else {
// If the iscsi disk config is not found, fall back to the original behavior.
// This portal/iqn/iface is no longer referenced, log out.
// Extract the portal and iqn from device path.
bkpPortal = make ( [ ] string , 1 )
bkpPortal [ 0 ] , iqn , err = extractPortalAndIqn ( device )
2015-10-13 18:50:49 +00:00
if err != nil {
return err
}
2017-08-15 13:53:46 +00:00
// Extract the iface from the mountPath and use it to log out. If the iface
// is not found, maintain the previous behavior to facilitate kubelet upgrade.
// Logout may fail as no session may exist for the portal/IQN on the specified interface.
iface , found = extractIface ( mntPath )
}
Avoid deleted iSCSI LUNs in the kernel
This change ensures that iSCSI block devices are deleted after
unmounting, and implements scanning of individual LUNs rather
than scanning the whole iSCSI bus.
In cases where an iSCSI bus is in use by more than one attachment,
detaching used to leave behind phantom block devices, which could
cause I/O errors, long timeouts, or even corruption in the case
when the underlying LUN number was recycled. This change makes
sure to flush references to the block devices after unmounting.
The original iSCSI code scanned the whole target every time a LUN
was attached. On storage controllers that export multiple LUNs on
the same target IQN, this led to a situation where nodes would
see SCSI disks that they weren't supposed to -- possibly dozens or
hundreds of extra SCSI disks. This caused 3 significant problems:
1) The large number of disks wasted resources on the node and
caused a minor drag on performance.
2) The scanning of all the devices caused a huge number of uevents
from the kernel, causing udev to bog down for multiple minutes in
some cases, triggering timeouts and other transient failures.
3) Because Kubernetes was not tracking all the "extra" LUNs that
got discovered, they would not get cleaned up until the last LUN
on a particular target was detached, causing a logout. This led
to significant complications:
In the time window between when a LUN was unintentially scanned,
and when it was removed due to a logout, if it was deleted on the
backend, a phantom reference remained on the node. In the best
case, the phantom LUN would cause I/O errors and timeouts in the
udev system. In the worst case, the backend could reuse the LUN
number for a new volume, and if that new volume were to be
scheduled to a pod with a phantom reference to the old LUN by the
same number, the initiator could get confused and possibly corrupt
data on that volume.
To avoid these problems, the new implementation only scans for
the specific LUN number it expects to see. It's worth noting that
the default behavior of iscsiadm is to automatically scan the
whole bus on login. That behavior can be disabled by setting
node.session.scan = manual
in iscsid.conf, and for the reasons mentioned above, it is
strongly recommended to set that option. This change still works
regardless of the setting in iscsid.conf, and while automatic
scanning will cause some problems, this change doesn't make the
problems any worse, and can make things better in some cases.
2018-07-25 03:58:19 +00:00
// Delete all the scsi devices and any multipath devices after unmounting
if err = deleteDevices ( c ) ; err != nil {
2018-11-09 18:49:10 +00:00
klog . Warningf ( "iscsi detach disk: failed to delete devices\nError: %v" , err )
Avoid deleted iSCSI LUNs in the kernel
This change ensures that iSCSI block devices are deleted after
unmounting, and implements scanning of individual LUNs rather
than scanning the whole iSCSI bus.
In cases where an iSCSI bus is in use by more than one attachment,
detaching used to leave behind phantom block devices, which could
cause I/O errors, long timeouts, or even corruption in the case
when the underlying LUN number was recycled. This change makes
sure to flush references to the block devices after unmounting.
The original iSCSI code scanned the whole target every time a LUN
was attached. On storage controllers that export multiple LUNs on
the same target IQN, this led to a situation where nodes would
see SCSI disks that they weren't supposed to -- possibly dozens or
hundreds of extra SCSI disks. This caused 3 significant problems:
1) The large number of disks wasted resources on the node and
caused a minor drag on performance.
2) The scanning of all the devices caused a huge number of uevents
from the kernel, causing udev to bog down for multiple minutes in
some cases, triggering timeouts and other transient failures.
3) Because Kubernetes was not tracking all the "extra" LUNs that
got discovered, they would not get cleaned up until the last LUN
on a particular target was detached, causing a logout. This led
to significant complications:
In the time window between when a LUN was unintentially scanned,
and when it was removed due to a logout, if it was deleted on the
backend, a phantom reference remained on the node. In the best
case, the phantom LUN would cause I/O errors and timeouts in the
udev system. In the worst case, the backend could reuse the LUN
number for a new volume, and if that new volume were to be
scheduled to a pod with a phantom reference to the old LUN by the
same number, the initiator could get confused and possibly corrupt
data on that volume.
To avoid these problems, the new implementation only scans for
the specific LUN number it expects to see. It's worth noting that
the default behavior of iscsiadm is to automatically scan the
whole bus on login. That behavior can be disabled by setting
node.session.scan = manual
in iscsid.conf, and for the reasons mentioned above, it is
strongly recommended to set that option. This change still works
regardless of the setting in iscsid.conf, and while automatic
scanning will cause some problems, this change doesn't make the
problems any worse, and can make things better in some cases.
2018-07-25 03:58:19 +00:00
// Fall through -- even if deleting fails, a logout may fix problems
}
// Lock the target while we determine if we can safely log out or not
c . plugin . targetLocks . LockKey ( iqn )
defer c . plugin . targetLocks . UnlockKey ( iqn )
// if device is no longer used, see if need to logout the target
refCount , err := getDevicePrefixRefCount ( c . mounter , prefix )
if err != nil || refCount != 0 {
return nil
}
2017-08-15 13:53:46 +00:00
portals := removeDuplicate ( bkpPortal )
if len ( portals ) == 0 {
2017-10-28 19:28:52 +00:00
return fmt . Errorf ( "iscsi detach disk: failed to detach iscsi disk. Couldn't get connected portals from configurations" )
}
err = util . detachISCSIDisk ( c . exec , portals , iqn , iface , volName , initiatorName , found )
if err != nil {
return fmt . Errorf ( "failed to finish detachISCSIDisk, err: %v" , err )
}
return nil
}
// DetachBlockISCSIDisk removes loopback device for a volume and detaches a volume from node
func ( util * ISCSIUtil ) DetachBlockISCSIDisk ( c iscsiDiskUnmapper , mapPath string ) error {
if pathExists , pathErr := volumeutil . PathExists ( mapPath ) ; pathErr != nil {
return fmt . Errorf ( "Error checking if path exists: %v" , pathErr )
} else if ! pathExists {
2018-11-09 18:49:10 +00:00
klog . Warningf ( "Warning: Unmap skipped because path does not exist: %v" , mapPath )
2017-10-28 19:28:52 +00:00
return nil
}
// If we arrive here, device is no longer used, see if need to logout the target
// device: 192.168.0.10:3260-iqn.2017-05.com.example:test-lun-0
device , _ , err := extractDeviceAndPrefix ( mapPath )
if err != nil {
return err
}
var bkpPortal [ ] string
var volName , iqn , lun , iface , initiatorName string
found := true
// load iscsi disk config from json file
if err := util . loadISCSI ( c . iscsiDisk , mapPath ) ; err == nil {
bkpPortal , iqn , lun , iface , volName = c . iscsiDisk . Portals , c . iscsiDisk . Iqn , c . iscsiDisk . Lun , c . iscsiDisk . Iface , c . iscsiDisk . VolName
initiatorName = c . iscsiDisk . InitiatorName
} else {
// If the iscsi disk config is not found, fall back to the original behavior.
// This portal/iqn/iface is no longer referenced, log out.
// Extract the portal and iqn from device path.
bkpPortal = make ( [ ] string , 1 )
bkpPortal [ 0 ] , iqn , err = extractPortalAndIqn ( device )
if err != nil {
return err
}
arr := strings . Split ( device , "-lun-" )
if len ( arr ) < 2 {
2018-02-09 06:53:53 +00:00
return fmt . Errorf ( "failed to retrieve lun from mapPath: %v" , mapPath )
2017-10-28 19:28:52 +00:00
}
lun = arr [ 1 ]
// Extract the iface from the mountPath and use it to log out. If the iface
// is not found, maintain the previous behavior to facilitate kubelet upgrade.
// Logout may fail as no session may exist for the portal/IQN on the specified interface.
iface , found = extractIface ( mapPath )
}
portals := removeDuplicate ( bkpPortal )
if len ( portals ) == 0 {
return fmt . Errorf ( "iscsi detach disk: failed to detach iscsi disk. Couldn't get connected portals from configurations" )
}
devicePath := getDevByPath ( portals [ 0 ] , iqn , lun )
2018-11-09 18:49:10 +00:00
klog . V ( 5 ) . Infof ( "iscsi: devicePath: %s" , devicePath )
2017-10-28 19:28:52 +00:00
if _ , err = os . Stat ( devicePath ) ; err != nil {
return fmt . Errorf ( "failed to validate devicePath: %s" , devicePath )
}
// check if the dev is using mpio and if so mount it via the dm-XX device
if mappedDevicePath := c . deviceUtil . FindMultipathDeviceForDevice ( devicePath ) ; mappedDevicePath != "" {
devicePath = mappedDevicePath
}
// Detach a volume from kubelet node
err = util . detachISCSIDisk ( c . exec , portals , iqn , iface , volName , initiatorName , found )
if err != nil {
return fmt . Errorf ( "failed to finish detachISCSIDisk, err: %v" , err )
}
return nil
}
2017-07-12 03:37:48 +00:00
2017-10-28 19:28:52 +00:00
func ( util * ISCSIUtil ) detachISCSIDisk ( exec mount . Exec , portals [ ] string , iqn , iface , volName , initiatorName string , found bool ) error {
2017-08-15 13:53:46 +00:00
for _ , portal := range portals {
logoutArgs := [ ] string { "-m" , "node" , "-p" , portal , "-T" , iqn , "--logout" }
deleteArgs := [ ] string { "-m" , "node" , "-p" , portal , "-T" , iqn , "-o" , "delete" }
if found {
logoutArgs = append ( logoutArgs , [ ] string { "-I" , iface } ... )
deleteArgs = append ( deleteArgs , [ ] string { "-I" , iface } ... )
}
2018-11-09 18:49:10 +00:00
klog . Infof ( "iscsi: log out target %s iqn %s iface %s" , portal , iqn , iface )
2017-10-28 19:28:52 +00:00
out , err := exec . Run ( "iscsiadm" , logoutArgs ... )
2017-08-15 13:53:46 +00:00
if err != nil {
2018-11-09 18:49:10 +00:00
klog . Errorf ( "iscsi: failed to detach disk Error: %s" , string ( out ) )
2017-08-15 13:53:46 +00:00
}
// Delete the node record
2018-11-09 18:49:10 +00:00
klog . Infof ( "iscsi: delete node record target %s iqn %s" , portal , iqn )
2017-10-28 19:28:52 +00:00
out , err = exec . Run ( "iscsiadm" , deleteArgs ... )
2017-08-15 13:53:46 +00:00
if err != nil {
2018-11-09 18:49:10 +00:00
klog . Errorf ( "iscsi: failed to delete node record Error: %s" , string ( out ) )
2015-03-13 21:31:13 +00:00
}
}
2017-08-15 13:53:46 +00:00
// Delete the iface after all sessions have logged out
// If the iface is not created via iscsi plugin, skip to delete
if initiatorName != "" && found && iface == ( portals [ 0 ] + ":" + volName ) {
deleteArgs := [ ] string { "-m" , "iface" , "-I" , iface , "-o" , "delete" }
2017-10-28 19:28:52 +00:00
out , err := exec . Run ( "iscsiadm" , deleteArgs ... )
2017-08-15 13:53:46 +00:00
if err != nil {
2018-11-09 18:49:10 +00:00
klog . Errorf ( "iscsi: failed to delete iface Error: %s" , string ( out ) )
2017-08-15 13:53:46 +00:00
}
}
2015-03-13 21:31:13 +00:00
return nil
}
2015-10-13 18:50:49 +00:00
2017-10-28 19:28:52 +00:00
// getDevByPath builds the /dev/disk/by-path name for the given portal, iqn
// and lun: "/dev/disk/by-path/ip-<portal>-iscsi-<iqn>-lun-<lun>".
func getDevByPath(portal, iqn, lun string) string {
	devPath := "/dev/disk/by-path/ip-" + portal
	devPath += "-iscsi-" + iqn
	devPath += "-lun-" + lun
	return devPath
}
2016-07-25 04:18:38 +00:00
func extractTransportname ( ifaceOutput string ) ( iscsiTransport string ) {
2017-09-04 19:56:06 +00:00
rexOutput := ifaceTransportNameRe . FindStringSubmatch ( ifaceOutput )
2017-08-15 13:53:46 +00:00
if rexOutput == nil {
2016-07-25 04:18:38 +00:00
return ""
}
2017-08-15 13:53:46 +00:00
iscsiTransport = rexOutput [ 1 ]
2016-07-25 04:18:38 +00:00
// While iface.transport_name is a required parameter, handle it being unspecified anyways
if iscsiTransport == "<empty>" {
iscsiTransport = "tcp"
}
return iscsiTransport
}
2015-10-13 18:50:49 +00:00
// extractDeviceAndPrefix splits a mount path into the device name (everything
// after the last "/") and the prefix (everything before the last "-lun-").
// It returns an error when mntPath contains no "/" or no "-lun-".
func extractDeviceAndPrefix(mntPath string) (string, string, error) {
	lastSlash := strings.LastIndex(mntPath, "/")
	lunSep := strings.LastIndex(mntPath, "-lun-")
	if lastSlash < 0 || lunSep < 0 {
		return "", "", fmt.Errorf("iscsi detach disk: malformatted mnt path: %s", mntPath)
	}
	// The prefix is the mount path with the "-lun-<n>" suffix stripped.
	return mntPath[lastSlash+1:], mntPath[:lunSep], nil
}
2017-02-14 20:50:26 +00:00
func extractIface ( mntPath string ) ( string , bool ) {
2017-09-04 19:56:06 +00:00
reOutput := ifaceRe . FindStringSubmatch ( mntPath )
2017-08-15 13:53:46 +00:00
if reOutput != nil {
return reOutput [ 1 ] , true
2016-12-23 14:42:13 +00:00
}
2017-02-14 20:50:26 +00:00
return "" , false
2016-12-23 14:42:13 +00:00
}
2015-10-13 18:50:49 +00:00
// extractPortalAndIqn parses a device name of the form
// "<portal>-<iqn>-lun-<n>" (for example
// "192.168.0.10:3260-iqn.2017-05.com.example:test-lun-0") and returns the
// portal and the target name. The target name may start with "iqn." or
// "eui.".
//
// An error is returned when the portal separator, the target name or the
// trailing "-lun-" marker cannot be located.
func extractPortalAndIqn(device string) (string, string, error) {
	portalEnd := strings.Index(device, "-")
	if portalEnd < 0 {
		return "", "", fmt.Errorf("iscsi detach disk: no portal in %s", device)
	}
	portal := device[:portalEnd]

	nameStart := strings.Index(device, "iqn.")
	if nameStart < 0 {
		nameStart = strings.Index(device, "eui.")
	}
	if nameStart < 0 {
		return "", "", fmt.Errorf("iscsi detach disk: no iqn in %s", device)
	}

	// The target name ends where the last "-lun-" begins. Without this guard
	// a missing marker (index -1) or one located before the target name
	// would make the slice expression below panic.
	lunStart := strings.LastIndex(device, "-lun-")
	if lunStart < nameStart {
		return "", "", fmt.Errorf("iscsi detach disk: no lun in %s", device)
	}
	return portal, device[nameStart:lunStart], nil
}
2017-05-31 19:52:00 +00:00
// removeDuplicate returns the distinct non-empty strings of s in order of
// first appearance.
//
// The result is built in a fresh slice so that the caller's slice (and any
// other slice sharing its backing array) is left untouched.
func removeDuplicate(s []string) []string {
	seen := make(map[string]bool, len(s))
	unique := make([]string, 0, len(s))
	for _, v := range s {
		if v == "" || seen[v] {
			continue
		}
		seen[v] = true
		unique = append(unique, v)
	}
	return unique
}
2017-07-12 03:37:48 +00:00
// parseIscsiadmShow turns the line-oriented "key = value" text in output into
// a map of iface settings.
//
// Only lines starting with "iface." are considered; lines containing
// "<empty>" and the iface.iscsi_ifacename record are left out. A considered
// line that is not exactly three whitespace-separated fields with "=" in the
// middle yields a nil map and an error.
func parseIscsiadmShow(output string) (map[string]string, error) {
	settings := map[string]string{}
	for _, record := range strings.Split(output, "\n") {
		if !strings.HasPrefix(record, "iface.") {
			continue
		}
		if strings.Contains(record, "<empty>") {
			continue
		}

		fields := strings.Fields(record)
		if len(fields) != 3 || fields[1] != "=" {
			return nil, fmt.Errorf("Error: invalid iface setting: %v", fields)
		}

		key, value := fields[0], fields[2]
		// iscsi_ifacename is immutable once the iface is created
		if key != "iface.iscsi_ifacename" {
			settings[key] = value
		}
	}
	return settings, nil
}
func cloneIface ( b iscsiDiskMounter , newIface string ) error {
var lastErr error
// get pre-configured iface records
2017-08-23 12:56:51 +00:00
out , err := b . exec . Run ( "iscsiadm" , "-m" , "iface" , "-I" , b . Iface , "-o" , "show" )
2017-07-12 03:37:48 +00:00
if err != nil {
lastErr = fmt . Errorf ( "iscsi: failed to show iface records: %s (%v)" , string ( out ) , err )
return lastErr
}
// parse obtained records
params , err := parseIscsiadmShow ( string ( out ) )
if err != nil {
lastErr = fmt . Errorf ( "iscsi: failed to parse iface records: %s (%v)" , string ( out ) , err )
return lastErr
}
// update initiatorname
params [ "iface.initiatorname" ] = b . InitiatorName
// create new iface
2017-08-23 12:56:51 +00:00
out , err = b . exec . Run ( "iscsiadm" , "-m" , "iface" , "-I" , newIface , "-o" , "new" )
2017-07-12 03:37:48 +00:00
if err != nil {
lastErr = fmt . Errorf ( "iscsi: failed to create new iface: %s (%v)" , string ( out ) , err )
return lastErr
}
// update new iface records
for key , val := range params {
2017-08-23 12:56:51 +00:00
_ , err = b . exec . Run ( "iscsiadm" , "-m" , "iface" , "-I" , newIface , "-o" , "update" , "-n" , key , "-v" , val )
2017-07-12 03:37:48 +00:00
if err != nil {
2017-08-23 12:56:51 +00:00
b . exec . Run ( "iscsiadm" , "-m" , "iface" , "-I" , newIface , "-o" , "delete" )
2017-07-12 03:37:48 +00:00
lastErr = fmt . Errorf ( "iscsi: failed to update iface records: %s (%v). iface(%s) will be used" , string ( out ) , err , b . Iface )
break
}
}
return lastErr
}