mirror of https://github.com/k3s-io/k3s
AWS: Add sequential allocator for device names.
On AWS, we should avoid reusing device names for as long as possible; see https://aws.amazon.com/premiumsupport/knowledge-center/ebs-stuck-attaching/: "If you specify a device name that is not in use by EC2, but is being used by the block device driver within the EC2 instance, the attachment of the EBS volume does not succeed and the EBS volume is stuck in the attaching state." This patch adds a device name allocator that tries to find a name that's next to the last used device name instead of using the first available one. This way we will loop through all device names ("xvdba" .. "xvdzz") before a device name is reused.
pull/6/head
parent
7d235e147c
commit
65f6bcb927
|
@ -16,6 +16,7 @@ go_library(
|
|||
"aws_loadbalancer.go",
|
||||
"aws_routes.go",
|
||||
"aws_utils.go",
|
||||
"device_allocator.go",
|
||||
"log_handler.go",
|
||||
"retry_handler.go",
|
||||
"sets_ippermissions.go",
|
||||
|
@ -50,6 +51,7 @@ go_test(
|
|||
name = "go_default_test",
|
||||
srcs = [
|
||||
"aws_test.go",
|
||||
"device_allocator_test.go",
|
||||
"retry_handler_test.go",
|
||||
],
|
||||
library = "go_default_library",
|
||||
|
|
|
@ -365,6 +365,9 @@ type Cloud struct {
|
|||
// and then get a second request before we attach the volume
|
||||
attachingMutex sync.Mutex
|
||||
attaching map[types.NodeName]map[mountDevice]awsVolumeID
|
||||
|
||||
// state of our device allocator for each node
|
||||
deviceAllocators map[types.NodeName]DeviceAllocator
|
||||
}
|
||||
|
||||
var _ Volumes = &Cloud{}
|
||||
|
@ -797,6 +800,7 @@ func newAWSCloud(config io.Reader, awsServices Services) (*Cloud, error) {
|
|||
region: regionName,
|
||||
|
||||
attaching: make(map[types.NodeName]map[mountDevice]awsVolumeID),
|
||||
deviceAllocators: make(map[types.NodeName]DeviceAllocator),
|
||||
}
|
||||
|
||||
selfAWSInstance, err := awsCloud.buildSelfAWSInstance()
|
||||
|
@ -1210,20 +1214,17 @@ func (c *Cloud) getMountDevice(i *awsInstance, volumeID awsVolumeID, assign bool
|
|||
return mountDevice(""), false, nil
|
||||
}
|
||||
|
||||
// Find the first unused device in sequence 'ba', 'bb', 'bc', ... 'bz', 'ca', ... 'zz'
|
||||
var chosen mountDevice
|
||||
for first := 'b'; first <= 'z' && chosen == ""; first++ {
|
||||
for second := 'a'; second <= 'z' && chosen == ""; second++ {
|
||||
candidate := mountDevice(fmt.Sprintf("%c%c", first, second))
|
||||
if _, found := deviceMappings[candidate]; !found {
|
||||
chosen = candidate
|
||||
break
|
||||
// Find the next unused device name
|
||||
deviceAllocator := c.deviceAllocators[i.nodeName]
|
||||
if deviceAllocator == nil {
|
||||
// we want device names with two significant characters, starting with
|
||||
// /dev/xvdba (leaving xvda - xvdz and xvdaa-xvdaz to the system)
|
||||
deviceAllocator = NewDeviceAllocator(2, "ba")
|
||||
c.deviceAllocators[i.nodeName] = deviceAllocator
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if chosen == "" {
|
||||
glog.Warningf("Could not assign a mount device (all in use?). mappings=%v", deviceMappings)
|
||||
chosen, err := deviceAllocator.GetNext(deviceMappings)
|
||||
if err != nil {
|
||||
glog.Warningf("Could not assign a mount device. mappings=%v, error: %v", deviceMappings, err)
|
||||
return "", false, fmt.Errorf("Too many EBS volumes attached to node %s.", i.nodeName)
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,95 @@
|
|||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package aws
|
||||
|
||||
import "fmt"
|
||||
|
||||
// ExistingDevices is a map of assigned devices. Presence of a key with a device
|
||||
// name in the map means that the device is allocated. Value is irrelevant and
|
||||
// can be used for anything that DeviceAllocator user wants.
|
||||
// Only the relevant part of device name should be in the map, e.g. "ba" for
|
||||
// "/dev/xvdba".
|
||||
type ExistingDevices map[mountDevice]awsVolumeID
|
||||
|
||||
// On AWS, we should assign new (not yet used) device names to attached volumes.
|
||||
// If we reuse a previously used name, we may get the volume "attaching" forever,
|
||||
// see https://aws.amazon.com/premiumsupport/knowledge-center/ebs-stuck-attaching/.
|
||||
// DeviceAllocator finds available device name, taking into account already
|
||||
// assigned device names from ExistingDevices map. It tries to find the next
|
||||
// device name to the previously assigned one (from previous DeviceAllocator
|
||||
// call), so all available device names are used eventually and it minimizes
|
||||
// device name reuse.
|
||||
// All these allocations are in-memory, nothing is written to / read from
|
||||
// /dev directory.
|
||||
type DeviceAllocator interface {
|
||||
// GetNext returns a free device name or error when there is no free device
|
||||
// name. Only the device suffix is returned, e.g. "ba" for "/dev/xvdba".
|
||||
// It's up to the caller to add the appropriate "/dev/sd" or "/dev/xvd" prefix.
|
||||
GetNext(existingDevices ExistingDevices) (mountDevice, error)
|
||||
}
|
||||
|
||||
type deviceAllocator struct {
|
||||
firstDevice mountDevice
|
||||
lastAssignedDevice mountDevice
|
||||
length int
|
||||
}
|
||||
|
||||
// NewDeviceAllocator creates new DeviceAlllocator that allocates device names
|
||||
// of given length ("aaa" for length 3) and with given first device, so all
|
||||
// devices before the first device are left to the operating system.
|
||||
// With length 2 and firstDevice "ba", it will allocate device names
|
||||
// ba, bb, ..., bz, ca, ... cz, ..., da, ... zz, so a..z and aa..az can be used
|
||||
// by the operating system.
|
||||
func NewDeviceAllocator(length int, firstDevice mountDevice) DeviceAllocator {
|
||||
lastDevice := make([]byte, length)
|
||||
for i := 0; i < length; i++ {
|
||||
lastDevice[i] = 'z'
|
||||
}
|
||||
return &deviceAllocator{
|
||||
firstDevice: firstDevice,
|
||||
lastAssignedDevice: mountDevice(lastDevice),
|
||||
length: length,
|
||||
}
|
||||
}
|
||||
|
||||
func (d *deviceAllocator) GetNext(existingDevices ExistingDevices) (mountDevice, error) {
|
||||
candidate := d.lastAssignedDevice
|
||||
|
||||
for {
|
||||
candidate = d.nextDevice(candidate)
|
||||
if _, found := existingDevices[candidate]; !found {
|
||||
d.lastAssignedDevice = candidate
|
||||
return candidate, nil
|
||||
}
|
||||
if candidate == d.lastAssignedDevice {
|
||||
return "", fmt.Errorf("no devices are available")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (d *deviceAllocator) nextDevice(device mountDevice) mountDevice {
|
||||
dev := []byte(device)
|
||||
for i := d.length - 1; i >= 0; i-- {
|
||||
if dev[i] != 'z' {
|
||||
dev[i]++
|
||||
return mountDevice(dev)
|
||||
}
|
||||
dev[i] = 'a'
|
||||
}
|
||||
// all parts of device were 'z', jump to the first device
|
||||
return d.firstDevice
|
||||
}
|
|
@ -0,0 +1,103 @@
|
|||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package aws
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestDeviceAllocator(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
existingDevices ExistingDevices
|
||||
length int
|
||||
firstDevice mountDevice
|
||||
lastAllocated mountDevice
|
||||
expectedOutput mountDevice
|
||||
}{
|
||||
{
|
||||
"empty device list",
|
||||
ExistingDevices{},
|
||||
2,
|
||||
"aa",
|
||||
"aa",
|
||||
"ab",
|
||||
},
|
||||
{
|
||||
"empty device list with wrap",
|
||||
ExistingDevices{},
|
||||
2,
|
||||
"ba",
|
||||
"zz",
|
||||
"ba", // next to 'zz' is the first one, 'ba'
|
||||
},
|
||||
{
|
||||
"device list",
|
||||
ExistingDevices{"aa": "used", "ab": "used", "ac": "used"},
|
||||
2,
|
||||
"aa",
|
||||
"aa",
|
||||
"ad", // all up to "ac" are used
|
||||
},
|
||||
{
|
||||
"device list with wrap",
|
||||
ExistingDevices{"zy": "used", "zz": "used", "ba": "used"},
|
||||
2,
|
||||
"ba",
|
||||
"zx",
|
||||
"bb", // "zy", "zz" and "ba" are used
|
||||
},
|
||||
{
|
||||
"three characters with wrap",
|
||||
ExistingDevices{"zzy": "used", "zzz": "used", "baa": "used"},
|
||||
3,
|
||||
"baa",
|
||||
"zzx",
|
||||
"bab",
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
allocator := NewDeviceAllocator(test.length, test.firstDevice).(*deviceAllocator)
|
||||
allocator.lastAssignedDevice = test.lastAllocated
|
||||
|
||||
got, err := allocator.GetNext(test.existingDevices)
|
||||
if err != nil {
|
||||
t.Errorf("text %q: unexpected error: %v", test.name, err)
|
||||
}
|
||||
if got != test.expectedOutput {
|
||||
t.Errorf("text %q: expected %q, got %q", test.name, test.expectedOutput, got)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeviceAllocatorError(t *testing.T) {
|
||||
allocator := NewDeviceAllocator(2, "ba").(*deviceAllocator)
|
||||
existingDevices := ExistingDevices{}
|
||||
|
||||
// make all devices used
|
||||
var first, second byte
|
||||
for first = 'b'; first <= 'z'; first++ {
|
||||
for second = 'a'; second <= 'z'; second++ {
|
||||
device := [2]byte{first, second}
|
||||
existingDevices[mountDevice(device[:])] = "used"
|
||||
}
|
||||
}
|
||||
|
||||
device, err := allocator.GetNext(existingDevices)
|
||||
if err == nil {
|
||||
t.Errorf("expected error, got device %q", device)
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue