Merge pull request #66397 from gnufied/fix-default-max-volume-ebs

Automatic merge from submit-queue (batch tested with PRs 66410, 66398, 66061, 66397, 65558). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Fix volume limit for EBS on m5 and c5 instances

This is a fix for lower volume limits on m5 and c5 instance types while we wait for https://github.com/kubernetes/features/issues/554 to land GA.

This problem became urgent because many of our users are trying to migrate to those instance types in light of the Spectre/Meltdown vulnerabilities, but the lower volume limit on those instance types often causes cluster instability. Yes, they can work around it by configuring the scheduler with a lower limit, but this often becomes difficult to do when the cluster contains a mix of instance types.

The newer default limits were picked from https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/volume_limits.html

More information about Spectre/Meltdown is available at https://community.bitnami.com/t/spectre-variant-2/54961/5

/sig storage
/sig scheduling

```release-note
Fix volume limit for EBS on m5 and c5 instance types
```
pull/8/head
Kubernetes Submit Queue 2018-07-20 18:51:11 -07:00 committed by GitHub
commit 827aa934ac
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 65 additions and 16 deletions

View File

@ -29,6 +29,7 @@ import (
utilfeature "k8s.io/apiserver/pkg/util/feature" utilfeature "k8s.io/apiserver/pkg/util/feature"
utilfeaturetesting "k8s.io/apiserver/pkg/util/feature/testing" utilfeaturetesting "k8s.io/apiserver/pkg/util/feature/testing"
"k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/features"
kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
"k8s.io/kubernetes/pkg/scheduler/algorithm" "k8s.io/kubernetes/pkg/scheduler/algorithm"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache" schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
volumeutil "k8s.io/kubernetes/pkg/volume/util" volumeutil "k8s.io/kubernetes/pkg/volume/util"
@ -826,6 +827,23 @@ func TestVolumeCountConflicts(t *testing.T) {
} }
} }
func TestMaxVolumeFunc(t *testing.T) {
node := &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node-for-m5-instance",
Labels: map[string]string{
kubeletapis.LabelInstanceType: "m5.large",
},
},
}
os.Unsetenv(KubeMaxPDVols)
maxVolumeFunc := getMaxVolumeFunc(EBSVolumeFilterType)
maxVolume := maxVolumeFunc(node)
if maxVolume != DefaultMaxEBSM5VolumeLimit {
t.Errorf("Expected max volume to be %d got %d", DefaultMaxEBSM5VolumeLimit, maxVolume)
}
}
func getNodeWithPodAndVolumeLimits(pods []*v1.Pod, limit int64, filter string) *schedulercache.NodeInfo { func getNodeWithPodAndVolumeLimits(pods []*v1.Pod, limit int64, filter string) *schedulercache.NodeInfo {
nodeInfo := schedulercache.NewNodeInfo(pods...) nodeInfo := schedulercache.NewNodeInfo(pods...)
node := &v1.Node{ node := &v1.Node{

View File

@ -20,6 +20,7 @@ import (
"errors" "errors"
"fmt" "fmt"
"os" "os"
"regexp"
"strconv" "strconv"
"sync" "sync"
@ -97,6 +98,8 @@ const (
// Amazon recommends no more than 40; the system root volume uses at least one. // Amazon recommends no more than 40; the system root volume uses at least one.
// See http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/volume_limits.html#linux-specific-volume-limits // See http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/volume_limits.html#linux-specific-volume-limits
DefaultMaxEBSVolumes = 39 DefaultMaxEBSVolumes = 39
// DefaultMaxEBSM5VolumeLimit is the default EBS volume limit on m5 and c5 instances
DefaultMaxEBSM5VolumeLimit = 25
// DefaultMaxGCEPDVolumes defines the maximum number of PD Volumes for GCE // DefaultMaxGCEPDVolumes defines the maximum number of PD Volumes for GCE
// GCE instances can have up to 16 PD volumes attached. // GCE instances can have up to 16 PD volumes attached.
DefaultMaxGCEPDVolumes = 16 DefaultMaxGCEPDVolumes = 16
@ -291,7 +294,7 @@ func NoDiskConflict(pod *v1.Pod, meta algorithm.PredicateMetadata, nodeInfo *sch
type MaxPDVolumeCountChecker struct { type MaxPDVolumeCountChecker struct {
filter VolumeFilter filter VolumeFilter
volumeLimitKey v1.ResourceName volumeLimitKey v1.ResourceName
maxVolumes int maxVolumeFunc func(node *v1.Node) int
pvInfo PersistentVolumeInfo pvInfo PersistentVolumeInfo
pvcInfo PersistentVolumeClaimInfo pvcInfo PersistentVolumeClaimInfo
@ -317,7 +320,6 @@ type VolumeFilter struct {
func NewMaxPDVolumeCountPredicate( func NewMaxPDVolumeCountPredicate(
filterName string, pvInfo PersistentVolumeInfo, pvcInfo PersistentVolumeClaimInfo) algorithm.FitPredicate { filterName string, pvInfo PersistentVolumeInfo, pvcInfo PersistentVolumeClaimInfo) algorithm.FitPredicate {
var filter VolumeFilter var filter VolumeFilter
var maxVolumes int
var volumeLimitKey v1.ResourceName var volumeLimitKey v1.ResourceName
switch filterName { switch filterName {
@ -325,15 +327,12 @@ func NewMaxPDVolumeCountPredicate(
case EBSVolumeFilterType: case EBSVolumeFilterType:
filter = EBSVolumeFilter filter = EBSVolumeFilter
volumeLimitKey = v1.ResourceName(volumeutil.EBSVolumeLimitKey) volumeLimitKey = v1.ResourceName(volumeutil.EBSVolumeLimitKey)
maxVolumes = getMaxVols(DefaultMaxEBSVolumes)
case GCEPDVolumeFilterType: case GCEPDVolumeFilterType:
filter = GCEPDVolumeFilter filter = GCEPDVolumeFilter
volumeLimitKey = v1.ResourceName(volumeutil.GCEVolumeLimitKey) volumeLimitKey = v1.ResourceName(volumeutil.GCEVolumeLimitKey)
maxVolumes = getMaxVols(DefaultMaxGCEPDVolumes)
case AzureDiskVolumeFilterType: case AzureDiskVolumeFilterType:
filter = AzureDiskVolumeFilter filter = AzureDiskVolumeFilter
volumeLimitKey = v1.ResourceName(volumeutil.AzureVolumeLimitKey) volumeLimitKey = v1.ResourceName(volumeutil.AzureVolumeLimitKey)
maxVolumes = getMaxVols(DefaultMaxAzureDiskVolumes)
default: default:
glog.Fatalf("Wrong filterName, Only Support %v %v %v ", EBSVolumeFilterType, glog.Fatalf("Wrong filterName, Only Support %v %v %v ", EBSVolumeFilterType,
GCEPDVolumeFilterType, AzureDiskVolumeFilterType) GCEPDVolumeFilterType, AzureDiskVolumeFilterType)
@ -343,7 +342,7 @@ func NewMaxPDVolumeCountPredicate(
c := &MaxPDVolumeCountChecker{ c := &MaxPDVolumeCountChecker{
filter: filter, filter: filter,
volumeLimitKey: volumeLimitKey, volumeLimitKey: volumeLimitKey,
maxVolumes: maxVolumes, maxVolumeFunc: getMaxVolumeFunc(filterName),
pvInfo: pvInfo, pvInfo: pvInfo,
pvcInfo: pvcInfo, pvcInfo: pvcInfo,
randomVolumeIDPrefix: rand.String(32), randomVolumeIDPrefix: rand.String(32),
@ -352,19 +351,52 @@ func NewMaxPDVolumeCountPredicate(
return c.predicate return c.predicate
} }
// getMaxVols checks the max PD volumes environment variable, otherwise returning a default value func getMaxVolumeFunc(filterName string) func(node *v1.Node) int {
func getMaxVols(defaultVal int) int { return func(node *v1.Node) int {
maxVolumesFromEnv := getMaxVolLimitFromEnv()
if maxVolumesFromEnv > 0 {
return maxVolumesFromEnv
}
var nodeInstanceType string
for k, v := range node.ObjectMeta.Labels {
if k == kubeletapis.LabelInstanceType {
nodeInstanceType = v
}
}
switch filterName {
case EBSVolumeFilterType:
return getMaxEBSVolume(nodeInstanceType)
case GCEPDVolumeFilterType:
return DefaultMaxGCEPDVolumes
case AzureDiskVolumeFilterType:
return DefaultMaxAzureDiskVolumes
default:
return -1
}
}
}
// ebsM5C5InstanceTypeRegex matches instance type names that start with "c5"
// or "m5". It is compiled once at package initialization instead of on every
// call to getMaxEBSVolume.
var ebsM5C5InstanceTypeRegex = regexp.MustCompile("^[cm]5.*")

// getMaxEBSVolume returns the default EBS volume limit for the given instance
// type: DefaultMaxEBSM5VolumeLimit when the type name starts with "c5" or
// "m5", and DefaultMaxEBSVolumes for everything else, including an empty name.
func getMaxEBSVolume(nodeInstanceType string) int {
	if ebsM5C5InstanceTypeRegex.MatchString(nodeInstanceType) {
		return DefaultMaxEBSM5VolumeLimit
	}
	return DefaultMaxEBSVolumes
}
// getMaxVolLimitFromEnv checks the max PD volumes environment variable, returning its value if it is a positive integer and -1 otherwise
func getMaxVolLimitFromEnv() int {
if rawMaxVols := os.Getenv(KubeMaxPDVols); rawMaxVols != "" { if rawMaxVols := os.Getenv(KubeMaxPDVols); rawMaxVols != "" {
if parsedMaxVols, err := strconv.Atoi(rawMaxVols); err != nil { if parsedMaxVols, err := strconv.Atoi(rawMaxVols); err != nil {
glog.Errorf("Unable to parse maximum PD volumes value, using default of %v: %v", defaultVal, err) glog.Errorf("Unable to parse maximum PD volumes value, using default: %v", err)
} else if parsedMaxVols <= 0 { } else if parsedMaxVols <= 0 {
glog.Errorf("Maximum PD volumes must be a positive value, using default of %v", defaultVal) glog.Errorf("Maximum PD volumes must be a positive value, using default ")
} else { } else {
return parsedMaxVols return parsedMaxVols
} }
} }
return defaultVal return -1
} }
func (c *MaxPDVolumeCountChecker) filterVolumes(volumes []v1.Volume, namespace string, filteredVolumes map[string]bool) error { func (c *MaxPDVolumeCountChecker) filterVolumes(volumes []v1.Volume, namespace string, filteredVolumes map[string]bool) error {
@ -454,7 +486,7 @@ func (c *MaxPDVolumeCountChecker) predicate(pod *v1.Pod, meta algorithm.Predicat
} }
numNewVolumes := len(newVolumes) numNewVolumes := len(newVolumes)
maxAttachLimit := c.maxVolumes maxAttachLimit := c.maxVolumeFunc(nodeInfo.Node())
if utilfeature.DefaultFeatureGate.Enabled(features.AttachVolumeLimit) { if utilfeature.DefaultFeatureGate.Enabled(features.AttachVolumeLimit) {
volumeLimits := nodeInfo.VolumeLimits() volumeLimits := nodeInfo.VolumeLimits()

View File

@ -3913,7 +3913,6 @@ func TestVolumeZonePredicateWithVolumeBinding(t *testing.T) {
func TestGetMaxVols(t *testing.T) { func TestGetMaxVols(t *testing.T) {
previousValue := os.Getenv(KubeMaxPDVols) previousValue := os.Getenv(KubeMaxPDVols)
defaultValue := 39
tests := []struct { tests := []struct {
rawMaxVols string rawMaxVols string
@ -3922,12 +3921,12 @@ func TestGetMaxVols(t *testing.T) {
}{ }{
{ {
rawMaxVols: "invalid", rawMaxVols: "invalid",
expected: defaultValue, expected: -1,
name: "Unable to parse maximum PD volumes value, using default value", name: "Unable to parse maximum PD volumes value, using default value",
}, },
{ {
rawMaxVols: "-2", rawMaxVols: "-2",
expected: defaultValue, expected: -1,
name: "Maximum PD volumes must be a positive value, using default value", name: "Maximum PD volumes must be a positive value, using default value",
}, },
{ {
@ -3940,7 +3939,7 @@ func TestGetMaxVols(t *testing.T) {
for _, test := range tests { for _, test := range tests {
t.Run(test.name, func(t *testing.T) { t.Run(test.name, func(t *testing.T) {
os.Setenv(KubeMaxPDVols, test.rawMaxVols) os.Setenv(KubeMaxPDVols, test.rawMaxVols)
result := getMaxVols(defaultValue) result := getMaxVolLimitFromEnv()
if result != test.expected { if result != test.expected {
t.Errorf("expected %v got %v", test.expected, result) t.Errorf("expected %v got %v", test.expected, result)
} }