Merge pull request #41398 from codablock/azure_max_pd

Automatic merge from submit-queue

Add scheduler predicate to filter for max Azure disks attached

**What this PR does / why we need it**: This PR adds scheduler predicates for maximum Azure Disks count. This allows to use the environment variable KUBE_MAX_PD_VOLS on scheduler the same as it's already possible with GCE and AWS.

This is needed as we need a way to specify the maximum attachable disks on Azure to avoid permanently failing disk attachment in cases k8s scheduled too many PODs with AzureDisk volumes onto the same node. 

I've chosen 16 as the default value for DefaultMaxAzureDiskVolumes even though it may be too high for many smaller VM types and too low for the larger VM types. This means, the default behavior may change for clusters with large VM types. For smaller VM types, the behavior will not change (it will keep failing attaching).

In the future, the value should be determined at run time on a per node basis, depending on the VM size. I know that this is already implemented in the ongoing Azure Managed Disks work, but I don't remember where to find this anymore and also forgot who was working on this. Maybe @colemickens can help here.

**Release note**:

```release-note
Support KUBE_MAX_PD_VOLS on Azure
```

CC @colemickens @brendandburns
pull/6/head
Kubernetes Submit Queue 2017-02-21 06:11:09 -08:00 committed by GitHub
commit f2e234e47f
3 changed files with 37 additions and 3 deletions

View File

@ -339,6 +339,23 @@ var GCEPDVolumeFilter VolumeFilter = VolumeFilter{
},
}
// AzureDiskVolumeFilter is a VolumeFilter for filtering Azure Disk Volumes
var AzureDiskVolumeFilter VolumeFilter = VolumeFilter{
FilterVolume: func(vol *v1.Volume) (string, bool) {
if vol.AzureDisk != nil {
return vol.AzureDisk.DiskName, true
}
return "", false
},
FilterPersistentVolume: func(pv *v1.PersistentVolume) (string, bool) {
if pv.Spec.AzureDisk != nil {
return pv.Spec.AzureDisk.DiskName, true
}
return "", false
},
}
type VolumeZoneChecker struct {
pvInfo PersistentVolumeInfo
pvcInfo PersistentVolumeClaimInfo

View File

@ -138,6 +138,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
{"name": "NoVolumeZoneConflict"},
{"name": "MaxEBSVolumeCount"},
{"name": "MaxGCEPDVolumeCount"},
{"name": "MaxAzureDiskVolumeCount"},
{"name": "TestServiceAffinity", "argument": {"serviceAffinity" : {"labels" : ["region"]}}},
{"name": "TestLabelsPresence", "argument": {"labelsPresence" : {"labels" : ["foo"], "presence":true}}}
],"priorities": [
@ -161,6 +162,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
{Name: "NoVolumeZoneConflict"},
{Name: "MaxEBSVolumeCount"},
{Name: "MaxGCEPDVolumeCount"},
{Name: "MaxAzureDiskVolumeCount"},
{Name: "TestServiceAffinity", Argument: &schedulerapi.PredicateArgument{ServiceAffinity: &schedulerapi.ServiceAffinity{Labels: []string{"region"}}}},
{Name: "TestLabelsPresence", Argument: &schedulerapi.PredicateArgument{LabelsPresence: &schedulerapi.LabelsPresence{Labels: []string{"foo"}, Presence: true}}},
},
@ -194,6 +196,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
{"name": "CheckNodeMemoryPressure"},
{"name": "MaxEBSVolumeCount"},
{"name": "MaxGCEPDVolumeCount"},
{"name": "MaxAzureDiskVolumeCount"},
{"name": "MatchInterPodAffinity"},
{"name": "GeneralPredicates"},
{"name": "TestServiceAffinity", "argument": {"serviceAffinity" : {"labels" : ["region"]}}},
@ -221,6 +224,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
{Name: "CheckNodeMemoryPressure"},
{Name: "MaxEBSVolumeCount"},
{Name: "MaxGCEPDVolumeCount"},
{Name: "MaxAzureDiskVolumeCount"},
{Name: "MatchInterPodAffinity"},
{Name: "GeneralPredicates"},
{Name: "TestServiceAffinity", Argument: &schedulerapi.PredicateArgument{ServiceAffinity: &schedulerapi.ServiceAffinity{Labels: []string{"region"}}}},
@ -257,6 +261,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
{"name": "CheckNodeDiskPressure"},
{"name": "MaxEBSVolumeCount"},
{"name": "MaxGCEPDVolumeCount"},
{"name": "MaxAzureDiskVolumeCount"},
{"name": "MatchInterPodAffinity"},
{"name": "GeneralPredicates"},
{"name": "TestServiceAffinity", "argument": {"serviceAffinity" : {"labels" : ["region"]}}},
@ -287,6 +292,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
{Name: "CheckNodeDiskPressure"},
{Name: "MaxEBSVolumeCount"},
{Name: "MaxGCEPDVolumeCount"},
{Name: "MaxAzureDiskVolumeCount"},
{Name: "MatchInterPodAffinity"},
{Name: "GeneralPredicates"},
{Name: "TestServiceAffinity", Argument: &schedulerapi.PredicateArgument{ServiceAffinity: &schedulerapi.ServiceAffinity{Labels: []string{"region"}}}},

View File

@ -36,9 +36,11 @@ import (
const (
// GCE instances can have up to 16 PD volumes attached.
DefaultMaxGCEPDVolumes = 16
ClusterAutoscalerProvider = "ClusterAutoscalerProvider"
StatefulSetKind = "StatefulSet"
DefaultMaxGCEPDVolumes = 16
// Larger Azure VMs can actually have much more disks attached. TODO We should determine the max based on VM size
DefaultMaxAzureDiskVolumes = 16
ClusterAutoscalerProvider = "ClusterAutoscalerProvider"
StatefulSetKind = "StatefulSet"
)
func init() {
@ -136,6 +138,15 @@ func defaultPredicates() sets.String {
return predicates.NewMaxPDVolumeCountPredicate(predicates.GCEPDVolumeFilter, maxVols, args.PVInfo, args.PVCInfo)
},
),
// Fit is determined by whether or not there would be too many Azure Disk volumes attached to the node
factory.RegisterFitPredicateFactory(
"MaxAzureDiskVolumeCount",
func(args factory.PluginFactoryArgs) algorithm.FitPredicate {
// TODO: allow for generically parameterized scheduler predicates, because this is a bit ugly
maxVols := getMaxVols(DefaultMaxAzureDiskVolumes)
return predicates.NewMaxPDVolumeCountPredicate(predicates.AzureDiskVolumeFilter, maxVols, args.PVInfo, args.PVCInfo)
},
),
// Fit is determined by inter-pod affinity.
factory.RegisterFitPredicateFactory(
"MatchInterPodAffinity",