mirror of https://github.com/k3s-io/k3s
Merge pull request #41398 from codablock/azure_max_pd
Automatic merge from submit-queue. Add scheduler predicate to filter for max Azure disks attached. **What this PR does / why we need it**: This PR adds a scheduler predicate for the maximum Azure Disk count. This allows using the environment variable KUBE_MAX_PD_VOLS on the scheduler, in the same way that is already possible with GCE and AWS. This is needed because we need a way to specify the maximum number of attachable disks on Azure, to avoid permanently failing disk attachments in cases where k8s has scheduled too many pods with AzureDisk volumes onto the same node. I've chosen 16 as the default value for DefaultMaxAzureDiskVolumes, even though it may be too high for many smaller VM types and too low for the larger VM types. This means the default behavior may change for clusters with large VM types. For smaller VM types, the behavior will not change (disk attachment will keep failing). In the future, the value should be determined at run time on a per-node basis, depending on the VM size. I know that this is already implemented in the ongoing Azure Managed Disks work, but I don't remember where to find it anymore and have also forgotten who was working on it. Maybe @colemickens can help here. **Release note**: ```release-note Support KUBE_MAX_PD_VOLS on Azure ``` CC @colemickens @brendandburns pull/6/head
commit
f2e234e47f
|
@ -339,6 +339,23 @@ var GCEPDVolumeFilter VolumeFilter = VolumeFilter{
|
|||
},
|
||||
}
|
||||
|
||||
// AzureDiskVolumeFilter is a VolumeFilter for filtering Azure Disk Volumes.
// Both of its callbacks return the Azure disk name together with true when the
// inspected object is backed by an Azure Disk, and ("", false) otherwise.
|
||||
var AzureDiskVolumeFilter VolumeFilter = VolumeFilter{
|
||||
// FilterVolume inspects a volume: it matches only when vol.AzureDisk is set,
// and then reports that disk's DiskName.
FilterVolume: func(vol *v1.Volume) (string, bool) {
|
||||
if vol.AzureDisk != nil {
|
||||
return vol.AzureDisk.DiskName, true
|
||||
}
|
||||
return "", false
|
||||
},
|
||||
|
||||
// FilterPersistentVolume applies the same check to a PersistentVolume, reading
// the Azure Disk source from pv.Spec instead of from the volume itself.
FilterPersistentVolume: func(pv *v1.PersistentVolume) (string, bool) {
|
||||
if pv.Spec.AzureDisk != nil {
|
||||
return pv.Spec.AzureDisk.DiskName, true
|
||||
}
|
||||
return "", false
|
||||
},
|
||||
}
|
||||
|
||||
type VolumeZoneChecker struct {
|
||||
pvInfo PersistentVolumeInfo
|
||||
pvcInfo PersistentVolumeClaimInfo
|
||||
|
|
|
@ -138,6 +138,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
|
|||
{"name": "NoVolumeZoneConflict"},
|
||||
{"name": "MaxEBSVolumeCount"},
|
||||
{"name": "MaxGCEPDVolumeCount"},
|
||||
{"name": "MaxAzureDiskVolumeCount"},
|
||||
{"name": "TestServiceAffinity", "argument": {"serviceAffinity" : {"labels" : ["region"]}}},
|
||||
{"name": "TestLabelsPresence", "argument": {"labelsPresence" : {"labels" : ["foo"], "presence":true}}}
|
||||
],"priorities": [
|
||||
|
@ -161,6 +162,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
|
|||
{Name: "NoVolumeZoneConflict"},
|
||||
{Name: "MaxEBSVolumeCount"},
|
||||
{Name: "MaxGCEPDVolumeCount"},
|
||||
{Name: "MaxAzureDiskVolumeCount"},
|
||||
{Name: "TestServiceAffinity", Argument: &schedulerapi.PredicateArgument{ServiceAffinity: &schedulerapi.ServiceAffinity{Labels: []string{"region"}}}},
|
||||
{Name: "TestLabelsPresence", Argument: &schedulerapi.PredicateArgument{LabelsPresence: &schedulerapi.LabelsPresence{Labels: []string{"foo"}, Presence: true}}},
|
||||
},
|
||||
|
@ -194,6 +196,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
|
|||
{"name": "CheckNodeMemoryPressure"},
|
||||
{"name": "MaxEBSVolumeCount"},
|
||||
{"name": "MaxGCEPDVolumeCount"},
|
||||
{"name": "MaxAzureDiskVolumeCount"},
|
||||
{"name": "MatchInterPodAffinity"},
|
||||
{"name": "GeneralPredicates"},
|
||||
{"name": "TestServiceAffinity", "argument": {"serviceAffinity" : {"labels" : ["region"]}}},
|
||||
|
@ -221,6 +224,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
|
|||
{Name: "CheckNodeMemoryPressure"},
|
||||
{Name: "MaxEBSVolumeCount"},
|
||||
{Name: "MaxGCEPDVolumeCount"},
|
||||
{Name: "MaxAzureDiskVolumeCount"},
|
||||
{Name: "MatchInterPodAffinity"},
|
||||
{Name: "GeneralPredicates"},
|
||||
{Name: "TestServiceAffinity", Argument: &schedulerapi.PredicateArgument{ServiceAffinity: &schedulerapi.ServiceAffinity{Labels: []string{"region"}}}},
|
||||
|
@ -257,6 +261,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
|
|||
{"name": "CheckNodeDiskPressure"},
|
||||
{"name": "MaxEBSVolumeCount"},
|
||||
{"name": "MaxGCEPDVolumeCount"},
|
||||
{"name": "MaxAzureDiskVolumeCount"},
|
||||
{"name": "MatchInterPodAffinity"},
|
||||
{"name": "GeneralPredicates"},
|
||||
{"name": "TestServiceAffinity", "argument": {"serviceAffinity" : {"labels" : ["region"]}}},
|
||||
|
@ -287,6 +292,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
|
|||
{Name: "CheckNodeDiskPressure"},
|
||||
{Name: "MaxEBSVolumeCount"},
|
||||
{Name: "MaxGCEPDVolumeCount"},
|
||||
{Name: "MaxAzureDiskVolumeCount"},
|
||||
{Name: "MatchInterPodAffinity"},
|
||||
{Name: "GeneralPredicates"},
|
||||
{Name: "TestServiceAffinity", Argument: &schedulerapi.PredicateArgument{ServiceAffinity: &schedulerapi.ServiceAffinity{Labels: []string{"region"}}}},
|
||||
|
|
|
@ -36,9 +36,11 @@ import (
|
|||
|
||||
const (
|
||||
// GCE instances can have up to 16 PD volumes attached.
|
||||
DefaultMaxGCEPDVolumes = 16
|
||||
ClusterAutoscalerProvider = "ClusterAutoscalerProvider"
|
||||
StatefulSetKind = "StatefulSet"
|
||||
DefaultMaxGCEPDVolumes = 16
|
||||
// Larger Azure VMs can actually have many more disks attached. TODO: We should determine the max based on VM size.
|
||||
DefaultMaxAzureDiskVolumes = 16
|
||||
ClusterAutoscalerProvider = "ClusterAutoscalerProvider"
|
||||
StatefulSetKind = "StatefulSet"
|
||||
)
|
||||
|
||||
func init() {
|
||||
|
@ -136,6 +138,15 @@ func defaultPredicates() sets.String {
|
|||
return predicates.NewMaxPDVolumeCountPredicate(predicates.GCEPDVolumeFilter, maxVols, args.PVInfo, args.PVCInfo)
|
||||
},
|
||||
),
|
||||
// Fit is determined by whether or not there would be too many Azure Disk volumes attached to the node
|
||||
factory.RegisterFitPredicateFactory(
|
||||
"MaxAzureDiskVolumeCount",
|
||||
func(args factory.PluginFactoryArgs) algorithm.FitPredicate {
|
||||
// TODO: allow for generically parameterized scheduler predicates, because this is a bit ugly
|
||||
maxVols := getMaxVols(DefaultMaxAzureDiskVolumes)
|
||||
return predicates.NewMaxPDVolumeCountPredicate(predicates.AzureDiskVolumeFilter, maxVols, args.PVInfo, args.PVCInfo)
|
||||
},
|
||||
),
|
||||
// Fit is determined by inter-pod affinity.
|
||||
factory.RegisterFitPredicateFactory(
|
||||
"MatchInterPodAffinity",
|
||||
|
|
Loading…
Reference in New Issue