diff --git a/pkg/scheduler/algorithm/predicates/csi_volume_predicate.go b/pkg/scheduler/algorithm/predicates/csi_volume_predicate.go index 91d9c5e4ba..fd47540397 100644 --- a/pkg/scheduler/algorithm/predicates/csi_volume_predicate.go +++ b/pkg/scheduler/algorithm/predicates/csi_volume_predicate.go @@ -154,19 +154,22 @@ func (c *CSIMaxVolumeLimitChecker) getCSIDriver(pvc *v1.PersistentVolumeClaim) ( placeHolderCSIDriver := "" placeHolderHandle := "" if pvName == "" { - klog.V(4).Infof("Persistent volume had no name for claim %s/%s", namespace, pvcName) + klog.V(5).Infof("Persistent volume had no name for claim %s/%s", namespace, pvcName) return c.getDriverNameFromSC(pvc) } pv, err := c.pvInfo.GetPersistentVolumeInfo(pvName) if err != nil { klog.V(4).Infof("Unable to look up PV info for PVC %s/%s and PV %s", namespace, pvcName, pvName) - return placeHolderCSIDriver, placeHolderHandle + // If we can't fetch PV associated with PVC, maybe it got deleted + // or PVC was prebound to a PV that hasn't been created yet. 
+ // fall back to using StorageClass for volume counting + return c.getDriverNameFromSC(pvc) } csiSource := pv.Spec.PersistentVolumeSource.CSI if csiSource == nil { - klog.V(4).Infof("Not considering non-CSI volume %s/%s", namespace, pvcName) + klog.V(5).Infof("Not considering non-CSI volume %s/%s", namespace, pvcName) return placeHolderCSIDriver, placeHolderHandle } return csiSource.Driver, csiSource.VolumeHandle @@ -175,21 +178,26 @@ func (c *CSIMaxVolumeLimitChecker) getCSIDriver(pvc *v1.PersistentVolumeClaim) ( func (c *CSIMaxVolumeLimitChecker) getDriverNameFromSC(pvc *v1.PersistentVolumeClaim) (string, string) { namespace := pvc.Namespace pvcName := pvc.Name - scName := *pvc.Spec.StorageClassName + scName := pvc.Spec.StorageClassName placeHolderCSIDriver := "" placeHolderHandle := "" - if scName == "" { - klog.V(4).Infof("pvc %s/%s has no storageClass", namespace, pvcName) + if scName == nil { + // if StorageClass is not set or not found, then PVC must be using immediate binding mode + // and hence it must be bound before scheduling. So it is safe to not count it. + klog.V(5).Infof("pvc %s/%s has no storageClass", namespace, pvcName) return placeHolderCSIDriver, placeHolderHandle } - storageClass, err := c.scInfo.GetStorageClassInfo(scName) + storageClass, err := c.scInfo.GetStorageClassInfo(*scName) if err != nil { - klog.V(4).Infof("no storage %s found for pvc %s/%s", scName, namespace, pvcName) + klog.V(5).Infof("no storage %s found for pvc %s/%s", *scName, namespace, pvcName) return placeHolderCSIDriver, placeHolderHandle } + // We use random prefix to avoid conflict with volume-ids. If PVC is bound in the middle of the + // predicate and there is another pod (on same node) that uses the same volume, then we will overcount + // the volume and consider both volumes as different. 
volumeHandle := fmt.Sprintf("%s-%s/%s", c.randomVolumeIDPrefix, namespace, pvcName) return storageClass.Provisioner, volumeHandle } diff --git a/pkg/scheduler/algorithm/predicates/csi_volume_predicate_test.go b/pkg/scheduler/algorithm/predicates/csi_volume_predicate_test.go index 2603811c96..3cc4d8dbc5 100644 --- a/pkg/scheduler/algorithm/predicates/csi_volume_predicate_test.go +++ b/pkg/scheduler/algorithm/predicates/csi_volume_predicate_test.go @@ -17,6 +17,7 @@ limitations under the License. package predicates import ( + "fmt" "reflect" "testing" @@ -35,7 +36,7 @@ func TestCSIVolumeCountPredicate(t *testing.T) { { VolumeSource: v1.VolumeSource{ PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ - ClaimName: "csi-ebs", + ClaimName: "csi-ebs-0", }, }, }, @@ -83,7 +84,70 @@ func TestCSIVolumeCountPredicate(t *testing.T) { { VolumeSource: v1.VolumeSource{ PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ - ClaimName: "csi-ebs-4", + ClaimName: "csi-4", + }, + }, + }, + }, + }, + } + + // Different pod than pendingVolumePod, but using the same unbound PVC + unboundPVCPod2 := &v1.Pod{ + Spec: v1.PodSpec{ + Volumes: []v1.Volume{ + { + VolumeSource: v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: "csi-4", + }, + }, + }, + }, + }, + } + + missingPVPod := &v1.Pod{ + Spec: v1.PodSpec{ + Volumes: []v1.Volume{ + { + VolumeSource: v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: "csi-6", + }, + }, + }, + }, + }, + } + + noSCPVCPod := &v1.Pod{ + Spec: v1.PodSpec{ + Volumes: []v1.Volume{ + { + VolumeSource: v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: "csi-5", + }, + }, + }, + }, + }, + } + gceTwoVolPod := &v1.Pod{ + Spec: v1.PodSpec{ + Volumes: []v1.Volume{ + { + VolumeSource: v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: "cs-gce-1", + }, + }, + }, + { + VolumeSource: 
v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: "csi-gce-2", }, }, }, @@ -96,22 +160,25 @@ func TestCSIVolumeCountPredicate(t *testing.T) { existingPods []*v1.Pod filterName string maxVols int + driverNames []string fits bool test string }{ { newPod: oneVolPod, existingPods: []*v1.Pod{runningPod, twoVolPod}, - filterName: "csi-ebs", + filterName: "csi", maxVols: 4, + driverNames: []string{"ebs"}, fits: true, test: "fits when node capacity >= new pods CSI volume", }, { newPod: oneVolPod, existingPods: []*v1.Pod{runningPod, twoVolPod}, - filterName: "csi-ebs", + filterName: "csi", maxVols: 2, + driverNames: []string{"ebs"}, fits: false, test: "doesn't when node capacity <= pods CSI volume", }, @@ -119,10 +186,60 @@ func TestCSIVolumeCountPredicate(t *testing.T) { { newPod: oneVolPod, existingPods: []*v1.Pod{pendingVolumePod, twoVolPod}, - filterName: "csi-ebs", + filterName: "csi", maxVols: 2, + driverNames: []string{"ebs"}, fits: false, - test: "doesn't when node capacity <= pods CSI volume", + test: "count pending PVCs towards capacity <= pods CSI volume", + }, + // two same pending PVCs should be counted as 1 + { + newPod: oneVolPod, + existingPods: []*v1.Pod{pendingVolumePod, unboundPVCPod2, twoVolPod}, + filterName: "csi", + maxVols: 3, + driverNames: []string{"ebs"}, + fits: true, + test: "count multiple pending pvcs towards capacity >= pods CSI volume", + }, + // should count PVCs with invalid PV name but valid SC + { + newPod: oneVolPod, + existingPods: []*v1.Pod{missingPVPod, twoVolPod}, + filterName: "csi", + maxVols: 2, + driverNames: []string{"ebs"}, + fits: false, + test: "should count PVCs with invalid PV name but valid SC", + }, + // don't count a volume which has storageclass missing + { + newPod: oneVolPod, + existingPods: []*v1.Pod{runningPod, noSCPVCPod}, + filterName: "csi", + maxVols: 2, + driverNames: []string{"ebs"}, + fits: true, + test: "don't count pvcs with missing SC towards capacity", + }, + 
// don't count multiple volume types + { + newPod: oneVolPod, + existingPods: []*v1.Pod{gceTwoVolPod, twoVolPod}, + filterName: "csi", + maxVols: 2, + driverNames: []string{"ebs", "gce"}, + fits: true, + test: "don't count pvcs with different type towards capacity", + }, + { + newPod: gceTwoVolPod, + existingPods: []*v1.Pod{twoVolPod, runningPod}, + filterName: "csi", + maxVols: 2, + driverNames: []string{"ebs", "gce"}, + fits: true, + test: "don't count pvcs with different type towards capacity", }, } @@ -130,8 +247,11 @@ func TestCSIVolumeCountPredicate(t *testing.T) { expectedFailureReasons := []PredicateFailureReason{ErrMaxVolumeCountExceeded} // running attachable predicate tests with feature gate and limit present on nodes for _, test := range tests { - node := getNodeWithPodAndVolumeLimits(test.existingPods, int64(test.maxVols), test.filterName) - pred := NewCSIMaxVolumeLimitPredicate(getFakeCSIPVInfo("csi-ebs", "csi-ebs"), getFakeCSIPVCInfo("csi-ebs", "csi-ebs-gp2"), getFakeCSIStorageClassInfo("csi-ebs-gp2", "csi-ebs")) + node := getNodeWithPodAndVolumeLimits(test.existingPods, int64(test.maxVols), test.driverNames...) 
+ pred := NewCSIMaxVolumeLimitPredicate(getFakeCSIPVInfo(test.filterName, test.driverNames...), + getFakeCSIPVCInfo(test.filterName, "csi-sc", test.driverNames...), + getFakeCSIStorageClassInfo("csi-sc", test.driverNames[0])) + fits, reasons, err := pred(test.newPod, GetPredicateMetadata(test.newPod, nil), node) if err != nil { t.Errorf("Using allocatable [%s]%s: unexpected error: %v", test.filterName, test.test, err) @@ -145,63 +265,56 @@ func TestCSIVolumeCountPredicate(t *testing.T) { } } -func getFakeCSIPVInfo(volumeName, driverName string) FakePersistentVolumeInfo { - return FakePersistentVolumeInfo{ - { - ObjectMeta: metav1.ObjectMeta{Name: volumeName}, - Spec: v1.PersistentVolumeSpec{ - PersistentVolumeSource: v1.PersistentVolumeSource{ - CSI: &v1.CSIPersistentVolumeSource{ - Driver: driverName, - VolumeHandle: volumeName, +func getFakeCSIPVInfo(volumeName string, driverNames ...string) FakePersistentVolumeInfo { + pvInfos := FakePersistentVolumeInfo{} + for _, driver := range driverNames { + for j := 0; j < 4; j++ { + volumeHandle := fmt.Sprintf("%s-%s-%d", volumeName, driver, j) + pv := v1.PersistentVolume{ + ObjectMeta: metav1.ObjectMeta{Name: volumeHandle}, + Spec: v1.PersistentVolumeSpec{ + PersistentVolumeSource: v1.PersistentVolumeSource{ + CSI: &v1.CSIPersistentVolumeSource{ + Driver: driver, + VolumeHandle: volumeHandle, + }, }, }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{Name: volumeName + "-2"}, - Spec: v1.PersistentVolumeSpec{ - PersistentVolumeSource: v1.PersistentVolumeSource{ - CSI: &v1.CSIPersistentVolumeSource{ - Driver: driverName, - VolumeHandle: volumeName + "-2", - }, - }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{Name: volumeName + "-3"}, - Spec: v1.PersistentVolumeSpec{ - PersistentVolumeSource: v1.PersistentVolumeSource{ - CSI: &v1.CSIPersistentVolumeSource{ - Driver: driverName, - VolumeHandle: volumeName + "-3", - }, - }, - }, - }, + } + pvInfos = append(pvInfos, pv) + } + } + return pvInfos } -func 
getFakeCSIPVCInfo(volumeName, scName string) FakePersistentVolumeClaimInfo { - return FakePersistentVolumeClaimInfo{ - { - ObjectMeta: metav1.ObjectMeta{Name: volumeName}, - Spec: v1.PersistentVolumeClaimSpec{VolumeName: volumeName}, - }, - { - ObjectMeta: metav1.ObjectMeta{Name: volumeName + "-2"}, - Spec: v1.PersistentVolumeClaimSpec{VolumeName: volumeName + "-2"}, - }, - { - ObjectMeta: metav1.ObjectMeta{Name: volumeName + "-3"}, - Spec: v1.PersistentVolumeClaimSpec{VolumeName: volumeName + "-3"}, - }, - { - ObjectMeta: metav1.ObjectMeta{Name: volumeName + "-4"}, - Spec: v1.PersistentVolumeClaimSpec{StorageClassName: &scName}, - }, +func getFakeCSIPVCInfo(volumeName, scName string, driverNames ...string) FakePersistentVolumeClaimInfo { + pvcInfos := FakePersistentVolumeClaimInfo{} + for _, driver := range driverNames { + for j := 0; j < 4; j++ { + v := fmt.Sprintf("%s-%s-%d", volumeName, driver, j) + pvc := v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{Name: v}, + Spec: v1.PersistentVolumeClaimSpec{VolumeName: v}, + } + pvcInfos = append(pvcInfos, pvc) + } } + + pvcInfos = append(pvcInfos, v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{Name: volumeName + "-4"}, + Spec: v1.PersistentVolumeClaimSpec{StorageClassName: &scName}, + }) + pvcInfos = append(pvcInfos, v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{Name: volumeName + "-5"}, + Spec: v1.PersistentVolumeClaimSpec{}, + }) + // a pvc with missing PV but available storageclass. 
+ pvcInfos = append(pvcInfos, v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{Name: volumeName + "-6"}, + Spec: v1.PersistentVolumeClaimSpec{StorageClassName: &scName, VolumeName: "missing-in-action"}, + }) + return pvcInfos } func getFakeCSIStorageClassInfo(scName, provisionerName string) FakeStorageClassInfo { diff --git a/pkg/scheduler/algorithm/predicates/max_attachable_volume_predicate_test.go b/pkg/scheduler/algorithm/predicates/max_attachable_volume_predicate_test.go index 4da8962ad0..75afdc6e5c 100644 --- a/pkg/scheduler/algorithm/predicates/max_attachable_volume_predicate_test.go +++ b/pkg/scheduler/algorithm/predicates/max_attachable_volume_predicate_test.go @@ -937,16 +937,17 @@ func TestMaxVolumeFuncM4(t *testing.T) { } } -func getNodeWithPodAndVolumeLimits(pods []*v1.Pod, limit int64, filter string) *schedulernodeinfo.NodeInfo { +func getNodeWithPodAndVolumeLimits(pods []*v1.Pod, limit int64, driverNames ...string) *schedulernodeinfo.NodeInfo { nodeInfo := schedulernodeinfo.NewNodeInfo(pods...) node := &v1.Node{ ObjectMeta: metav1.ObjectMeta{Name: "node-for-max-pd-test-1"}, Status: v1.NodeStatus{ - Allocatable: v1.ResourceList{ - getVolumeLimitKey(filter): *resource.NewQuantity(limit, resource.DecimalSI), - }, + Allocatable: v1.ResourceList{}, }, } + for _, driver := range driverNames { + node.Status.Allocatable[getVolumeLimitKey(driver)] = *resource.NewQuantity(limit, resource.DecimalSI) + } nodeInfo.SetNode(node) return nodeInfo }