Improve HPA sample sanitization

After my previous changes, HPA wasn't behaving correctly in the following
situation:

- Pods use a lot of CPU during initialization and become ready right after they initialize,
- Scale-up triggers,
- When the new pods become ready, HPA counts their CPU usage (even though it isn't related to any work that needs doing),
- Another scale-up follows, even though the existing pods can handle the work without a problem.
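
The change below addresses this by sanitizing CPU samples: pods that are still "hot" from
initialization are ignored when deciding whether to scale up. Here is a minimal, self-contained
sketch of the idea, simplified relative to the real groupPods added in the diff; the podSample
type, shouldIgnoreCPUSample helper, and recentStartWindow name are illustrative, though the
2-minute window mirrors the check groupPods applies to ready pods.

package main

import (
	"fmt"
	"time"
)

// Illustrative window; the controller itself uses a hard-coded 2-minute check for
// ready pods and maxDelayOfInitialReadinessStatus for pods that never became ready.
const recentStartWindow = 2 * time.Minute

type podSample struct {
	name      string
	startTime time.Time
	ready     bool
	cpuMilli  int64
}

// shouldIgnoreCPUSample reports whether a pod's CPU sample should be dropped because
// the pod is either not ready yet or became ready so recently that its CPU usage
// mostly reflects startup work rather than real load.
func shouldIgnoreCPUSample(p podSample, now time.Time) bool {
	if !p.ready {
		return true
	}
	return now.Sub(p.startTime) < recentStartWindow
}

func main() {
	now := time.Now()
	pods := []podSample{
		{"old-ready", now.Add(-10 * time.Minute), true, 300},
		{"just-started", now.Add(-30 * time.Second), true, 900}, // hot CPU from initialization
	}
	var total, counted int64
	for _, p := range pods {
		if shouldIgnoreCPUSample(p, now) {
			fmt.Printf("ignoring %s (started %s ago)\n", p.name, now.Sub(p.startTime).Round(time.Second))
			continue
		}
		total += p.cpuMilli
		counted++
	}
	fmt.Printf("average over counted pods: %dm\n", total/counted)
}

With samples from hot pods dropped on scale-up, the average above stays at 300m instead of
jumping to 600m, so the initialization spike no longer triggers a second scale-up.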
Joachim Bartosik 2018-08-08 15:00:17 +02:00
parent 816f2a4868
commit 7d6676eab1
6 changed files with 467 additions and 156 deletions


@ -73,6 +73,7 @@ go_test(
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/watch:go_default_library",
"//staging/src/k8s.io/client-go/informers:go_default_library",
"//staging/src/k8s.io/client-go/kubernetes/fake:go_default_library",


@ -103,6 +103,7 @@ type testCase struct {
reportedLevels []uint64
reportedCPURequests []resource.Quantity
reportedPodReadiness []v1.ConditionStatus
reportedPodStartTime []metav1.Time
reportedPodPhase []v1.PodPhase
scaleUpdated bool
statusUpdated bool
@ -261,6 +262,10 @@ func (tc *testCase) prepareTestClient(t *testing.T) (*fake.Clientset, *metricsfa
if tc.reportedPodReadiness != nil {
podReadiness = tc.reportedPodReadiness[i]
}
var podStartTime metav1.Time
if tc.reportedPodStartTime != nil {
podStartTime = tc.reportedPodStartTime[i]
}
podPhase := v1.PodRunning
if tc.reportedPodPhase != nil {
@ -283,6 +288,7 @@ func (tc *testCase) prepareTestClient(t *testing.T) (*fake.Clientset, *metricsfa
Status: podReadiness,
},
},
StartTime: &podStartTime,
},
ObjectMeta: metav1.ObjectMeta{
Name: podName,
@ -660,6 +666,14 @@ func (tc *testCase) setupController(t *testing.T) (*HorizontalController, inform
return hpaController, informerFactory
}
func hotCpuCreationTime() metav1.Time {
return metav1.Time{Time: time.Now()}
}
func coolCpuCreationTime() metav1.Time {
return metav1.Time{Time: time.Now().Add(-3 * time.Minute)}
}
func (tc *testCase) runTestWithController(t *testing.T, hpaController *HorizontalController, informerFactory informers.SharedInformerFactory) {
stop := make(chan struct{})
defer close(stop)
@ -716,6 +730,23 @@ func TestScaleUpUnreadyLessScale(t *testing.T) {
tc.runTest(t)
}
func TestScaleUpHotCpuLessScale(t *testing.T) {
tc := testCase{
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 3,
expectedDesiredReplicas: 4,
CPUTarget: 30,
CPUCurrent: 60,
verifyCPUCurrent: true,
reportedLevels: []uint64{300, 500, 700},
reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
reportedPodStartTime: []metav1.Time{hotCpuCreationTime(), coolCpuCreationTime(), coolCpuCreationTime()},
useMetricsAPI: true,
}
tc.runTest(t)
}
func TestScaleUpUnreadyNoScale(t *testing.T) {
tc := testCase{
minReplicas: 2,
@ -738,6 +769,29 @@ func TestScaleUpUnreadyNoScale(t *testing.T) {
tc.runTest(t)
}
func TestScaleUpHotCpuNoScale(t *testing.T) {
tc := testCase{
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 3,
expectedDesiredReplicas: 3,
CPUTarget: 30,
CPUCurrent: 40,
verifyCPUCurrent: true,
reportedLevels: []uint64{400, 500, 700},
reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
reportedPodReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
reportedPodStartTime: []metav1.Time{coolCpuCreationTime(), hotCpuCreationTime(), hotCpuCreationTime()},
useMetricsAPI: true,
expectedConditions: statusOkWithOverrides(autoscalingv2.HorizontalPodAutoscalerCondition{
Type: autoscalingv2.AbleToScale,
Status: v1.ConditionTrue,
Reason: "ReadyForNewScale",
}),
}
tc.runTest(t)
}
func TestScaleUpIgnoresFailedPods(t *testing.T) {
tc := testCase{
minReplicas: 2,
@ -818,12 +872,12 @@ func TestScaleUpCM(t *testing.T) {
tc.runTest(t)
}
func TestScaleUpCMUnreadyLessScale(t *testing.T) {
func TestScaleUpCMUnreadyAndHotCpuNoLessScale(t *testing.T) {
tc := testCase{
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 3,
expectedDesiredReplicas: 4,
expectedDesiredReplicas: 6,
CPUTarget: 0,
metricsTarget: []autoscalingv2.MetricSpec{
{
@ -836,17 +890,18 @@ func TestScaleUpCMUnreadyLessScale(t *testing.T) {
},
reportedLevels: []uint64{50000, 10000, 30000},
reportedPodReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse},
reportedPodStartTime: []metav1.Time{coolCpuCreationTime(), coolCpuCreationTime(), hotCpuCreationTime()},
reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
}
tc.runTest(t)
}
func TestScaleUpCMUnreadyNoScaleWouldScaleDown(t *testing.T) {
func TestScaleUpCMUnreadyandCpuHot(t *testing.T) {
tc := testCase{
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 3,
expectedDesiredReplicas: 3,
expectedDesiredReplicas: 6,
CPUTarget: 0,
metricsTarget: []autoscalingv2.MetricSpec{
{
@ -859,11 +914,48 @@ func TestScaleUpCMUnreadyNoScaleWouldScaleDown(t *testing.T) {
},
reportedLevels: []uint64{50000, 15000, 30000},
reportedPodReadiness: []v1.ConditionStatus{v1.ConditionFalse, v1.ConditionTrue, v1.ConditionFalse},
reportedPodStartTime: []metav1.Time{hotCpuCreationTime(), coolCpuCreationTime(), hotCpuCreationTime()},
reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
expectedConditions: statusOkWithOverrides(autoscalingv2.HorizontalPodAutoscalerCondition{
Type: autoscalingv2.AbleToScale,
Status: v1.ConditionTrue,
Reason: "ReadyForNewScale",
Reason: "SucceededRescale",
}, autoscalingv2.HorizontalPodAutoscalerCondition{
Type: autoscalingv2.ScalingLimited,
Status: v1.ConditionTrue,
Reason: "TooManyReplicas",
}),
}
tc.runTest(t)
}
func TestScaleUpHotCpuNoScaleWouldScaleDown(t *testing.T) {
tc := testCase{
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 3,
expectedDesiredReplicas: 6,
CPUTarget: 0,
metricsTarget: []autoscalingv2.MetricSpec{
{
Type: autoscalingv2.PodsMetricSourceType,
Pods: &autoscalingv2.PodsMetricSource{
MetricName: "qps",
TargetAverageValue: resource.MustParse("15.0"),
},
},
},
reportedLevels: []uint64{50000, 15000, 30000},
reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
reportedPodStartTime: []metav1.Time{hotCpuCreationTime(), coolCpuCreationTime(), hotCpuCreationTime()},
expectedConditions: statusOkWithOverrides(autoscalingv2.HorizontalPodAutoscalerCondition{
Type: autoscalingv2.AbleToScale,
Status: v1.ConditionTrue,
Reason: "SucceededRescale",
}, autoscalingv2.HorizontalPodAutoscalerCondition{
Type: autoscalingv2.ScalingLimited,
Status: v1.ConditionTrue,
Reason: "TooManyReplicas",
}),
}
tc.runTest(t)
@ -1043,7 +1135,7 @@ func TestScaleDownPerPodCMExternal(t *testing.T) {
tc.runTest(t)
}
func TestScaleDownIgnoresUnreadyPods(t *testing.T) {
func TestScaleDownIncludeUnreadyPods(t *testing.T) {
tc := testCase{
minReplicas: 2,
maxReplicas: 6,
@ -1060,6 +1152,23 @@ func TestScaleDownIgnoresUnreadyPods(t *testing.T) {
tc.runTest(t)
}
func TestScaleDownIgnoreHotCpuPods(t *testing.T) {
tc := testCase{
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 5,
expectedDesiredReplicas: 2,
CPUTarget: 50,
CPUCurrent: 30,
verifyCPUCurrent: true,
reportedLevels: []uint64{100, 300, 500, 250, 250},
reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
useMetricsAPI: true,
reportedPodStartTime: []metav1.Time{coolCpuCreationTime(), coolCpuCreationTime(), coolCpuCreationTime(), hotCpuCreationTime(), hotCpuCreationTime()},
}
tc.runTest(t)
}
func TestScaleDownIgnoresFailedPods(t *testing.T) {
tc := testCase{
minReplicas: 2,
@ -1975,7 +2084,7 @@ func TestAvoidUncessaryUpdates(t *testing.T) {
verifyCPUCurrent: true,
reportedLevels: []uint64{400, 500, 700},
reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
reportedPodReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
reportedPodStartTime: []metav1.Time{coolCpuCreationTime(), hotCpuCreationTime(), hotCpuCreationTime()},
useMetricsAPI: true,
}
testClient, _, _, _, _ := tc.prepareTestClient(t)


@ -222,6 +222,7 @@ func (tc *legacyTestCase) prepareTestClient(t *testing.T) (*fake.Clientset, *sca
podName := fmt.Sprintf("%s-%d", podNamePrefix, i)
pod := v1.Pod{
Status: v1.PodStatus{
StartTime: &metav1.Time{Time: time.Now().Add(-3 * time.Minute)},
Phase: v1.PodRunning,
Conditions: []v1.PodCondition{
{
@ -545,8 +546,7 @@ func TestLegacyScaleUpUnreadyLessScale(t *testing.T) {
initialReplicas: 3,
desiredReplicas: 4,
CPUTarget: 30,
CPUCurrent: 60,
verifyCPUCurrent: true,
verifyCPUCurrent: false,
reportedLevels: []uint64{300, 500, 700},
reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
reportedPodReadiness: []v1.ConditionStatus{v1.ConditionFalse, v1.ConditionTrue, v1.ConditionTrue},
@ -634,12 +634,12 @@ func TestLegacyScaleUpCM(t *testing.T) {
tc.runTest(t)
}
func TestLegacyScaleUpCMUnreadyLessScale(t *testing.T) {
func TestLegacyScaleUpCMUnreadyNoLessScale(t *testing.T) {
tc := legacyTestCase{
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 3,
desiredReplicas: 4,
desiredReplicas: 6,
CPUTarget: 0,
metricsTarget: []autoscalingv2.MetricSpec{
{
@ -662,7 +662,7 @@ func TestLegacyScaleUpCMUnreadyNoScaleWouldScaleDown(t *testing.T) {
minReplicas: 2,
maxReplicas: 6,
initialReplicas: 3,
desiredReplicas: 3,
desiredReplicas: 6,
CPUTarget: 0,
metricsTarget: []autoscalingv2.MetricSpec{
{


@ -68,6 +68,7 @@ func (tc *legacyReplicaCalcTestCase) prepareTestClient(t *testing.T) *fake.Clien
pod := v1.Pod{
Status: v1.PodStatus{
Phase: v1.PodRunning,
StartTime: &metav1.Time{Time: time.Now().Add(-3 * time.Minute)},
Conditions: []v1.PodCondition{
{
Type: v1.PodReady,
@ -310,10 +311,10 @@ func TestLegacyReplicaCalcScaleUpCM(t *testing.T) {
tc.runTest(t)
}
func TestLegacyReplicaCalcScaleUpCMUnreadyLessScale(t *testing.T) {
func TestLegacyReplicaCalcScaleUpCMUnreadyNoLessScale(t *testing.T) {
tc := legacyReplicaCalcTestCase{
currentReplicas: 3,
expectedReplicas: 4,
expectedReplicas: 6,
podReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse},
metric: &metricInfo{
name: "qps",
@ -325,16 +326,16 @@ func TestLegacyReplicaCalcScaleUpCMUnreadyLessScale(t *testing.T) {
tc.runTest(t)
}
func TestLegacyReplicaCalcScaleUpCMUnreadyNoScaleWouldScaleDown(t *testing.T) {
func TestLegacyReplicaCalcScaleUpCMUnreadyScale(t *testing.T) {
tc := legacyReplicaCalcTestCase{
currentReplicas: 3,
expectedReplicas: 3,
expectedReplicas: 7,
podReadiness: []v1.ConditionStatus{v1.ConditionFalse, v1.ConditionTrue, v1.ConditionFalse},
metric: &metricInfo{
name: "qps",
levels: []int64{50000, 15000, 30000},
targetUtilization: 15000,
expectedUtilization: 15000,
expectedUtilization: 31666,
},
}
tc.runTest(t)


@ -73,41 +73,11 @@ func (c *ReplicaCalculator) GetResourceReplicas(currentReplicas int32, targetUti
return 0, 0, 0, time.Time{}, fmt.Errorf("no pods returned by selector while calculating replica count")
}
requests := make(map[string]int64, itemsLen)
readyPodCount := 0
unreadyPods := sets.NewString()
missingPods := sets.NewString()
for _, pod := range podList.Items {
podSum := int64(0)
for _, container := range pod.Spec.Containers {
if containerRequest, ok := container.Resources.Requests[resource]; ok {
podSum += containerRequest.MilliValue()
} else {
return 0, 0, 0, time.Time{}, fmt.Errorf("missing request for %s on container %s in pod %s/%s", resource, container.Name, namespace, pod.Name)
}
}
requests[pod.Name] = podSum
if pod.Status.Phase != v1.PodRunning || !podutil.IsPodReady(&pod) {
// save this pod name for later, but pretend it doesn't exist for now
if pod.Status.Phase != v1.PodFailed {
// Failed pods should not be counted as unready pods as they will
// not become running anymore.
unreadyPods.Insert(pod.Name)
}
delete(metrics, pod.Name)
continue
}
if _, found := metrics[pod.Name]; !found {
// save this pod name for later, but pretend it doesn't exist for now
missingPods.Insert(pod.Name)
continue
}
readyPodCount++
readyPodCount, ignoredPods, missingPods := groupPods(podList.Items, metrics, resource)
removeMetricsForPods(metrics, ignoredPods)
requests, err := calculatePodRequests(podList.Items, resource)
if err != nil {
return 0, 0, 0, time.Time{}, err
}
if len(metrics) == 0 {
@ -119,8 +89,8 @@ func (c *ReplicaCalculator) GetResourceReplicas(currentReplicas int32, targetUti
return 0, 0, 0, time.Time{}, err
}
rebalanceUnready := len(unreadyPods) > 0 && usageRatio > 1.0
if !rebalanceUnready && len(missingPods) == 0 {
rebalanceIgnored := len(ignoredPods) > 0 && usageRatio > 1.0
if !rebalanceIgnored && len(missingPods) == 0 {
if math.Abs(1.0-usageRatio) <= c.tolerance {
// return the current replicas if the change would be too small
return currentReplicas, utilization, rawUtilization, timestamp, nil
@ -144,9 +114,9 @@ func (c *ReplicaCalculator) GetResourceReplicas(currentReplicas int32, targetUti
}
}
if rebalanceUnready {
if rebalanceIgnored {
// on a scale-up, treat unready pods as using 0% of the resource request
for podName := range unreadyPods {
for podName := range ignoredPods {
metrics[podName] = 0
}
}
@ -176,7 +146,7 @@ func (c *ReplicaCalculator) GetRawResourceReplicas(currentReplicas int32, target
return 0, 0, time.Time{}, fmt.Errorf("unable to get metrics for resource %s: %v", resource, err)
}
replicaCount, utilization, err = c.calcPlainMetricReplicas(metrics, currentReplicas, targetUtilization, namespace, selector)
replicaCount, utilization, err = c.calcPlainMetricReplicas(metrics, currentReplicas, targetUtilization, namespace, selector, resource)
return replicaCount, utilization, timestamp, err
}
@ -189,12 +159,12 @@ func (c *ReplicaCalculator) GetMetricReplicas(currentReplicas int32, targetUtili
return 0, 0, time.Time{}, fmt.Errorf("unable to get metric %s: %v", metricName, err)
}
replicaCount, utilization, err = c.calcPlainMetricReplicas(metrics, currentReplicas, targetUtilization, namespace, selector)
replicaCount, utilization, err = c.calcPlainMetricReplicas(metrics, currentReplicas, targetUtilization, namespace, selector, v1.ResourceName(""))
return replicaCount, utilization, timestamp, err
}
// calcPlainMetricReplicas calculates the desired replicas for plain (i.e. non-utilization percentage) metrics.
func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMetricsInfo, currentReplicas int32, targetUtilization int64, namespace string, selector labels.Selector) (replicaCount int32, utilization int64, err error) {
func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMetricsInfo, currentReplicas int32, targetUtilization int64, namespace string, selector labels.Selector, resource v1.ResourceName) (replicaCount int32, utilization int64, err error) {
podList, err := c.podsGetter.Pods(namespace).List(metav1.ListOptions{LabelSelector: selector.String()})
if err != nil {
return 0, 0, fmt.Errorf("unable to get pods while calculating replica count: %v", err)
@ -204,26 +174,8 @@ func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMet
return 0, 0, fmt.Errorf("no pods returned by selector while calculating replica count")
}
readyPodCount := 0
unreadyPods := sets.NewString()
missingPods := sets.NewString()
for _, pod := range podList.Items {
if pod.Status.Phase != v1.PodRunning || !hasPodBeenReadyBefore(&pod) {
// save this pod name for later, but pretend it doesn't exist for now
unreadyPods.Insert(pod.Name)
delete(metrics, pod.Name)
continue
}
if _, found := metrics[pod.Name]; !found {
// save this pod name for later, but pretend it doesn't exist for now
missingPods.Insert(pod.Name)
continue
}
readyPodCount++
}
readyPodCount, ignoredPods, missingPods := groupPods(podList.Items, metrics, resource)
removeMetricsForPods(metrics, ignoredPods)
if len(metrics) == 0 {
return 0, 0, fmt.Errorf("did not receive metrics for any ready pods")
@ -231,9 +183,9 @@ func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMet
usageRatio, utilization := metricsclient.GetMetricUtilizationRatio(metrics, targetUtilization)
rebalanceUnready := len(unreadyPods) > 0 && usageRatio > 1.0
rebalanceIgnored := len(ignoredPods) > 0 && usageRatio > 1.0
if !rebalanceUnready && len(missingPods) == 0 {
if !rebalanceIgnored && len(missingPods) == 0 {
if math.Abs(1.0-usageRatio) <= c.tolerance {
// return the current replicas if the change would be too small
return currentReplicas, utilization, nil
@ -257,9 +209,9 @@ func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMet
}
}
if rebalanceUnready {
if rebalanceIgnored {
// on a scale-up, treat unready pods as using 0% of the resource request
for podName := range unreadyPods {
for podName := range ignoredPods {
metrics[podName] = 0
}
}
@ -386,21 +338,58 @@ func (c *ReplicaCalculator) GetExternalPerPodMetricReplicas(currentReplicas int3
return replicaCount, utilization, timestamp, nil
}
// hasPodBeenReadyBefore returns true if the pod is ready or if it's not ready
func hasPodBeenReadyBefore(pod *v1.Pod) bool {
_, readyCondition := podutil.GetPodCondition(&pod.Status, v1.PodReady)
if readyCondition == nil {
return false
func groupPods(pods []v1.Pod, metrics metricsclient.PodMetricsInfo, resource v1.ResourceName) (readyPodCount int, ignoredPods sets.String, missingPods sets.String) {
missingPods = sets.NewString()
ignoredPods = sets.NewString()
for _, pod := range pods {
if pod.Status.Phase == v1.PodFailed {
continue
}
if readyCondition.Status == v1.ConditionTrue {
return true
if _, found := metrics[pod.Name]; !found {
missingPods.Insert(pod.Name)
continue
}
lastReady := readyCondition.LastTransitionTime.Time
if pod.Status.StartTime == nil {
return false
if resource == v1.ResourceCPU {
var ignorePod bool
_, condition := podutil.GetPodCondition(&pod.Status, v1.PodReady)
if condition == nil || pod.Status.StartTime == nil {
ignorePod = true
} else {
if condition.Status == v1.ConditionTrue {
ignorePod = pod.Status.StartTime.Add(2 * time.Minute).After(time.Now())
} else {
ignorePod = pod.Status.StartTime.Add(maxDelayOfInitialReadinessStatus).After(condition.LastTransitionTime.Time)
}
}
if ignorePod {
ignoredPods.Insert(pod.Name)
continue
}
}
readyPodCount++
}
return
}
func calculatePodRequests(pods []v1.Pod, resource v1.ResourceName) (map[string]int64, error) {
requests := make(map[string]int64, len(pods))
for _, pod := range pods {
podSum := int64(0)
for _, container := range pod.Spec.Containers {
if containerRequest, ok := container.Resources.Requests[resource]; ok {
podSum += containerRequest.MilliValue()
} else {
return nil, fmt.Errorf("missing request for %s", resource)
}
}
requests[pod.Name] = podSum
}
return requests, nil
}
func removeMetricsForPods(metrics metricsclient.PodMetricsInfo, pods sets.String) {
for _, pod := range pods.UnsortedList() {
delete(metrics, pod)
}
started := pod.Status.StartTime.Time
// If last status change was longer than maxDelayOfInitialReadinessStatus after the pod was
// created assume it was ready in the past.
return lastReady.After(started.Add(maxDelayOfInitialReadinessStatus))
}


@ -29,10 +29,12 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
"k8s.io/kubernetes/pkg/api/legacyscheme"
"k8s.io/kubernetes/pkg/controller/podautoscaler/metrics"
metricsclient "k8s.io/kubernetes/pkg/controller/podautoscaler/metrics"
cmapi "k8s.io/metrics/pkg/apis/custom_metrics/v1beta1"
emapi "k8s.io/metrics/pkg/apis/external_metrics/v1beta1"
metricsapi "k8s.io/metrics/pkg/apis/metrics/v1beta1"
@ -88,6 +90,7 @@ type replicaCalcTestCase struct {
metric *metricInfo
podReadiness []v1.ConditionStatus
podStartTime []metav1.Time
podPhase []v1.PodPhase
}
@ -111,6 +114,10 @@ func (tc *replicaCalcTestCase) prepareTestClientSet() *fake.Clientset {
if tc.podReadiness != nil && i < len(tc.podReadiness) {
podReadiness = tc.podReadiness[i]
}
var podStartTime metav1.Time
if tc.podStartTime != nil {
podStartTime = tc.podStartTime[i]
}
podPhase := v1.PodRunning
if tc.podPhase != nil {
podPhase = tc.podPhase[i]
@ -119,6 +126,7 @@ func (tc *replicaCalcTestCase) prepareTestClientSet() *fake.Clientset {
pod := v1.Pod{
Status: v1.PodStatus{
Phase: podPhase,
StartTime: &podStartTime,
Conditions: []v1.PodCondition{
{
Type: v1.PodReady,
@ -439,6 +447,24 @@ func TestReplicaCalcScaleUpUnreadyLessScale(t *testing.T) {
tc.runTest(t)
}
func TestReplicaCalcScaleUpHotCpuLessScale(t *testing.T) {
tc := replicaCalcTestCase{
currentReplicas: 3,
expectedReplicas: 4,
podStartTime: []metav1.Time{hotCpuCreationTime(), coolCpuCreationTime(), coolCpuCreationTime()},
resource: &resourceInfo{
name: v1.ResourceCPU,
requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
levels: []int64{300, 500, 700},
targetUtilization: 30,
expectedUtilization: 60,
expectedValue: numContainersPerPod * 600,
},
}
tc.runTest(t)
}
func TestReplicaCalcScaleUpUnreadyNoScale(t *testing.T) {
tc := replicaCalcTestCase{
currentReplicas: 3,
@ -457,6 +483,25 @@ func TestReplicaCalcScaleUpUnreadyNoScale(t *testing.T) {
tc.runTest(t)
}
func TestReplicaCalcScaleHotCpuNoScale(t *testing.T) {
tc := replicaCalcTestCase{
currentReplicas: 3,
expectedReplicas: 3,
podReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
podStartTime: []metav1.Time{coolCpuCreationTime(), hotCpuCreationTime(), hotCpuCreationTime()},
resource: &resourceInfo{
name: v1.ResourceCPU,
requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
levels: []int64{400, 500, 700},
targetUtilization: 30,
expectedUtilization: 40,
expectedValue: numContainersPerPod * 400,
},
}
tc.runTest(t)
}
func TestReplicaCalcScaleUpIgnoresFailedPods(t *testing.T) {
tc := replicaCalcTestCase{
currentReplicas: 2,
@ -491,11 +536,12 @@ func TestReplicaCalcScaleUpCM(t *testing.T) {
tc.runTest(t)
}
func TestReplicaCalcScaleUpCMUnreadyLessScale(t *testing.T) {
func TestReplicaCalcScaleUpCMUnreadyHotCpuNoLessScale(t *testing.T) {
tc := replicaCalcTestCase{
currentReplicas: 3,
expectedReplicas: 4,
expectedReplicas: 6,
podReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse},
podStartTime: []metav1.Time{coolCpuCreationTime(), coolCpuCreationTime(), hotCpuCreationTime()},
metric: &metricInfo{
name: "qps",
levels: []int64{50000, 10000, 30000},
@ -507,16 +553,17 @@ func TestReplicaCalcScaleUpCMUnreadyLessScale(t *testing.T) {
tc.runTest(t)
}
func TestReplicaCalcScaleUpCMUnreadyNoScaleWouldScaleDown(t *testing.T) {
func TestReplicaCalcScaleUpCMUnreadyHotCpuScaleWouldScaleDown(t *testing.T) {
tc := replicaCalcTestCase{
currentReplicas: 3,
expectedReplicas: 3,
expectedReplicas: 7,
podReadiness: []v1.ConditionStatus{v1.ConditionFalse, v1.ConditionTrue, v1.ConditionFalse},
podStartTime: []metav1.Time{hotCpuCreationTime(), coolCpuCreationTime(), hotCpuCreationTime()},
metric: &metricInfo{
name: "qps",
levels: []int64{50000, 15000, 30000},
targetUtilization: 15000,
expectedUtilization: 15000,
expectedUtilization: 31666,
metricType: podMetric,
},
}
@ -709,7 +756,7 @@ func TestReplicaCalcScaleDownPerPodCMExternal(t *testing.T) {
tc.runTest(t)
}
func TestReplicaCalcScaleDownIgnoresUnreadyPods(t *testing.T) {
func TestReplicaCalcScaleDownIncludeUnreadyPods(t *testing.T) {
tc := replicaCalcTestCase{
currentReplicas: 5,
expectedReplicas: 2,
@ -727,6 +774,24 @@ func TestReplicaCalcScaleDownIgnoresUnreadyPods(t *testing.T) {
tc.runTest(t)
}
func TestReplicaCalcScaleDownIgnoreHotCpuPods(t *testing.T) {
tc := replicaCalcTestCase{
currentReplicas: 5,
expectedReplicas: 2,
podStartTime: []metav1.Time{coolCpuCreationTime(), coolCpuCreationTime(), coolCpuCreationTime(), hotCpuCreationTime(), hotCpuCreationTime()},
resource: &resourceInfo{
name: v1.ResourceCPU,
requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
levels: []int64{100, 300, 500, 250, 250},
targetUtilization: 50,
expectedUtilization: 30,
expectedValue: numContainersPerPod * 300,
},
}
tc.runTest(t)
}
func TestReplicaCalcScaleDownIgnoresFailedPods(t *testing.T) {
tc := replicaCalcTestCase{
currentReplicas: 5,
@ -943,7 +1008,7 @@ func TestReplicaCalcMissingMetricsNoChangeLt(t *testing.T) {
tc.runTest(t)
}
func TestReplicaCalcMissingMetricsUnreadyNoChange(t *testing.T) {
func TestReplicaCalcMissingMetricsUnreadyChange(t *testing.T) {
tc := replicaCalcTestCase{
currentReplicas: 3,
expectedReplicas: 3,
@ -961,6 +1026,24 @@ func TestReplicaCalcMissingMetricsUnreadyNoChange(t *testing.T) {
tc.runTest(t)
}
func TestReplicaCalcMissingMetricsHotCpuNoChange(t *testing.T) {
tc := replicaCalcTestCase{
currentReplicas: 3,
expectedReplicas: 3,
podStartTime: []metav1.Time{hotCpuCreationTime(), coolCpuCreationTime(), coolCpuCreationTime()},
resource: &resourceInfo{
name: v1.ResourceCPU,
requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
levels: []int64{100, 450},
targetUtilization: 50,
expectedUtilization: 45,
expectedValue: numContainersPerPod * 450,
},
}
tc.runTest(t)
}
func TestReplicaCalcMissingMetricsUnreadyScaleUp(t *testing.T) {
tc := replicaCalcTestCase{
currentReplicas: 3,
@ -979,6 +1062,25 @@ func TestReplicaCalcMissingMetricsUnreadyScaleUp(t *testing.T) {
tc.runTest(t)
}
func TestReplicaCalcMissingMetricsHotCpuScaleUp(t *testing.T) {
tc := replicaCalcTestCase{
currentReplicas: 3,
expectedReplicas: 4,
podReadiness: []v1.ConditionStatus{v1.ConditionFalse, v1.ConditionTrue, v1.ConditionTrue},
podStartTime: []metav1.Time{hotCpuCreationTime(), coolCpuCreationTime(), coolCpuCreationTime()},
resource: &resourceInfo{
name: v1.ResourceCPU,
requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
levels: []int64{100, 2000},
targetUtilization: 50,
expectedUtilization: 200,
expectedValue: numContainersPerPod * 2000,
},
}
tc.runTest(t)
}
func TestReplicaCalcMissingMetricsUnreadyScaleDown(t *testing.T) {
tc := replicaCalcTestCase{
currentReplicas: 4,
@ -1069,74 +1171,183 @@ func TestReplicaCalcComputedToleranceAlgImplementation(t *testing.T) {
tc.runTest(t)
}
func TestHasPodBeenReadyBefore(t *testing.T) {
func TestGroupPods(t *testing.T) {
tests := []struct {
name string
conditions []v1.PodCondition
started time.Time
expected bool
pods []v1.Pod
metrics metricsclient.PodMetricsInfo
resource v1.ResourceName
expectReadyPodCount int
expectUnreadyPods sets.String
expectMissingPods sets.String
}{
{
"initially unready",
[]v1.PodCondition{
"void",
[]v1.Pod{},
metricsclient.PodMetricsInfo{},
v1.ResourceName(""),
0,
sets.NewString(),
sets.NewString(),
},
{
"a ready pod",
[]v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Name: "bentham",
},
Status: v1.PodStatus{
Phase: v1.PodSucceeded,
},
},
},
metricsclient.PodMetricsInfo{
"bentham": 1,
},
v1.ResourceName("hedons"),
1,
sets.NewString(),
sets.NewString(),
},
{
"an unready pod",
[]v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Name: "lucretius",
},
Status: v1.PodStatus{
Phase: v1.PodSucceeded,
StartTime: &metav1.Time{
Time: time.Now(),
},
},
},
},
metricsclient.PodMetricsInfo{
"lucretius": 1,
},
v1.ResourceCPU,
0,
sets.NewString("lucretius"),
sets.NewString(),
},
{
"a ready cpu pod",
[]v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Name: "niccolo",
},
Status: v1.PodStatus{
Phase: v1.PodSucceeded,
StartTime: &metav1.Time{
Time: time.Now().Add(-3 * time.Minute),
},
Conditions: []v1.PodCondition{
{
Type: v1.PodReady,
LastTransitionTime: metav1.Time{
Time: metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
},
Status: v1.ConditionFalse,
},
},
metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
false,
},
{
"currently unready",
[]v1.PodCondition{
{
Type: v1.PodReady,
LastTransitionTime: metav1.Time{
Time: metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
},
Status: v1.ConditionFalse,
},
},
metav1.Date(2018, 7, 25, 17, 0, 0, 0, time.UTC).Time,
true,
},
{
"currently ready",
[]v1.PodCondition{
{
Type: v1.PodReady,
LastTransitionTime: metav1.Time{
Time: metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
},
LastTransitionTime: metav1.Time{Time: time.Now().Add(-3 * time.Minute)},
Status: v1.ConditionTrue,
},
},
metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
true,
},
},
},
metricsclient.PodMetricsInfo{
"niccolo": 1,
},
v1.ResourceCPU,
1,
sets.NewString(),
sets.NewString(),
},
{
"no ready status",
[]v1.PodCondition{},
metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
false,
"a missing pod",
[]v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Name: "epicurus",
},
Status: v1.PodStatus{
Phase: v1.PodSucceeded,
StartTime: &metav1.Time{
Time: time.Now().Add(-3 * time.Minute),
},
},
},
},
metricsclient.PodMetricsInfo{},
v1.ResourceCPU,
0,
sets.NewString(),
sets.NewString("epicurus"),
},
{
"all together",
[]v1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Name: "lucretius",
},
Status: v1.PodStatus{
Phase: v1.PodSucceeded,
StartTime: &metav1.Time{
Time: time.Now(),
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: "niccolo",
},
Status: v1.PodStatus{
Phase: v1.PodSucceeded,
StartTime: &metav1.Time{
Time: time.Now().Add(-3 * time.Minute),
},
Conditions: []v1.PodCondition{
{
Type: v1.PodReady,
LastTransitionTime: metav1.Time{Time: time.Now().Add(-3 * time.Minute)},
Status: v1.ConditionTrue,
},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: "epicurus",
},
Status: v1.PodStatus{
Phase: v1.PodSucceeded,
StartTime: &metav1.Time{
Time: time.Now().Add(-3 * time.Minute),
},
},
},
},
metricsclient.PodMetricsInfo{
"lucretius": 1,
"niccolo": 1,
},
v1.ResourceCPU,
1,
sets.NewString("lucretius"),
sets.NewString("epicurus"),
},
}
for _, tc := range tests {
pod := &v1.Pod{
Status: v1.PodStatus{
Conditions: tc.conditions,
StartTime: &metav1.Time{
Time: tc.started,
},
},
readyPodCount, unreadyPods, missingPods := groupPods(tc.pods, tc.metrics, tc.resource)
if readyPodCount != tc.expectReadyPodCount {
t.Errorf("%s got readyPodCount %d, expected %d", tc.name, readyPodCount, tc.expectReadyPodCount)
}
got := hasPodBeenReadyBefore(pod)
if got != tc.expected {
t.Errorf("[TestHasPodBeenReadyBefore.%s] got %v, want %v", tc.name, got, tc.expected)
if !unreadyPods.Equal(tc.expectUnreadyPods) {
t.Errorf("%s got unreadyPods %v, expected %v", tc.name, unreadyPods, tc.expectUnreadyPods)
}
if !missingPods.Equal(tc.expectMissingPods) {
t.Errorf("%s got missingPods %v, expected %v", tc.name, missingPods, tc.expectMissingPods)
}
}
}