mirror of https://github.com/k3s-io/k3s
Merge pull request #67252 from jbartosik/metric-sanitization
Automatic merge from submit-queue (batch tested with PRs 66916, 67252, 67794, 67619, 67328). If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

Fix HPA sample sanitization

**What this PR does / why we need it**: @mwielgus pointed out a case where the HPA fails as a result of my changes to the HPA algorithm:
- Have pods that use a lot of CPU during initialization and become ready right after they initialize,
- Trigger a scale up,
- When the new pods become ready, we will count their usage (even though it's not related to any work that needs doing),
- This triggers another scale up, even though the existing pods can handle the work without a problem.

The fix is:
- Use all samples for non-CPU metrics.
- Only use CPU samples if:
  - the pod is ready and was started more than 2 minutes ago, or
  - the pod is unready and its last readiness change happened more than 10s after it was started.

Reasoning behind this in: https://docs.google.com/document/d/1UdtYedhmCxjaJIQi6hwJMY0eHQQKxlVD8lSHZC1BPOA/edit

**Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*:

**Special notes for your reviewer**:

**Release note**:
```release-note
Replace scale up forbidden window with disregarding CPU samples collected when pod was initializing.
```
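For orientation, here is a minimal, self-contained sketch of the filtering rule described above. The `podInfo` struct and `ignoreCPUSample` helper are illustrative names only, not part of this PR; the actual logic lives in `groupPods` in replica_calculator.go further down in this diff.

```go
package main

import (
	"fmt"
	"time"
)

// podInfo is a simplified stand-in for the fields of v1.Pod that the rule inspects.
type podInfo struct {
	ready           bool      // current PodReady condition status
	startTime       time.Time // pod start time
	lastReadyChange time.Time // LastTransitionTime of the PodReady condition
}

// ignoreCPUSample mirrors the decision this PR adds to groupPods: a CPU sample is
// disregarded ("tainted") while the pod is still considered to be initializing.
func ignoreCPUSample(p podInfo, cpuTaintAfterStart, cpuSampleWindow, delayOfInitialReadinessStatus time.Duration, now time.Time) bool {
	if p.ready {
		// Ready pods: ignore CPU until the taint period plus one sample window has passed since start.
		return p.startTime.Add(cpuTaintAfterStart + cpuSampleWindow).After(now)
	}
	// Unready pods: ignore CPU if the last readiness change happened within the
	// initial-readiness delay after start (i.e. the pod has likely never been ready).
	return p.startTime.Add(delayOfInitialReadinessStatus).After(p.lastReadyChange)
}

func main() {
	now := time.Now()
	fresh := podInfo{ready: true, startTime: now.Add(-1 * time.Minute)}
	old := podInfo{ready: true, startTime: now.Add(-10 * time.Minute)}
	fmt.Println(ignoreCPUSample(fresh, 2*time.Minute, time.Minute, 10*time.Second, now)) // true: sample still tainted
	fmt.Println(ignoreCPUSample(old, 2*time.Minute, time.Minute, 10*time.Second, now))   // false: sample counts
}
```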
commit 663551bebd

@@ -93,6 +93,8 @@ API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alp
API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,HPAControllerConfiguration,HorizontalPodAutoscalerDownscaleForbiddenWindow
API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,HPAControllerConfiguration,HorizontalPodAutoscalerTolerance
API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,HPAControllerConfiguration,HorizontalPodAutoscalerUseRESTClients
API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,HPAControllerConfiguration,HorizontalPodAutoscalerCPUTaintPeriod
API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,HPAControllerConfiguration,HorizontalPodAutoscalerInitialReadinessDelay
API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,JobControllerConfiguration,ConcurrentJobSyncs
API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,KubeCloudSharedConfiguration,Port
API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,KubeCloudSharedConfiguration,Address

@@ -84,6 +84,8 @@ func startHPAControllerWithMetricsClient(ctx ControllerContext, metricsClient me
metricsClient,
hpaClient.CoreV1(),
ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerTolerance,
ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerCPUTaintPeriod.Duration,
ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerInitialReadinessDelay.Duration,
)
go podautoscaler.NewHorizontalController(
hpaClient.CoreV1(),

@@ -30,6 +30,8 @@ type HPAControllerOptions struct {
HorizontalPodAutoscalerDownscaleForbiddenWindow metav1.Duration
HorizontalPodAutoscalerUpscaleForbiddenWindow metav1.Duration
HorizontalPodAutoscalerSyncPeriod metav1.Duration
HorizontalPodAutoscalerCPUTaintPeriod metav1.Duration
HorizontalPodAutoscalerInitialReadinessDelay metav1.Duration
}

// AddFlags adds flags related to HPAController for controller manager to the specified FlagSet.

@@ -44,6 +46,8 @@ func (o *HPAControllerOptions) AddFlags(fs *pflag.FlagSet) {
fs.DurationVar(&o.HorizontalPodAutoscalerDownscaleForbiddenWindow.Duration, "horizontal-pod-autoscaler-downscale-delay", o.HorizontalPodAutoscalerDownscaleForbiddenWindow.Duration, "The period since last downscale, before another downscale can be performed in horizontal pod autoscaler.")
fs.Float64Var(&o.HorizontalPodAutoscalerTolerance, "horizontal-pod-autoscaler-tolerance", o.HorizontalPodAutoscalerTolerance, "The minimum change (from 1.0) in the desired-to-actual metrics ratio for the horizontal pod autoscaler to consider scaling.")
fs.BoolVar(&o.HorizontalPodAutoscalerUseRESTClients, "horizontal-pod-autoscaler-use-rest-clients", o.HorizontalPodAutoscalerUseRESTClients, "If set to true, causes the horizontal pod autoscaler controller to use REST clients through the kube-aggregator, instead of using the legacy metrics client through the API server proxy. This is required for custom metrics support in the horizontal pod autoscaler.")
fs.DurationVar(&o.HorizontalPodAutoscalerCPUTaintPeriod.Duration, "horizontal-pod-autoscaler-cpu-taint-period", o.HorizontalPodAutoscalerCPUTaintPeriod.Duration, "The period after pod start for which CPU samples are considered tainted by initialization.")
fs.DurationVar(&o.HorizontalPodAutoscalerInitialReadinessDelay.Duration, "horizontal-pod-autoscaler-initial-readiness-delay", o.HorizontalPodAutoscalerInitialReadinessDelay.Duration, "The period after pod start during which readiness changes will be treated as initial readiness.")
}

// ApplyTo fills up HPAController config with options.

@@ -134,6 +134,8 @@ func NewKubeControllerManagerOptions() (*KubeControllerManagerOptions, error) {
HorizontalPodAutoscalerSyncPeriod: componentConfig.HPAController.HorizontalPodAutoscalerSyncPeriod,
HorizontalPodAutoscalerUpscaleForbiddenWindow: componentConfig.HPAController.HorizontalPodAutoscalerUpscaleForbiddenWindow,
HorizontalPodAutoscalerDownscaleForbiddenWindow: componentConfig.HPAController.HorizontalPodAutoscalerDownscaleForbiddenWindow,
HorizontalPodAutoscalerCPUTaintPeriod: componentConfig.HPAController.HorizontalPodAutoscalerCPUTaintPeriod,
HorizontalPodAutoscalerInitialReadinessDelay: componentConfig.HPAController.HorizontalPodAutoscalerInitialReadinessDelay,
HorizontalPodAutoscalerTolerance: componentConfig.HPAController.HorizontalPodAutoscalerTolerance,
HorizontalPodAutoscalerUseRESTClients: componentConfig.HPAController.HorizontalPodAutoscalerUseRESTClients,
},

@@ -73,6 +73,8 @@ func TestAddFlags(t *testing.T) {
"--horizontal-pod-autoscaler-downscale-delay=2m",
"--horizontal-pod-autoscaler-sync-period=45s",
"--horizontal-pod-autoscaler-upscale-delay=1m",
"--horizontal-pod-autoscaler-cpu-taint-period=90s",
"--horizontal-pod-autoscaler-initial-readiness-delay=50s",
"--http2-max-streams-per-connection=47",
"--kube-api-burst=100",
"--kube-api-content-type=application/json",

@@ -185,6 +187,8 @@ func TestAddFlags(t *testing.T) {
HorizontalPodAutoscalerSyncPeriod: metav1.Duration{Duration: 45 * time.Second},
HorizontalPodAutoscalerUpscaleForbiddenWindow: metav1.Duration{Duration: 1 * time.Minute},
HorizontalPodAutoscalerDownscaleForbiddenWindow: metav1.Duration{Duration: 2 * time.Minute},
HorizontalPodAutoscalerCPUTaintPeriod: metav1.Duration{Duration: 90 * time.Second},
HorizontalPodAutoscalerInitialReadinessDelay: metav1.Duration{Duration: 50 * time.Second},
HorizontalPodAutoscalerTolerance: 0.1,
HorizontalPodAutoscalerUseRESTClients: true,
},

@@ -271,6 +271,14 @@ type HPAControllerConfiguration struct {
// through the kube-aggregator when enabled, instead of using the legacy metrics client
// through the API server proxy.
HorizontalPodAutoscalerUseRESTClients bool
// HorizontalPodAutoscalerCPUTaintPeriod is period after pod start for which HPA will consider CPU
// samples from the pod contaminated by initialization and disregard them.
HorizontalPodAutoscalerCPUTaintPeriod metav1.Duration
// HorizontalPodAutoscalerInitialReadinessDelay is period after pod start during which readiness
// changes are treated as readiness being set for the first time. The only effect of this is that
// HPA will disregard CPU samples from unready pods that had last readiness change during that
// period.
HorizontalPodAutoscalerInitialReadinessDelay metav1.Duration
}

type JobControllerConfiguration struct {

@@ -89,6 +89,14 @@ func SetDefaults_KubeControllerManagerConfiguration(obj *KubeControllerManagerCo
if obj.HPAController.HorizontalPodAutoscalerUpscaleForbiddenWindow == zero {
obj.HPAController.HorizontalPodAutoscalerUpscaleForbiddenWindow = metav1.Duration{Duration: 3 * time.Minute}
}
if obj.HPAController.HorizontalPodAutoscalerCPUTaintPeriod == zero {
// Assuming CPU is collected every minute and initialization takes another minute HPA should
// disregard samples from first two minutes as contaminated by initialization.
obj.HPAController.HorizontalPodAutoscalerCPUTaintPeriod = metav1.Duration{Duration: time.Minute}
}
if obj.HPAController.HorizontalPodAutoscalerInitialReadinessDelay == zero {
obj.HPAController.HorizontalPodAutoscalerInitialReadinessDelay = metav1.Duration{Duration: 30 * time.Second}
}
if obj.HPAController.HorizontalPodAutoscalerDownscaleForbiddenWindow == zero {
obj.HPAController.HorizontalPodAutoscalerDownscaleForbiddenWindow = metav1.Duration{Duration: 5 * time.Minute}
}

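Reader note, not part of the diff: the one-minute default above still yields the two-minute window mentioned in the comment, because `groupPods` in replica_calculator.go (further down in this diff) adds a one-minute `cpuSampleWindow` on top of the taint period for ready pods. A minimal sketch of that arithmetic:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	// Default from SetDefaults_KubeControllerManagerConfiguration above.
	cpuTaintPeriod := time.Minute
	// cpuSampleWindow constant declared in replica_calculator.go in this diff.
	cpuSampleWindow := time.Minute

	// Effective window during which a ready pod's CPU samples are disregarded.
	fmt.Println(cpuTaintPeriod + cpuSampleWindow) // prints "2m0s"
}
```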
@@ -320,6 +320,14 @@ type HPAControllerConfiguration struct {
// through the kube-aggregator when enabled, instead of using the legacy metrics client
// through the API server proxy.
HorizontalPodAutoscalerUseRESTClients *bool
// HorizontalPodAutoscalerCPUTaintPeriod is period after pod start for which HPA will consider CPU
// samples from the pod contaminated by initialization and disregard them.
HorizontalPodAutoscalerCPUTaintPeriod metav1.Duration
// HorizontalPodAutoscalerInitialReadinessDelay is period after pod start during which readiness
// changes are treated as readiness being set for the first time. The only effect of this is that
// HPA will disregard CPU samples from unready pods that had last readiness change during that
// period.
HorizontalPodAutoscalerInitialReadinessDelay metav1.Duration
}

type JobControllerConfiguration struct {

@@ -606,6 +606,8 @@ func autoConvert_v1alpha1_HPAControllerConfiguration_To_componentconfig_HPAContr
if err := v1.Convert_Pointer_bool_To_bool(&in.HorizontalPodAutoscalerUseRESTClients, &out.HorizontalPodAutoscalerUseRESTClients, s); err != nil {
return err
}
out.HorizontalPodAutoscalerCPUTaintPeriod = in.HorizontalPodAutoscalerCPUTaintPeriod
out.HorizontalPodAutoscalerInitialReadinessDelay = in.HorizontalPodAutoscalerInitialReadinessDelay
return nil
}

@@ -622,6 +624,8 @@ func autoConvert_componentconfig_HPAControllerConfiguration_To_v1alpha1_HPAContr
if err := v1.Convert_bool_To_Pointer_bool(&in.HorizontalPodAutoscalerUseRESTClients, &out.HorizontalPodAutoscalerUseRESTClients, s); err != nil {
return err
}
out.HorizontalPodAutoscalerCPUTaintPeriod = in.HorizontalPodAutoscalerCPUTaintPeriod
out.HorizontalPodAutoscalerInitialReadinessDelay = in.HorizontalPodAutoscalerInitialReadinessDelay
return nil
}

@@ -242,6 +242,8 @@ func (in *HPAControllerConfiguration) DeepCopyInto(out *HPAControllerConfigurati
*out = new(bool)
**out = **in
}
out.HorizontalPodAutoscalerCPUTaintPeriod = in.HorizontalPodAutoscalerCPUTaintPeriod
out.HorizontalPodAutoscalerInitialReadinessDelay = in.HorizontalPodAutoscalerInitialReadinessDelay
return
}

@@ -232,6 +232,8 @@ func (in *HPAControllerConfiguration) DeepCopyInto(out *HPAControllerConfigurati
out.HorizontalPodAutoscalerSyncPeriod = in.HorizontalPodAutoscalerSyncPeriod
out.HorizontalPodAutoscalerUpscaleForbiddenWindow = in.HorizontalPodAutoscalerUpscaleForbiddenWindow
out.HorizontalPodAutoscalerDownscaleForbiddenWindow = in.HorizontalPodAutoscalerDownscaleForbiddenWindow
out.HorizontalPodAutoscalerCPUTaintPeriod = in.HorizontalPodAutoscalerCPUTaintPeriod
out.HorizontalPodAutoscalerInitialReadinessDelay = in.HorizontalPodAutoscalerInitialReadinessDelay
return
}

@@ -73,6 +73,7 @@ go_test(
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/watch:go_default_library",
"//staging/src/k8s.io/client-go/informers:go_default_library",
"//staging/src/k8s.io/client-go/kubernetes/fake:go_default_library",

@ -103,6 +103,7 @@ type testCase struct {
|
|||
reportedLevels []uint64
|
||||
reportedCPURequests []resource.Quantity
|
||||
reportedPodReadiness []v1.ConditionStatus
|
||||
reportedPodStartTime []metav1.Time
|
||||
reportedPodPhase []v1.PodPhase
|
||||
scaleUpdated bool
|
||||
statusUpdated bool
|
||||
|
@ -261,6 +262,10 @@ func (tc *testCase) prepareTestClient(t *testing.T) (*fake.Clientset, *metricsfa
|
|||
if tc.reportedPodReadiness != nil {
|
||||
podReadiness = tc.reportedPodReadiness[i]
|
||||
}
|
||||
var podStartTime metav1.Time
|
||||
if tc.reportedPodStartTime != nil {
|
||||
podStartTime = tc.reportedPodStartTime[i]
|
||||
}
|
||||
|
||||
podPhase := v1.PodRunning
|
||||
if tc.reportedPodPhase != nil {
|
||||
|
@ -283,6 +288,7 @@ func (tc *testCase) prepareTestClient(t *testing.T) (*fake.Clientset, *metricsfa
|
|||
Status: podReadiness,
|
||||
},
|
||||
},
|
||||
StartTime: &podStartTime,
|
||||
},
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: podName,
|
||||
|
@ -636,11 +642,7 @@ func (tc *testCase) setupController(t *testing.T) (*HorizontalController, inform
|
|||
return true, obj, nil
|
||||
})
|
||||
|
||||
replicaCalc := &ReplicaCalculator{
|
||||
metricsClient: metricsClient,
|
||||
podsGetter: testClient.Core(),
|
||||
tolerance: defaultTestingTolerance,
|
||||
}
|
||||
replicaCalc := NewReplicaCalculator(metricsClient, testClient.Core(), defaultTestingTolerance, defaultTestingCpuTaintAfterStart, defaultTestingDelayOfInitialReadinessStatus)
|
||||
|
||||
informerFactory := informers.NewSharedInformerFactory(testClient, controller.NoResyncPeriodFunc())
|
||||
defaultDownscaleForbiddenWindow := 5 * time.Minute
|
||||
|
@ -660,6 +662,14 @@ func (tc *testCase) setupController(t *testing.T) (*HorizontalController, inform
|
|||
return hpaController, informerFactory
|
||||
}
|
||||
|
||||
func hotCpuCreationTime() metav1.Time {
|
||||
return metav1.Time{Time: time.Now()}
|
||||
}
|
||||
|
||||
func coolCpuCreationTime() metav1.Time {
|
||||
return metav1.Time{Time: time.Now().Add(-3 * time.Minute)}
|
||||
}
|
||||
|
||||
func (tc *testCase) runTestWithController(t *testing.T, hpaController *HorizontalController, informerFactory informers.SharedInformerFactory) {
|
||||
stop := make(chan struct{})
|
||||
defer close(stop)
|
||||
|
@ -716,6 +726,23 @@ func TestScaleUpUnreadyLessScale(t *testing.T) {
|
|||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestScaleUpHotCpuLessScale(t *testing.T) {
|
||||
tc := testCase{
|
||||
minReplicas: 2,
|
||||
maxReplicas: 6,
|
||||
initialReplicas: 3,
|
||||
expectedDesiredReplicas: 4,
|
||||
CPUTarget: 30,
|
||||
CPUCurrent: 60,
|
||||
verifyCPUCurrent: true,
|
||||
reportedLevels: []uint64{300, 500, 700},
|
||||
reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
|
||||
reportedPodStartTime: []metav1.Time{hotCpuCreationTime(), coolCpuCreationTime(), coolCpuCreationTime()},
|
||||
useMetricsAPI: true,
|
||||
}
|
||||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestScaleUpUnreadyNoScale(t *testing.T) {
|
||||
tc := testCase{
|
||||
minReplicas: 2,
|
||||
|
@ -738,6 +765,29 @@ func TestScaleUpUnreadyNoScale(t *testing.T) {
|
|||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestScaleUpHotCpuNoScale(t *testing.T) {
|
||||
tc := testCase{
|
||||
minReplicas: 2,
|
||||
maxReplicas: 6,
|
||||
initialReplicas: 3,
|
||||
expectedDesiredReplicas: 3,
|
||||
CPUTarget: 30,
|
||||
CPUCurrent: 40,
|
||||
verifyCPUCurrent: true,
|
||||
reportedLevels: []uint64{400, 500, 700},
|
||||
reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
|
||||
reportedPodReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
|
||||
reportedPodStartTime: []metav1.Time{coolCpuCreationTime(), hotCpuCreationTime(), hotCpuCreationTime()},
|
||||
useMetricsAPI: true,
|
||||
expectedConditions: statusOkWithOverrides(autoscalingv2.HorizontalPodAutoscalerCondition{
|
||||
Type: autoscalingv2.AbleToScale,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: "ReadyForNewScale",
|
||||
}),
|
||||
}
|
||||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestScaleUpIgnoresFailedPods(t *testing.T) {
|
||||
tc := testCase{
|
||||
minReplicas: 2,
|
||||
|
@ -818,12 +868,12 @@ func TestScaleUpCM(t *testing.T) {
|
|||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestScaleUpCMUnreadyLessScale(t *testing.T) {
|
||||
func TestScaleUpCMUnreadyAndHotCpuNoLessScale(t *testing.T) {
|
||||
tc := testCase{
|
||||
minReplicas: 2,
|
||||
maxReplicas: 6,
|
||||
initialReplicas: 3,
|
||||
expectedDesiredReplicas: 4,
|
||||
expectedDesiredReplicas: 6,
|
||||
CPUTarget: 0,
|
||||
metricsTarget: []autoscalingv2.MetricSpec{
|
||||
{
|
||||
|
@ -836,17 +886,18 @@ func TestScaleUpCMUnreadyLessScale(t *testing.T) {
|
|||
},
|
||||
reportedLevels: []uint64{50000, 10000, 30000},
|
||||
reportedPodReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse},
|
||||
reportedPodStartTime: []metav1.Time{coolCpuCreationTime(), coolCpuCreationTime(), hotCpuCreationTime()},
|
||||
reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
|
||||
}
|
||||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestScaleUpCMUnreadyNoScaleWouldScaleDown(t *testing.T) {
|
||||
func TestScaleUpCMUnreadyandCpuHot(t *testing.T) {
|
||||
tc := testCase{
|
||||
minReplicas: 2,
|
||||
maxReplicas: 6,
|
||||
initialReplicas: 3,
|
||||
expectedDesiredReplicas: 3,
|
||||
expectedDesiredReplicas: 6,
|
||||
CPUTarget: 0,
|
||||
metricsTarget: []autoscalingv2.MetricSpec{
|
||||
{
|
||||
|
@ -859,11 +910,48 @@ func TestScaleUpCMUnreadyNoScaleWouldScaleDown(t *testing.T) {
|
|||
},
|
||||
reportedLevels: []uint64{50000, 15000, 30000},
|
||||
reportedPodReadiness: []v1.ConditionStatus{v1.ConditionFalse, v1.ConditionTrue, v1.ConditionFalse},
|
||||
reportedPodStartTime: []metav1.Time{hotCpuCreationTime(), coolCpuCreationTime(), hotCpuCreationTime()},
|
||||
reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
|
||||
expectedConditions: statusOkWithOverrides(autoscalingv2.HorizontalPodAutoscalerCondition{
|
||||
Type: autoscalingv2.AbleToScale,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: "ReadyForNewScale",
|
||||
Reason: "SucceededRescale",
|
||||
}, autoscalingv2.HorizontalPodAutoscalerCondition{
|
||||
Type: autoscalingv2.ScalingLimited,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: "TooManyReplicas",
|
||||
}),
|
||||
}
|
||||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestScaleUpHotCpuNoScaleWouldScaleDown(t *testing.T) {
|
||||
tc := testCase{
|
||||
minReplicas: 2,
|
||||
maxReplicas: 6,
|
||||
initialReplicas: 3,
|
||||
expectedDesiredReplicas: 6,
|
||||
CPUTarget: 0,
|
||||
metricsTarget: []autoscalingv2.MetricSpec{
|
||||
{
|
||||
Type: autoscalingv2.PodsMetricSourceType,
|
||||
Pods: &autoscalingv2.PodsMetricSource{
|
||||
MetricName: "qps",
|
||||
TargetAverageValue: resource.MustParse("15.0"),
|
||||
},
|
||||
},
|
||||
},
|
||||
reportedLevels: []uint64{50000, 15000, 30000},
|
||||
reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
|
||||
reportedPodStartTime: []metav1.Time{hotCpuCreationTime(), coolCpuCreationTime(), hotCpuCreationTime()},
|
||||
expectedConditions: statusOkWithOverrides(autoscalingv2.HorizontalPodAutoscalerCondition{
|
||||
Type: autoscalingv2.AbleToScale,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: "SucceededRescale",
|
||||
}, autoscalingv2.HorizontalPodAutoscalerCondition{
|
||||
Type: autoscalingv2.ScalingLimited,
|
||||
Status: v1.ConditionTrue,
|
||||
Reason: "TooManyReplicas",
|
||||
}),
|
||||
}
|
||||
tc.runTest(t)
|
||||
|
@ -1043,7 +1131,7 @@ func TestScaleDownPerPodCMExternal(t *testing.T) {
|
|||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestScaleDownIgnoresUnreadyPods(t *testing.T) {
|
||||
func TestScaleDownIncludeUnreadyPods(t *testing.T) {
|
||||
tc := testCase{
|
||||
minReplicas: 2,
|
||||
maxReplicas: 6,
|
||||
|
@ -1060,6 +1148,23 @@ func TestScaleDownIgnoresUnreadyPods(t *testing.T) {
|
|||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestScaleDownIgnoreHotCpuPods(t *testing.T) {
|
||||
tc := testCase{
|
||||
minReplicas: 2,
|
||||
maxReplicas: 6,
|
||||
initialReplicas: 5,
|
||||
expectedDesiredReplicas: 2,
|
||||
CPUTarget: 50,
|
||||
CPUCurrent: 30,
|
||||
verifyCPUCurrent: true,
|
||||
reportedLevels: []uint64{100, 300, 500, 250, 250},
|
||||
reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
|
||||
useMetricsAPI: true,
|
||||
reportedPodStartTime: []metav1.Time{coolCpuCreationTime(), coolCpuCreationTime(), coolCpuCreationTime(), hotCpuCreationTime(), hotCpuCreationTime()},
|
||||
}
|
||||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestScaleDownIgnoresFailedPods(t *testing.T) {
|
||||
tc := testCase{
|
||||
minReplicas: 2,
|
||||
|
@ -1975,7 +2080,7 @@ func TestAvoidUncessaryUpdates(t *testing.T) {
|
|||
verifyCPUCurrent: true,
|
||||
reportedLevels: []uint64{400, 500, 700},
|
||||
reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
|
||||
reportedPodReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
|
||||
reportedPodStartTime: []metav1.Time{coolCpuCreationTime(), hotCpuCreationTime(), hotCpuCreationTime()},
|
||||
useMetricsAPI: true,
|
||||
}
|
||||
testClient, _, _, _, _ := tc.prepareTestClient(t)
|
||||
|
|
|
@ -222,6 +222,7 @@ func (tc *legacyTestCase) prepareTestClient(t *testing.T) (*fake.Clientset, *sca
|
|||
podName := fmt.Sprintf("%s-%d", podNamePrefix, i)
|
||||
pod := v1.Pod{
|
||||
Status: v1.PodStatus{
|
||||
StartTime: &metav1.Time{Time: time.Now().Add(-3 * time.Minute)},
|
||||
Phase: v1.PodRunning,
|
||||
Conditions: []v1.PodCondition{
|
||||
{
|
||||
|
@ -484,11 +485,7 @@ func (tc *legacyTestCase) runTest(t *testing.T) {
|
|||
return true, obj, nil
|
||||
})
|
||||
|
||||
replicaCalc := &ReplicaCalculator{
|
||||
metricsClient: metricsClient,
|
||||
podsGetter: testClient.Core(),
|
||||
tolerance: defaultTestingTolerance,
|
||||
}
|
||||
replicaCalc := NewReplicaCalculator(metricsClient, testClient.Core(), defaultTestingTolerance, defaultTestingCpuTaintAfterStart, defaultTestingDelayOfInitialReadinessStatus)
|
||||
|
||||
informerFactory := informers.NewSharedInformerFactory(testClient, controller.NoResyncPeriodFunc())
|
||||
defaultDownscaleForbiddenWindow := 5 * time.Minute
|
||||
|
@ -545,8 +542,7 @@ func TestLegacyScaleUpUnreadyLessScale(t *testing.T) {
|
|||
initialReplicas: 3,
|
||||
desiredReplicas: 4,
|
||||
CPUTarget: 30,
|
||||
CPUCurrent: 60,
|
||||
verifyCPUCurrent: true,
|
||||
verifyCPUCurrent: false,
|
||||
reportedLevels: []uint64{300, 500, 700},
|
||||
reportedCPURequests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
|
||||
reportedPodReadiness: []v1.ConditionStatus{v1.ConditionFalse, v1.ConditionTrue, v1.ConditionTrue},
|
||||
|
@ -634,12 +630,12 @@ func TestLegacyScaleUpCM(t *testing.T) {
|
|||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestLegacyScaleUpCMUnreadyLessScale(t *testing.T) {
|
||||
func TestLegacyScaleUpCMUnreadyNoLessScale(t *testing.T) {
|
||||
tc := legacyTestCase{
|
||||
minReplicas: 2,
|
||||
maxReplicas: 6,
|
||||
initialReplicas: 3,
|
||||
desiredReplicas: 4,
|
||||
desiredReplicas: 6,
|
||||
CPUTarget: 0,
|
||||
metricsTarget: []autoscalingv2.MetricSpec{
|
||||
{
|
||||
|
@ -662,7 +658,7 @@ func TestLegacyScaleUpCMUnreadyNoScaleWouldScaleDown(t *testing.T) {
|
|||
minReplicas: 2,
|
||||
maxReplicas: 6,
|
||||
initialReplicas: 3,
|
||||
desiredReplicas: 3,
|
||||
desiredReplicas: 6,
|
||||
CPUTarget: 0,
|
||||
metricsTarget: []autoscalingv2.MetricSpec{
|
||||
{
|
||||
|
|
|
@ -68,6 +68,7 @@ func (tc *legacyReplicaCalcTestCase) prepareTestClient(t *testing.T) *fake.Clien
|
|||
pod := v1.Pod{
|
||||
Status: v1.PodStatus{
|
||||
Phase: v1.PodRunning,
|
||||
StartTime: &metav1.Time{Time: time.Now().Add(-3 * time.Minute)},
|
||||
Conditions: []v1.PodCondition{
|
||||
{
|
||||
Type: v1.PodReady,
|
||||
|
@ -185,11 +186,7 @@ func (tc *legacyReplicaCalcTestCase) runTest(t *testing.T) {
|
|||
testClient := tc.prepareTestClient(t)
|
||||
metricsClient := metrics.NewHeapsterMetricsClient(testClient, metrics.DefaultHeapsterNamespace, metrics.DefaultHeapsterScheme, metrics.DefaultHeapsterService, metrics.DefaultHeapsterPort)
|
||||
|
||||
replicaCalc := &ReplicaCalculator{
|
||||
metricsClient: metricsClient,
|
||||
podsGetter: testClient.Core(),
|
||||
tolerance: defaultTestingTolerance,
|
||||
}
|
||||
replicaCalc := NewReplicaCalculator(metricsClient, testClient.Core(), defaultTestingTolerance, defaultTestingCpuTaintAfterStart, defaultTestingDelayOfInitialReadinessStatus)
|
||||
|
||||
selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{
|
||||
MatchLabels: map[string]string{"name": podNamePrefix},
|
||||
|
@ -310,10 +307,10 @@ func TestLegacyReplicaCalcScaleUpCM(t *testing.T) {
|
|||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestLegacyReplicaCalcScaleUpCMUnreadyLessScale(t *testing.T) {
|
||||
func TestLegacyReplicaCalcScaleUpCMUnreadyNoLessScale(t *testing.T) {
|
||||
tc := legacyReplicaCalcTestCase{
|
||||
currentReplicas: 3,
|
||||
expectedReplicas: 4,
|
||||
expectedReplicas: 6,
|
||||
podReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse},
|
||||
metric: &metricInfo{
|
||||
name: "qps",
|
||||
|
@ -325,16 +322,16 @@ func TestLegacyReplicaCalcScaleUpCMUnreadyLessScale(t *testing.T) {
|
|||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestLegacyReplicaCalcScaleUpCMUnreadyNoScaleWouldScaleDown(t *testing.T) {
|
||||
func TestLegacyReplicaCalcScaleUpCMUnreadyScale(t *testing.T) {
|
||||
tc := legacyReplicaCalcTestCase{
|
||||
currentReplicas: 3,
|
||||
expectedReplicas: 3,
|
||||
expectedReplicas: 7,
|
||||
podReadiness: []v1.ConditionStatus{v1.ConditionFalse, v1.ConditionTrue, v1.ConditionFalse},
|
||||
metric: &metricInfo{
|
||||
name: "qps",
|
||||
levels: []int64{50000, 15000, 30000},
|
||||
targetUtilization: 15000,
|
||||
expectedUtilization: 15000,
|
||||
expectedUtilization: 31666,
|
||||
},
|
||||
}
|
||||
tc.runTest(t)
|
||||
|
|
|
@@ -21,6 +21,7 @@ import (
"math"
"time"

"github.com/golang/glog"
autoscaling "k8s.io/api/autoscaling/v2beta1"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

@@ -29,29 +30,34 @@ import (
v1coreclient "k8s.io/client-go/kubernetes/typed/core/v1"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
metricsclient "k8s.io/kubernetes/pkg/controller/podautoscaler/metrics"
"runtime/debug"
)

const (
// TODO(jbartosik): use actual value.
cpuSampleWindow = time.Minute
// defaultTestingTolerance is default value for calculating when to
// scale up/scale down.
defaultTestingTolerance = 0.1

// Pod begins existence as unready. If pod is unready and timestamp of last pod readiness change is
// less than maxDelayOfInitialReadinessStatus after pod start we assume it has never been ready.
maxDelayOfInitialReadinessStatus = 10 * time.Second
defaultTestingCpuTaintAfterStart = 2 * time.Minute
defaultTestingDelayOfInitialReadinessStatus = 10 * time.Second
)

type ReplicaCalculator struct {
metricsClient metricsclient.MetricsClient
podsGetter v1coreclient.PodsGetter
tolerance float64
cpuTaintAfterStart time.Duration
delayOfInitialReadinessStatus time.Duration
}

func NewReplicaCalculator(metricsClient metricsclient.MetricsClient, podsGetter v1coreclient.PodsGetter, tolerance float64) *ReplicaCalculator {
func NewReplicaCalculator(metricsClient metricsclient.MetricsClient, podsGetter v1coreclient.PodsGetter, tolerance float64, cpuTaintAfterStart, delayOfInitialReadinessStatus time.Duration) *ReplicaCalculator {
return &ReplicaCalculator{
metricsClient: metricsClient,
podsGetter: podsGetter,
tolerance: tolerance,
cpuTaintAfterStart: cpuTaintAfterStart,
delayOfInitialReadinessStatus: delayOfInitialReadinessStatus,
}
}

@ -73,41 +79,11 @@ func (c *ReplicaCalculator) GetResourceReplicas(currentReplicas int32, targetUti
|
|||
return 0, 0, 0, time.Time{}, fmt.Errorf("no pods returned by selector while calculating replica count")
|
||||
}
|
||||
|
||||
requests := make(map[string]int64, itemsLen)
|
||||
readyPodCount := 0
|
||||
unreadyPods := sets.NewString()
|
||||
missingPods := sets.NewString()
|
||||
|
||||
for _, pod := range podList.Items {
|
||||
podSum := int64(0)
|
||||
for _, container := range pod.Spec.Containers {
|
||||
if containerRequest, ok := container.Resources.Requests[resource]; ok {
|
||||
podSum += containerRequest.MilliValue()
|
||||
} else {
|
||||
return 0, 0, 0, time.Time{}, fmt.Errorf("missing request for %s on container %s in pod %s/%s", resource, container.Name, namespace, pod.Name)
|
||||
}
|
||||
}
|
||||
|
||||
requests[pod.Name] = podSum
|
||||
|
||||
if pod.Status.Phase != v1.PodRunning || !podutil.IsPodReady(&pod) {
|
||||
// save this pod name for later, but pretend it doesn't exist for now
|
||||
if pod.Status.Phase != v1.PodFailed {
|
||||
// Failed pods should not be counted as unready pods as they will
|
||||
// not become running anymore.
|
||||
unreadyPods.Insert(pod.Name)
|
||||
}
|
||||
delete(metrics, pod.Name)
|
||||
continue
|
||||
}
|
||||
|
||||
if _, found := metrics[pod.Name]; !found {
|
||||
// save this pod name for later, but pretend it doesn't exist for now
|
||||
missingPods.Insert(pod.Name)
|
||||
continue
|
||||
}
|
||||
|
||||
readyPodCount++
|
||||
readyPodCount, ignoredPods, missingPods := groupPods(podList.Items, metrics, resource, c.cpuTaintAfterStart, c.delayOfInitialReadinessStatus)
|
||||
removeMetricsForPods(metrics, ignoredPods)
|
||||
requests, err := calculatePodRequests(podList.Items, resource)
|
||||
if err != nil {
|
||||
return 0, 0, 0, time.Time{}, err
|
||||
}
|
||||
|
||||
if len(metrics) == 0 {
|
||||
|
@ -119,8 +95,8 @@ func (c *ReplicaCalculator) GetResourceReplicas(currentReplicas int32, targetUti
|
|||
return 0, 0, 0, time.Time{}, err
|
||||
}
|
||||
|
||||
rebalanceUnready := len(unreadyPods) > 0 && usageRatio > 1.0
|
||||
if !rebalanceUnready && len(missingPods) == 0 {
|
||||
rebalanceIgnored := len(ignoredPods) > 0 && usageRatio > 1.0
|
||||
if !rebalanceIgnored && len(missingPods) == 0 {
|
||||
if math.Abs(1.0-usageRatio) <= c.tolerance {
|
||||
// return the current replicas if the change would be too small
|
||||
return currentReplicas, utilization, rawUtilization, timestamp, nil
|
||||
|
@ -144,9 +120,9 @@ func (c *ReplicaCalculator) GetResourceReplicas(currentReplicas int32, targetUti
|
|||
}
|
||||
}
|
||||
|
||||
if rebalanceUnready {
|
||||
if rebalanceIgnored {
|
||||
// on a scale-up, treat unready pods as using 0% of the resource request
|
||||
for podName := range unreadyPods {
|
||||
for podName := range ignoredPods {
|
||||
metrics[podName] = 0
|
||||
}
|
||||
}
|
||||
|
@ -176,7 +152,7 @@ func (c *ReplicaCalculator) GetRawResourceReplicas(currentReplicas int32, target
|
|||
return 0, 0, time.Time{}, fmt.Errorf("unable to get metrics for resource %s: %v", resource, err)
|
||||
}
|
||||
|
||||
replicaCount, utilization, err = c.calcPlainMetricReplicas(metrics, currentReplicas, targetUtilization, namespace, selector)
|
||||
replicaCount, utilization, err = c.calcPlainMetricReplicas(metrics, currentReplicas, targetUtilization, namespace, selector, resource)
|
||||
return replicaCount, utilization, timestamp, err
|
||||
}
|
||||
|
||||
|
@ -189,12 +165,12 @@ func (c *ReplicaCalculator) GetMetricReplicas(currentReplicas int32, targetUtili
|
|||
return 0, 0, time.Time{}, fmt.Errorf("unable to get metric %s: %v", metricName, err)
|
||||
}
|
||||
|
||||
replicaCount, utilization, err = c.calcPlainMetricReplicas(metrics, currentReplicas, targetUtilization, namespace, selector)
|
||||
replicaCount, utilization, err = c.calcPlainMetricReplicas(metrics, currentReplicas, targetUtilization, namespace, selector, v1.ResourceName(""))
|
||||
return replicaCount, utilization, timestamp, err
|
||||
}
|
||||
|
||||
// calcPlainMetricReplicas calculates the desired replicas for plain (i.e. non-utilization percentage) metrics.
|
||||
func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMetricsInfo, currentReplicas int32, targetUtilization int64, namespace string, selector labels.Selector) (replicaCount int32, utilization int64, err error) {
|
||||
func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMetricsInfo, currentReplicas int32, targetUtilization int64, namespace string, selector labels.Selector, resource v1.ResourceName) (replicaCount int32, utilization int64, err error) {
|
||||
podList, err := c.podsGetter.Pods(namespace).List(metav1.ListOptions{LabelSelector: selector.String()})
|
||||
if err != nil {
|
||||
return 0, 0, fmt.Errorf("unable to get pods while calculating replica count: %v", err)
|
||||
|
@ -204,26 +180,8 @@ func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMet
|
|||
return 0, 0, fmt.Errorf("no pods returned by selector while calculating replica count")
|
||||
}
|
||||
|
||||
readyPodCount := 0
|
||||
unreadyPods := sets.NewString()
|
||||
missingPods := sets.NewString()
|
||||
|
||||
for _, pod := range podList.Items {
|
||||
if pod.Status.Phase != v1.PodRunning || !hasPodBeenReadyBefore(&pod) {
|
||||
// save this pod name for later, but pretend it doesn't exist for now
|
||||
unreadyPods.Insert(pod.Name)
|
||||
delete(metrics, pod.Name)
|
||||
continue
|
||||
}
|
||||
|
||||
if _, found := metrics[pod.Name]; !found {
|
||||
// save this pod name for later, but pretend it doesn't exist for now
|
||||
missingPods.Insert(pod.Name)
|
||||
continue
|
||||
}
|
||||
|
||||
readyPodCount++
|
||||
}
|
||||
readyPodCount, ignoredPods, missingPods := groupPods(podList.Items, metrics, resource, c.cpuTaintAfterStart, c.delayOfInitialReadinessStatus)
|
||||
removeMetricsForPods(metrics, ignoredPods)
|
||||
|
||||
if len(metrics) == 0 {
|
||||
return 0, 0, fmt.Errorf("did not receive metrics for any ready pods")
|
||||
|
@ -231,9 +189,9 @@ func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMet
|
|||
|
||||
usageRatio, utilization := metricsclient.GetMetricUtilizationRatio(metrics, targetUtilization)
|
||||
|
||||
rebalanceUnready := len(unreadyPods) > 0 && usageRatio > 1.0
|
||||
rebalanceIgnored := len(ignoredPods) > 0 && usageRatio > 1.0
|
||||
|
||||
if !rebalanceUnready && len(missingPods) == 0 {
|
||||
if !rebalanceIgnored && len(missingPods) == 0 {
|
||||
if math.Abs(1.0-usageRatio) <= c.tolerance {
|
||||
// return the current replicas if the change would be too small
|
||||
return currentReplicas, utilization, nil
|
||||
|
@ -257,9 +215,9 @@ func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMet
|
|||
}
|
||||
}
|
||||
|
||||
if rebalanceUnready {
|
||||
if rebalanceIgnored {
|
||||
// on a scale-up, treat unready pods as using 0% of the resource request
|
||||
for podName := range unreadyPods {
|
||||
for podName := range ignoredPods {
|
||||
metrics[podName] = 0
|
||||
}
|
||||
}
|
||||
|
@@ -386,21 +344,59 @@ func (c *ReplicaCalculator) GetExternalPerPodMetricReplicas(currentReplicas int3
return replicaCount, utilization, timestamp, nil
}

// hasPodBeenReadyBefore returns true if the pod is ready or if it's not ready
func hasPodBeenReadyBefore(pod *v1.Pod) bool {
_, readyCondition := podutil.GetPodCondition(&pod.Status, v1.PodReady)
if readyCondition == nil {
return false
func groupPods(pods []v1.Pod, metrics metricsclient.PodMetricsInfo, resource v1.ResourceName, cpuTaintAfterStart, delayOfInitialReadinessStatus time.Duration) (readyPodCount int, ignoredPods sets.String, missingPods sets.String) {
missingPods = sets.NewString()
ignoredPods = sets.NewString()
glog.Errorf("groupPods stack: %v", string(debug.Stack()))
for _, pod := range pods {
if pod.Status.Phase == v1.PodFailed {
continue
}
if readyCondition.Status == v1.ConditionTrue {
return true
if _, found := metrics[pod.Name]; !found {
missingPods.Insert(pod.Name)
continue
}
lastReady := readyCondition.LastTransitionTime.Time
if pod.Status.StartTime == nil {
return false
if resource == v1.ResourceCPU {
var ignorePod bool
_, condition := podutil.GetPodCondition(&pod.Status, v1.PodReady)
if condition == nil || pod.Status.StartTime == nil {
ignorePod = true
} else {
if condition.Status == v1.ConditionTrue {
ignorePod = pod.Status.StartTime.Add(cpuTaintAfterStart + cpuSampleWindow).After(time.Now())
} else {
ignorePod = pod.Status.StartTime.Add(delayOfInitialReadinessStatus).After(condition.LastTransitionTime.Time)
}
}
if ignorePod {
ignoredPods.Insert(pod.Name)
continue
}
}

readyPodCount++
}
return
}

func calculatePodRequests(pods []v1.Pod, resource v1.ResourceName) (map[string]int64, error) {
requests := make(map[string]int64, len(pods))
for _, pod := range pods {
podSum := int64(0)
for _, container := range pod.Spec.Containers {
if containerRequest, ok := container.Resources.Requests[resource]; ok {
podSum += containerRequest.MilliValue()
} else {
return nil, fmt.Errorf("missing request for %s", resource)
}
}
requests[pod.Name] = podSum
}
return requests, nil
}

func removeMetricsForPods(metrics metricsclient.PodMetricsInfo, pods sets.String) {
for _, pod := range pods.UnsortedList() {
delete(metrics, pod)
}
started := pod.Status.StartTime.Time
// If last status change was longer than maxDelayOfInitialReadinessStatus after the pod was
// created assume it was ready in the past.
return lastReady.After(started.Add(maxDelayOfInitialReadinessStatus))
}

@ -29,10 +29,12 @@ import (
|
|||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/apimachinery/pkg/runtime/schema"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
"k8s.io/client-go/kubernetes/fake"
|
||||
core "k8s.io/client-go/testing"
|
||||
"k8s.io/kubernetes/pkg/api/legacyscheme"
|
||||
"k8s.io/kubernetes/pkg/controller/podautoscaler/metrics"
|
||||
metricsclient "k8s.io/kubernetes/pkg/controller/podautoscaler/metrics"
|
||||
cmapi "k8s.io/metrics/pkg/apis/custom_metrics/v1beta1"
|
||||
emapi "k8s.io/metrics/pkg/apis/external_metrics/v1beta1"
|
||||
metricsapi "k8s.io/metrics/pkg/apis/metrics/v1beta1"
|
||||
|
@ -88,6 +90,7 @@ type replicaCalcTestCase struct {
|
|||
metric *metricInfo
|
||||
|
||||
podReadiness []v1.ConditionStatus
|
||||
podStartTime []metav1.Time
|
||||
podPhase []v1.PodPhase
|
||||
}
|
||||
|
||||
|
@ -111,6 +114,10 @@ func (tc *replicaCalcTestCase) prepareTestClientSet() *fake.Clientset {
|
|||
if tc.podReadiness != nil && i < len(tc.podReadiness) {
|
||||
podReadiness = tc.podReadiness[i]
|
||||
}
|
||||
var podStartTime metav1.Time
|
||||
if tc.podStartTime != nil {
|
||||
podStartTime = tc.podStartTime[i]
|
||||
}
|
||||
podPhase := v1.PodRunning
|
||||
if tc.podPhase != nil {
|
||||
podPhase = tc.podPhase[i]
|
||||
|
@ -119,6 +126,7 @@ func (tc *replicaCalcTestCase) prepareTestClientSet() *fake.Clientset {
|
|||
pod := v1.Pod{
|
||||
Status: v1.PodStatus{
|
||||
Phase: podPhase,
|
||||
StartTime: &podStartTime,
|
||||
Conditions: []v1.PodCondition{
|
||||
{
|
||||
Type: v1.PodReady,
|
||||
|
@ -316,11 +324,7 @@ func (tc *replicaCalcTestCase) runTest(t *testing.T) {
|
|||
testClient, testMetricsClient, testCMClient, testEMClient := tc.prepareTestClient(t)
|
||||
metricsClient := metrics.NewRESTMetricsClient(testMetricsClient.MetricsV1beta1(), testCMClient, testEMClient)
|
||||
|
||||
replicaCalc := &ReplicaCalculator{
|
||||
metricsClient: metricsClient,
|
||||
podsGetter: testClient.Core(),
|
||||
tolerance: defaultTestingTolerance,
|
||||
}
|
||||
replicaCalc := NewReplicaCalculator(metricsClient, testClient.Core(), defaultTestingTolerance, defaultTestingCpuTaintAfterStart, defaultTestingDelayOfInitialReadinessStatus)
|
||||
|
||||
selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{
|
||||
MatchLabels: map[string]string{"name": podNamePrefix},
|
||||
|
@ -439,6 +443,24 @@ func TestReplicaCalcScaleUpUnreadyLessScale(t *testing.T) {
|
|||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestReplicaCalcScaleUpHotCpuLessScale(t *testing.T) {
|
||||
tc := replicaCalcTestCase{
|
||||
currentReplicas: 3,
|
||||
expectedReplicas: 4,
|
||||
podStartTime: []metav1.Time{hotCpuCreationTime(), coolCpuCreationTime(), coolCpuCreationTime()},
|
||||
resource: &resourceInfo{
|
||||
name: v1.ResourceCPU,
|
||||
requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
|
||||
levels: []int64{300, 500, 700},
|
||||
|
||||
targetUtilization: 30,
|
||||
expectedUtilization: 60,
|
||||
expectedValue: numContainersPerPod * 600,
|
||||
},
|
||||
}
|
||||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestReplicaCalcScaleUpUnreadyNoScale(t *testing.T) {
|
||||
tc := replicaCalcTestCase{
|
||||
currentReplicas: 3,
|
||||
|
@ -457,6 +479,25 @@ func TestReplicaCalcScaleUpUnreadyNoScale(t *testing.T) {
|
|||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestReplicaCalcScaleHotCpuNoScale(t *testing.T) {
|
||||
tc := replicaCalcTestCase{
|
||||
currentReplicas: 3,
|
||||
expectedReplicas: 3,
|
||||
podReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionFalse, v1.ConditionFalse},
|
||||
podStartTime: []metav1.Time{coolCpuCreationTime(), hotCpuCreationTime(), hotCpuCreationTime()},
|
||||
resource: &resourceInfo{
|
||||
name: v1.ResourceCPU,
|
||||
requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
|
||||
levels: []int64{400, 500, 700},
|
||||
|
||||
targetUtilization: 30,
|
||||
expectedUtilization: 40,
|
||||
expectedValue: numContainersPerPod * 400,
|
||||
},
|
||||
}
|
||||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestReplicaCalcScaleUpIgnoresFailedPods(t *testing.T) {
|
||||
tc := replicaCalcTestCase{
|
||||
currentReplicas: 2,
|
||||
|
@ -491,11 +532,12 @@ func TestReplicaCalcScaleUpCM(t *testing.T) {
|
|||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestReplicaCalcScaleUpCMUnreadyLessScale(t *testing.T) {
|
||||
func TestReplicaCalcScaleUpCMUnreadyHotCpuNoLessScale(t *testing.T) {
|
||||
tc := replicaCalcTestCase{
|
||||
currentReplicas: 3,
|
||||
expectedReplicas: 4,
|
||||
expectedReplicas: 6,
|
||||
podReadiness: []v1.ConditionStatus{v1.ConditionTrue, v1.ConditionTrue, v1.ConditionFalse},
|
||||
podStartTime: []metav1.Time{coolCpuCreationTime(), coolCpuCreationTime(), hotCpuCreationTime()},
|
||||
metric: &metricInfo{
|
||||
name: "qps",
|
||||
levels: []int64{50000, 10000, 30000},
|
||||
|
@ -507,16 +549,17 @@ func TestReplicaCalcScaleUpCMUnreadyLessScale(t *testing.T) {
|
|||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestReplicaCalcScaleUpCMUnreadyNoScaleWouldScaleDown(t *testing.T) {
|
||||
func TestReplicaCalcScaleUpCMUnreadyHotCpuScaleWouldScaleDown(t *testing.T) {
|
||||
tc := replicaCalcTestCase{
|
||||
currentReplicas: 3,
|
||||
expectedReplicas: 3,
|
||||
expectedReplicas: 7,
|
||||
podReadiness: []v1.ConditionStatus{v1.ConditionFalse, v1.ConditionTrue, v1.ConditionFalse},
|
||||
podStartTime: []metav1.Time{hotCpuCreationTime(), coolCpuCreationTime(), hotCpuCreationTime()},
|
||||
metric: &metricInfo{
|
||||
name: "qps",
|
||||
levels: []int64{50000, 15000, 30000},
|
||||
targetUtilization: 15000,
|
||||
expectedUtilization: 15000,
|
||||
expectedUtilization: 31666,
|
||||
metricType: podMetric,
|
||||
},
|
||||
}
|
||||
|
@ -709,7 +752,7 @@ func TestReplicaCalcScaleDownPerPodCMExternal(t *testing.T) {
|
|||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestReplicaCalcScaleDownIgnoresUnreadyPods(t *testing.T) {
|
||||
func TestReplicaCalcScaleDownIncludeUnreadyPods(t *testing.T) {
|
||||
tc := replicaCalcTestCase{
|
||||
currentReplicas: 5,
|
||||
expectedReplicas: 2,
|
||||
|
@ -727,6 +770,24 @@ func TestReplicaCalcScaleDownIgnoresUnreadyPods(t *testing.T) {
|
|||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestReplicaCalcScaleDownIgnoreHotCpuPods(t *testing.T) {
|
||||
tc := replicaCalcTestCase{
|
||||
currentReplicas: 5,
|
||||
expectedReplicas: 2,
|
||||
podStartTime: []metav1.Time{coolCpuCreationTime(), coolCpuCreationTime(), coolCpuCreationTime(), hotCpuCreationTime(), hotCpuCreationTime()},
|
||||
resource: &resourceInfo{
|
||||
name: v1.ResourceCPU,
|
||||
requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
|
||||
levels: []int64{100, 300, 500, 250, 250},
|
||||
|
||||
targetUtilization: 50,
|
||||
expectedUtilization: 30,
|
||||
expectedValue: numContainersPerPod * 300,
|
||||
},
|
||||
}
|
||||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestReplicaCalcScaleDownIgnoresFailedPods(t *testing.T) {
|
||||
tc := replicaCalcTestCase{
|
||||
currentReplicas: 5,
|
||||
|
@ -943,7 +1004,7 @@ func TestReplicaCalcMissingMetricsNoChangeLt(t *testing.T) {
|
|||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestReplicaCalcMissingMetricsUnreadyNoChange(t *testing.T) {
|
||||
func TestReplicaCalcMissingMetricsUnreadyChange(t *testing.T) {
|
||||
tc := replicaCalcTestCase{
|
||||
currentReplicas: 3,
|
||||
expectedReplicas: 3,
|
||||
|
@ -961,6 +1022,24 @@ func TestReplicaCalcMissingMetricsUnreadyNoChange(t *testing.T) {
|
|||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestReplicaCalcMissingMetricsHotCpuNoChange(t *testing.T) {
|
||||
tc := replicaCalcTestCase{
|
||||
currentReplicas: 3,
|
||||
expectedReplicas: 3,
|
||||
podStartTime: []metav1.Time{hotCpuCreationTime(), coolCpuCreationTime(), coolCpuCreationTime()},
|
||||
resource: &resourceInfo{
|
||||
name: v1.ResourceCPU,
|
||||
requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
|
||||
levels: []int64{100, 450},
|
||||
|
||||
targetUtilization: 50,
|
||||
expectedUtilization: 45,
|
||||
expectedValue: numContainersPerPod * 450,
|
||||
},
|
||||
}
|
||||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestReplicaCalcMissingMetricsUnreadyScaleUp(t *testing.T) {
|
||||
tc := replicaCalcTestCase{
|
||||
currentReplicas: 3,
|
||||
|
@ -979,6 +1058,25 @@ func TestReplicaCalcMissingMetricsUnreadyScaleUp(t *testing.T) {
|
|||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestReplicaCalcMissingMetricsHotCpuScaleUp(t *testing.T) {
|
||||
tc := replicaCalcTestCase{
|
||||
currentReplicas: 3,
|
||||
expectedReplicas: 4,
|
||||
podReadiness: []v1.ConditionStatus{v1.ConditionFalse, v1.ConditionTrue, v1.ConditionTrue},
|
||||
podStartTime: []metav1.Time{hotCpuCreationTime(), coolCpuCreationTime(), coolCpuCreationTime()},
|
||||
resource: &resourceInfo{
|
||||
name: v1.ResourceCPU,
|
||||
requests: []resource.Quantity{resource.MustParse("1.0"), resource.MustParse("1.0"), resource.MustParse("1.0")},
|
||||
levels: []int64{100, 2000},
|
||||
|
||||
targetUtilization: 50,
|
||||
expectedUtilization: 200,
|
||||
expectedValue: numContainersPerPod * 2000,
|
||||
},
|
||||
}
|
||||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestReplicaCalcMissingMetricsUnreadyScaleDown(t *testing.T) {
|
||||
tc := replicaCalcTestCase{
|
||||
currentReplicas: 4,
|
||||
|
@ -1069,74 +1167,183 @@ func TestReplicaCalcComputedToleranceAlgImplementation(t *testing.T) {
|
|||
tc.runTest(t)
|
||||
}
|
||||
|
||||
func TestHasPodBeenReadyBefore(t *testing.T) {
|
||||
func TestGroupPods(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
conditions []v1.PodCondition
|
||||
started time.Time
|
||||
expected bool
|
||||
pods []v1.Pod
|
||||
metrics metricsclient.PodMetricsInfo
|
||||
resource v1.ResourceName
|
||||
expectReadyPodCount int
|
||||
expectUnreadyPods sets.String
|
||||
expectMissingPods sets.String
|
||||
}{
|
||||
{
|
||||
"initially unready",
|
||||
[]v1.PodCondition{
|
||||
"void",
|
||||
[]v1.Pod{},
|
||||
metricsclient.PodMetricsInfo{},
|
||||
v1.ResourceName(""),
|
||||
0,
|
||||
sets.NewString(),
|
||||
sets.NewString(),
|
||||
},
|
||||
{
|
||||
"a ready pod",
|
||||
[]v1.Pod{
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "bentham",
|
||||
},
|
||||
Status: v1.PodStatus{
|
||||
Phase: v1.PodSucceeded,
|
||||
},
|
||||
},
|
||||
},
|
||||
metricsclient.PodMetricsInfo{
|
||||
"bentham": 1,
|
||||
},
|
||||
v1.ResourceName("hedons"),
|
||||
1,
|
||||
sets.NewString(),
|
||||
sets.NewString(),
|
||||
},
|
||||
{
|
||||
"an unready pod",
|
||||
[]v1.Pod{
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "lucretius",
|
||||
},
|
||||
Status: v1.PodStatus{
|
||||
Phase: v1.PodSucceeded,
|
||||
StartTime: &metav1.Time{
|
||||
Time: time.Now(),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
metricsclient.PodMetricsInfo{
|
||||
"lucretius": 1,
|
||||
},
|
||||
v1.ResourceCPU,
|
||||
0,
|
||||
sets.NewString("lucretius"),
|
||||
sets.NewString(),
|
||||
},
|
||||
{
|
||||
"a ready cpu pod",
|
||||
[]v1.Pod{
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "niccolo",
|
||||
},
|
||||
Status: v1.PodStatus{
|
||||
Phase: v1.PodSucceeded,
|
||||
StartTime: &metav1.Time{
|
||||
Time: time.Now().Add(-3 * time.Minute),
|
||||
},
|
||||
Conditions: []v1.PodCondition{
|
||||
{
|
||||
Type: v1.PodReady,
|
||||
LastTransitionTime: metav1.Time{
|
||||
Time: metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
|
||||
},
|
||||
Status: v1.ConditionFalse,
|
||||
},
|
||||
},
|
||||
metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
|
||||
false,
|
||||
},
|
||||
{
|
||||
"currently unready",
|
||||
[]v1.PodCondition{
|
||||
{
|
||||
Type: v1.PodReady,
|
||||
LastTransitionTime: metav1.Time{
|
||||
Time: metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
|
||||
},
|
||||
Status: v1.ConditionFalse,
|
||||
},
|
||||
},
|
||||
metav1.Date(2018, 7, 25, 17, 0, 0, 0, time.UTC).Time,
|
||||
true,
|
||||
},
|
||||
{
|
||||
"currently ready",
|
||||
[]v1.PodCondition{
|
||||
{
|
||||
Type: v1.PodReady,
|
||||
LastTransitionTime: metav1.Time{
|
||||
Time: metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
|
||||
},
|
||||
LastTransitionTime: metav1.Time{Time: time.Now().Add(-3 * time.Minute)},
|
||||
Status: v1.ConditionTrue,
|
||||
},
|
||||
},
|
||||
metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
|
||||
true,
|
||||
},
|
||||
},
|
||||
},
|
||||
metricsclient.PodMetricsInfo{
|
||||
"niccolo": 1,
|
||||
},
|
||||
v1.ResourceCPU,
|
||||
1,
|
||||
sets.NewString(),
|
||||
sets.NewString(),
|
||||
},
|
||||
{
|
||||
"no ready status",
|
||||
[]v1.PodCondition{},
|
||||
metav1.Date(2018, 7, 25, 17, 10, 0, 0, time.UTC).Time,
|
||||
false,
|
||||
"a missing pod",
|
||||
[]v1.Pod{
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "epicurus",
|
||||
},
|
||||
Status: v1.PodStatus{
|
||||
Phase: v1.PodSucceeded,
|
||||
StartTime: &metav1.Time{
|
||||
Time: time.Now().Add(-3 * time.Minute),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
metricsclient.PodMetricsInfo{},
|
||||
v1.ResourceCPU,
|
||||
0,
|
||||
sets.NewString(),
|
||||
sets.NewString("epicurus"),
|
||||
},
|
||||
{
|
||||
"all together",
|
||||
[]v1.Pod{
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "lucretius",
|
||||
},
|
||||
Status: v1.PodStatus{
|
||||
Phase: v1.PodSucceeded,
|
||||
StartTime: &metav1.Time{
|
||||
Time: time.Now(),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "niccolo",
|
||||
},
|
||||
Status: v1.PodStatus{
|
||||
Phase: v1.PodSucceeded,
|
||||
StartTime: &metav1.Time{
|
||||
Time: time.Now().Add(-3 * time.Minute),
|
||||
},
|
||||
Conditions: []v1.PodCondition{
|
||||
{
|
||||
Type: v1.PodReady,
|
||||
LastTransitionTime: metav1.Time{Time: time.Now().Add(-3 * time.Minute)},
|
||||
Status: v1.ConditionTrue,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "epicurus",
|
||||
},
|
||||
Status: v1.PodStatus{
|
||||
Phase: v1.PodSucceeded,
|
||||
StartTime: &metav1.Time{
|
||||
Time: time.Now().Add(-3 * time.Minute),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
metricsclient.PodMetricsInfo{
|
||||
"lucretius": 1,
|
||||
"niccolo": 1,
|
||||
},
|
||||
v1.ResourceCPU,
|
||||
1,
|
||||
sets.NewString("lucretius"),
|
||||
sets.NewString("epicurus"),
|
||||
},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
pod := &v1.Pod{
|
||||
Status: v1.PodStatus{
|
||||
Conditions: tc.conditions,
|
||||
StartTime: &metav1.Time{
|
||||
Time: tc.started,
|
||||
},
|
||||
},
|
||||
readyPodCount, unreadyPods, missingPods := groupPods(tc.pods, tc.metrics, tc.resource, defaultTestingCpuTaintAfterStart, defaultTestingDelayOfInitialReadinessStatus)
|
||||
if readyPodCount != tc.expectReadyPodCount {
|
||||
t.Errorf("%s got readyPodCount %d, expected %d", tc.name, readyPodCount, tc.expectReadyPodCount)
|
||||
}
|
||||
got := hasPodBeenReadyBefore(pod)
|
||||
if got != tc.expected {
|
||||
t.Errorf("[TestHasPodBeenReadyBefore.%s] got %v, want %v", tc.name, got, tc.expected)
|
||||
if !unreadyPods.Equal(tc.expectUnreadyPods) {
|
||||
t.Errorf("%s got unreadyPods %v, expected %v", tc.name, unreadyPods, tc.expectUnreadyPods)
|
||||
}
|
||||
if !missingPods.Equal(tc.expectMissingPods) {
|
||||
t.Errorf("%s got missingPods %v, expected %v", tc.name, missingPods, tc.expectMissingPods)
|
||||
}
|
||||
}
|
||||
}
|
||||
|