Make HPA more configurable

Duration of initialization taint on CPU and window of initial readiness
setting controlled by flags.

Adding API violation exceptions following example of e50340ee23
pull/8/head
Joachim Bartosik 2018-08-21 10:18:31 +02:00
parent 7d6676eab1
commit 4fd6a1684d
16 changed files with 75 additions and 38 deletions

View File

@ -93,6 +93,8 @@ API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alp
API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,HPAControllerConfiguration,HorizontalPodAutoscalerDownscaleForbiddenWindow API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,HPAControllerConfiguration,HorizontalPodAutoscalerDownscaleForbiddenWindow
API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,HPAControllerConfiguration,HorizontalPodAutoscalerTolerance API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,HPAControllerConfiguration,HorizontalPodAutoscalerTolerance
API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,HPAControllerConfiguration,HorizontalPodAutoscalerUseRESTClients API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,HPAControllerConfiguration,HorizontalPodAutoscalerUseRESTClients
API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,HPAControllerConfiguration,HorizontalPodAutoscalerCPUTaintPeriod
API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,HPAControllerConfiguration,HorizontalPodAutoscalerInitialReadinessDelay
API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,JobControllerConfiguration,ConcurrentJobSyncs API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,JobControllerConfiguration,ConcurrentJobSyncs
API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,KubeCloudSharedConfiguration,Port API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,KubeCloudSharedConfiguration,Port
API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,KubeCloudSharedConfiguration,Address API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,KubeCloudSharedConfiguration,Address

View File

@ -84,6 +84,8 @@ func startHPAControllerWithMetricsClient(ctx ControllerContext, metricsClient me
metricsClient, metricsClient,
hpaClient.CoreV1(), hpaClient.CoreV1(),
ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerTolerance, ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerTolerance,
ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerCPUTaintPeriod.Duration,
ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerInitialReadinessDelay.Duration,
) )
go podautoscaler.NewHorizontalController( go podautoscaler.NewHorizontalController(
hpaClient.CoreV1(), hpaClient.CoreV1(),

View File

@ -30,6 +30,8 @@ type HPAControllerOptions struct {
HorizontalPodAutoscalerDownscaleForbiddenWindow metav1.Duration HorizontalPodAutoscalerDownscaleForbiddenWindow metav1.Duration
HorizontalPodAutoscalerUpscaleForbiddenWindow metav1.Duration HorizontalPodAutoscalerUpscaleForbiddenWindow metav1.Duration
HorizontalPodAutoscalerSyncPeriod metav1.Duration HorizontalPodAutoscalerSyncPeriod metav1.Duration
HorizontalPodAutoscalerCPUTaintPeriod metav1.Duration
HorizontalPodAutoscalerInitialReadinessDelay metav1.Duration
} }
// AddFlags adds flags related to HPAController for controller manager to the specified FlagSet. // AddFlags adds flags related to HPAController for controller manager to the specified FlagSet.
@ -44,6 +46,8 @@ func (o *HPAControllerOptions) AddFlags(fs *pflag.FlagSet) {
fs.DurationVar(&o.HorizontalPodAutoscalerDownscaleForbiddenWindow.Duration, "horizontal-pod-autoscaler-downscale-delay", o.HorizontalPodAutoscalerDownscaleForbiddenWindow.Duration, "The period since last downscale, before another downscale can be performed in horizontal pod autoscaler.") fs.DurationVar(&o.HorizontalPodAutoscalerDownscaleForbiddenWindow.Duration, "horizontal-pod-autoscaler-downscale-delay", o.HorizontalPodAutoscalerDownscaleForbiddenWindow.Duration, "The period since last downscale, before another downscale can be performed in horizontal pod autoscaler.")
fs.Float64Var(&o.HorizontalPodAutoscalerTolerance, "horizontal-pod-autoscaler-tolerance", o.HorizontalPodAutoscalerTolerance, "The minimum change (from 1.0) in the desired-to-actual metrics ratio for the horizontal pod autoscaler to consider scaling.") fs.Float64Var(&o.HorizontalPodAutoscalerTolerance, "horizontal-pod-autoscaler-tolerance", o.HorizontalPodAutoscalerTolerance, "The minimum change (from 1.0) in the desired-to-actual metrics ratio for the horizontal pod autoscaler to consider scaling.")
fs.BoolVar(&o.HorizontalPodAutoscalerUseRESTClients, "horizontal-pod-autoscaler-use-rest-clients", o.HorizontalPodAutoscalerUseRESTClients, "If set to true, causes the horizontal pod autoscaler controller to use REST clients through the kube-aggregator, instead of using the legacy metrics client through the API server proxy. This is required for custom metrics support in the horizontal pod autoscaler.") fs.BoolVar(&o.HorizontalPodAutoscalerUseRESTClients, "horizontal-pod-autoscaler-use-rest-clients", o.HorizontalPodAutoscalerUseRESTClients, "If set to true, causes the horizontal pod autoscaler controller to use REST clients through the kube-aggregator, instead of using the legacy metrics client through the API server proxy. This is required for custom metrics support in the horizontal pod autoscaler.")
fs.DurationVar(&o.HorizontalPodAutoscalerCPUTaintPeriod.Duration, "horizontal-pod-autoscaler-cpu-taint-period", o.HorizontalPodAutoscalerCPUTaintPeriod.Duration, "The period after pod start for which CPU samples are considered tainted by initialization.")
fs.DurationVar(&o.HorizontalPodAutoscalerInitialReadinessDelay.Duration, "horizontal-pod-autoscaler-initial-readiness-delay", o.HorizontalPodAutoscalerInitialReadinessDelay.Duration, "The period after pod start during which readiness changes will be treated as initial readiness.")
} }
// ApplyTo fills up HPAController config with options. // ApplyTo fills up HPAController config with options.

View File

@ -134,6 +134,8 @@ func NewKubeControllerManagerOptions() (*KubeControllerManagerOptions, error) {
HorizontalPodAutoscalerSyncPeriod: componentConfig.HPAController.HorizontalPodAutoscalerSyncPeriod, HorizontalPodAutoscalerSyncPeriod: componentConfig.HPAController.HorizontalPodAutoscalerSyncPeriod,
HorizontalPodAutoscalerUpscaleForbiddenWindow: componentConfig.HPAController.HorizontalPodAutoscalerUpscaleForbiddenWindow, HorizontalPodAutoscalerUpscaleForbiddenWindow: componentConfig.HPAController.HorizontalPodAutoscalerUpscaleForbiddenWindow,
HorizontalPodAutoscalerDownscaleForbiddenWindow: componentConfig.HPAController.HorizontalPodAutoscalerDownscaleForbiddenWindow, HorizontalPodAutoscalerDownscaleForbiddenWindow: componentConfig.HPAController.HorizontalPodAutoscalerDownscaleForbiddenWindow,
HorizontalPodAutoscalerCPUTaintPeriod: componentConfig.HPAController.HorizontalPodAutoscalerCPUTaintPeriod,
HorizontalPodAutoscalerInitialReadinessDelay: componentConfig.HPAController.HorizontalPodAutoscalerInitialReadinessDelay,
HorizontalPodAutoscalerTolerance: componentConfig.HPAController.HorizontalPodAutoscalerTolerance, HorizontalPodAutoscalerTolerance: componentConfig.HPAController.HorizontalPodAutoscalerTolerance,
HorizontalPodAutoscalerUseRESTClients: componentConfig.HPAController.HorizontalPodAutoscalerUseRESTClients, HorizontalPodAutoscalerUseRESTClients: componentConfig.HPAController.HorizontalPodAutoscalerUseRESTClients,
}, },

View File

@ -73,6 +73,8 @@ func TestAddFlags(t *testing.T) {
"--horizontal-pod-autoscaler-downscale-delay=2m", "--horizontal-pod-autoscaler-downscale-delay=2m",
"--horizontal-pod-autoscaler-sync-period=45s", "--horizontal-pod-autoscaler-sync-period=45s",
"--horizontal-pod-autoscaler-upscale-delay=1m", "--horizontal-pod-autoscaler-upscale-delay=1m",
"--horizontal-pod-autoscaler-cpu-taint-period=90s",
"--horizontal-pod-autoscaler-initial-readiness-delay=50s",
"--http2-max-streams-per-connection=47", "--http2-max-streams-per-connection=47",
"--kube-api-burst=100", "--kube-api-burst=100",
"--kube-api-content-type=application/json", "--kube-api-content-type=application/json",
@ -185,6 +187,8 @@ func TestAddFlags(t *testing.T) {
HorizontalPodAutoscalerSyncPeriod: metav1.Duration{Duration: 45 * time.Second}, HorizontalPodAutoscalerSyncPeriod: metav1.Duration{Duration: 45 * time.Second},
HorizontalPodAutoscalerUpscaleForbiddenWindow: metav1.Duration{Duration: 1 * time.Minute}, HorizontalPodAutoscalerUpscaleForbiddenWindow: metav1.Duration{Duration: 1 * time.Minute},
HorizontalPodAutoscalerDownscaleForbiddenWindow: metav1.Duration{Duration: 2 * time.Minute}, HorizontalPodAutoscalerDownscaleForbiddenWindow: metav1.Duration{Duration: 2 * time.Minute},
HorizontalPodAutoscalerCPUTaintPeriod: metav1.Duration{Duration: 90 * time.Second},
HorizontalPodAutoscalerInitialReadinessDelay: metav1.Duration{Duration: 50 * time.Second},
HorizontalPodAutoscalerTolerance: 0.1, HorizontalPodAutoscalerTolerance: 0.1,
HorizontalPodAutoscalerUseRESTClients: true, HorizontalPodAutoscalerUseRESTClients: true,
}, },

View File

@ -371,6 +371,14 @@ type HPAControllerConfiguration struct {
// through the kube-aggregator when enabled, instead of using the legacy metrics client // through the kube-aggregator when enabled, instead of using the legacy metrics client
// through the API server proxy. // through the API server proxy.
HorizontalPodAutoscalerUseRESTClients bool HorizontalPodAutoscalerUseRESTClients bool
// HorizontalPodAutoscalerCPUTaintPeriod is period after pod start for which HPA will consider CPU
// samples from the pod contaminated by initialization and disregard them.
HorizontalPodAutoscalerCPUTaintPeriod metav1.Duration
// HorizontalPodAutoscalerInitialReadinessDelay is period after pod start during which readiness
// changes are treated as readiness being set for the first time. The only effect of this is that
// HPA will disregard CPU samples from unready pods that had last readiness change during that
// period.
HorizontalPodAutoscalerInitialReadinessDelay metav1.Duration
} }
type JobControllerConfiguration struct { type JobControllerConfiguration struct {

View File

@ -95,6 +95,14 @@ func SetDefaults_KubeControllerManagerConfiguration(obj *KubeControllerManagerCo
if obj.HPAController.HorizontalPodAutoscalerUpscaleForbiddenWindow == zero { if obj.HPAController.HorizontalPodAutoscalerUpscaleForbiddenWindow == zero {
obj.HPAController.HorizontalPodAutoscalerUpscaleForbiddenWindow = metav1.Duration{Duration: 3 * time.Minute} obj.HPAController.HorizontalPodAutoscalerUpscaleForbiddenWindow = metav1.Duration{Duration: 3 * time.Minute}
} }
if obj.HPAController.HorizontalPodAutoscalerCPUTaintPeriod == zero {
// Assuming CPU is collected every minute and initialization takes another minute HPA should
// disregard samples from first two minutes as contaminated by initialization.
obj.HPAController.HorizontalPodAutoscalerCPUTaintPeriod = metav1.Duration{Duration: time.Minute}
}
if obj.HPAController.HorizontalPodAutoscalerInitialReadinessDelay == zero {
obj.HPAController.HorizontalPodAutoscalerInitialReadinessDelay = metav1.Duration{Duration: 30 * time.Second}
}
if obj.HPAController.HorizontalPodAutoscalerDownscaleForbiddenWindow == zero { if obj.HPAController.HorizontalPodAutoscalerDownscaleForbiddenWindow == zero {
obj.HPAController.HorizontalPodAutoscalerDownscaleForbiddenWindow = metav1.Duration{Duration: 5 * time.Minute} obj.HPAController.HorizontalPodAutoscalerDownscaleForbiddenWindow = metav1.Duration{Duration: 5 * time.Minute}
} }

View File

@ -416,6 +416,14 @@ type HPAControllerConfiguration struct {
// through the kube-aggregator when enabled, instead of using the legacy metrics client // through the kube-aggregator when enabled, instead of using the legacy metrics client
// through the API server proxy. // through the API server proxy.
HorizontalPodAutoscalerUseRESTClients *bool HorizontalPodAutoscalerUseRESTClients *bool
// HorizontalPodAutoscalerCPUTaintPeriod is period after pod start for which HPA will consider CPU
// samples from the pod contaminated by initialization and disregard them.
HorizontalPodAutoscalerCPUTaintPeriod metav1.Duration
// HorizontalPodAutoscalerInitialReadinessDelay is period after pod start during which readiness
// changes are treated as readiness being set for the first time. The only effect of this is that
// HPA will disregard CPU samples from unready pods that had last readiness change during that
// period.
HorizontalPodAutoscalerInitialReadinessDelay metav1.Duration
} }
type JobControllerConfiguration struct { type JobControllerConfiguration struct {

View File

@ -667,6 +667,8 @@ func autoConvert_v1alpha1_HPAControllerConfiguration_To_componentconfig_HPAContr
if err := v1.Convert_Pointer_bool_To_bool(&in.HorizontalPodAutoscalerUseRESTClients, &out.HorizontalPodAutoscalerUseRESTClients, s); err != nil { if err := v1.Convert_Pointer_bool_To_bool(&in.HorizontalPodAutoscalerUseRESTClients, &out.HorizontalPodAutoscalerUseRESTClients, s); err != nil {
return err return err
} }
out.HorizontalPodAutoscalerCPUTaintPeriod = in.HorizontalPodAutoscalerCPUTaintPeriod
out.HorizontalPodAutoscalerInitialReadinessDelay = in.HorizontalPodAutoscalerInitialReadinessDelay
return nil return nil
} }
@ -683,6 +685,8 @@ func autoConvert_componentconfig_HPAControllerConfiguration_To_v1alpha1_HPAContr
if err := v1.Convert_bool_To_Pointer_bool(&in.HorizontalPodAutoscalerUseRESTClients, &out.HorizontalPodAutoscalerUseRESTClients, s); err != nil { if err := v1.Convert_bool_To_Pointer_bool(&in.HorizontalPodAutoscalerUseRESTClients, &out.HorizontalPodAutoscalerUseRESTClients, s); err != nil {
return err return err
} }
out.HorizontalPodAutoscalerCPUTaintPeriod = in.HorizontalPodAutoscalerCPUTaintPeriod
out.HorizontalPodAutoscalerInitialReadinessDelay = in.HorizontalPodAutoscalerInitialReadinessDelay
return nil return nil
} }

View File

@ -242,6 +242,8 @@ func (in *HPAControllerConfiguration) DeepCopyInto(out *HPAControllerConfigurati
*out = new(bool) *out = new(bool)
**out = **in **out = **in
} }
out.HorizontalPodAutoscalerCPUTaintPeriod = in.HorizontalPodAutoscalerCPUTaintPeriod
out.HorizontalPodAutoscalerInitialReadinessDelay = in.HorizontalPodAutoscalerInitialReadinessDelay
return return
} }

View File

@ -232,6 +232,8 @@ func (in *HPAControllerConfiguration) DeepCopyInto(out *HPAControllerConfigurati
out.HorizontalPodAutoscalerSyncPeriod = in.HorizontalPodAutoscalerSyncPeriod out.HorizontalPodAutoscalerSyncPeriod = in.HorizontalPodAutoscalerSyncPeriod
out.HorizontalPodAutoscalerUpscaleForbiddenWindow = in.HorizontalPodAutoscalerUpscaleForbiddenWindow out.HorizontalPodAutoscalerUpscaleForbiddenWindow = in.HorizontalPodAutoscalerUpscaleForbiddenWindow
out.HorizontalPodAutoscalerDownscaleForbiddenWindow = in.HorizontalPodAutoscalerDownscaleForbiddenWindow out.HorizontalPodAutoscalerDownscaleForbiddenWindow = in.HorizontalPodAutoscalerDownscaleForbiddenWindow
out.HorizontalPodAutoscalerCPUTaintPeriod = in.HorizontalPodAutoscalerCPUTaintPeriod
out.HorizontalPodAutoscalerInitialReadinessDelay = in.HorizontalPodAutoscalerInitialReadinessDelay
return return
} }

View File

@ -642,11 +642,7 @@ func (tc *testCase) setupController(t *testing.T) (*HorizontalController, inform
return true, obj, nil return true, obj, nil
}) })
replicaCalc := &ReplicaCalculator{ replicaCalc := NewReplicaCalculator(metricsClient, testClient.Core(), defaultTestingTolerance, defaultTestingCpuTaintAfterStart, defaultTestingDelayOfInitialReadinessStatus)
metricsClient: metricsClient,
podsGetter: testClient.Core(),
tolerance: defaultTestingTolerance,
}
informerFactory := informers.NewSharedInformerFactory(testClient, controller.NoResyncPeriodFunc()) informerFactory := informers.NewSharedInformerFactory(testClient, controller.NoResyncPeriodFunc())
defaultDownscaleForbiddenWindow := 5 * time.Minute defaultDownscaleForbiddenWindow := 5 * time.Minute

View File

@ -485,11 +485,7 @@ func (tc *legacyTestCase) runTest(t *testing.T) {
return true, obj, nil return true, obj, nil
}) })
replicaCalc := &ReplicaCalculator{ replicaCalc := NewReplicaCalculator(metricsClient, testClient.Core(), defaultTestingTolerance, defaultTestingCpuTaintAfterStart, defaultTestingDelayOfInitialReadinessStatus)
metricsClient: metricsClient,
podsGetter: testClient.Core(),
tolerance: defaultTestingTolerance,
}
informerFactory := informers.NewSharedInformerFactory(testClient, controller.NoResyncPeriodFunc()) informerFactory := informers.NewSharedInformerFactory(testClient, controller.NoResyncPeriodFunc())
defaultDownscaleForbiddenWindow := 5 * time.Minute defaultDownscaleForbiddenWindow := 5 * time.Minute

View File

@ -186,11 +186,7 @@ func (tc *legacyReplicaCalcTestCase) runTest(t *testing.T) {
testClient := tc.prepareTestClient(t) testClient := tc.prepareTestClient(t)
metricsClient := metrics.NewHeapsterMetricsClient(testClient, metrics.DefaultHeapsterNamespace, metrics.DefaultHeapsterScheme, metrics.DefaultHeapsterService, metrics.DefaultHeapsterPort) metricsClient := metrics.NewHeapsterMetricsClient(testClient, metrics.DefaultHeapsterNamespace, metrics.DefaultHeapsterScheme, metrics.DefaultHeapsterService, metrics.DefaultHeapsterPort)
replicaCalc := &ReplicaCalculator{ replicaCalc := NewReplicaCalculator(metricsClient, testClient.Core(), defaultTestingTolerance, defaultTestingCpuTaintAfterStart, defaultTestingDelayOfInitialReadinessStatus)
metricsClient: metricsClient,
podsGetter: testClient.Core(),
tolerance: defaultTestingTolerance,
}
selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{
MatchLabels: map[string]string{"name": podNamePrefix}, MatchLabels: map[string]string{"name": podNamePrefix},

View File

@ -21,6 +21,7 @@ import (
"math" "math"
"time" "time"
"github.com/golang/glog"
autoscaling "k8s.io/api/autoscaling/v2beta1" autoscaling "k8s.io/api/autoscaling/v2beta1"
"k8s.io/api/core/v1" "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@ -29,29 +30,34 @@ import (
v1coreclient "k8s.io/client-go/kubernetes/typed/core/v1" v1coreclient "k8s.io/client-go/kubernetes/typed/core/v1"
podutil "k8s.io/kubernetes/pkg/api/v1/pod" podutil "k8s.io/kubernetes/pkg/api/v1/pod"
metricsclient "k8s.io/kubernetes/pkg/controller/podautoscaler/metrics" metricsclient "k8s.io/kubernetes/pkg/controller/podautoscaler/metrics"
"runtime/debug"
) )
const ( const (
// TODO(jbartosik): use actual value.
cpuSampleWindow = time.Minute
// defaultTestingTolerance is default value for calculating when to // defaultTestingTolerance is default value for calculating when to
// scale up/scale down. // scale up/scale down.
defaultTestingTolerance = 0.1 defaultTestingTolerance = 0.1
defaultTestingCpuTaintAfterStart = 2 * time.Minute
// Pod begins existence as unready. If pod is unready and timestamp of last pod readiness change is defaultTestingDelayOfInitialReadinessStatus = 10 * time.Second
// less than maxDelayOfInitialReadinessStatus after pod start we assume it has never been ready.
maxDelayOfInitialReadinessStatus = 10 * time.Second
) )
type ReplicaCalculator struct { type ReplicaCalculator struct {
metricsClient metricsclient.MetricsClient metricsClient metricsclient.MetricsClient
podsGetter v1coreclient.PodsGetter podsGetter v1coreclient.PodsGetter
tolerance float64 tolerance float64
cpuTaintAfterStart time.Duration
delayOfInitialReadinessStatus time.Duration
} }
func NewReplicaCalculator(metricsClient metricsclient.MetricsClient, podsGetter v1coreclient.PodsGetter, tolerance float64) *ReplicaCalculator { func NewReplicaCalculator(metricsClient metricsclient.MetricsClient, podsGetter v1coreclient.PodsGetter, tolerance float64, cpuTaintAfterStart, delayOfInitialReadinessStatus time.Duration) *ReplicaCalculator {
return &ReplicaCalculator{ return &ReplicaCalculator{
metricsClient: metricsClient, metricsClient: metricsClient,
podsGetter: podsGetter, podsGetter: podsGetter,
tolerance: tolerance, tolerance: tolerance,
cpuTaintAfterStart: cpuTaintAfterStart,
delayOfInitialReadinessStatus: delayOfInitialReadinessStatus,
} }
} }
@ -73,7 +79,7 @@ func (c *ReplicaCalculator) GetResourceReplicas(currentReplicas int32, targetUti
return 0, 0, 0, time.Time{}, fmt.Errorf("no pods returned by selector while calculating replica count") return 0, 0, 0, time.Time{}, fmt.Errorf("no pods returned by selector while calculating replica count")
} }
readyPodCount, ignoredPods, missingPods := groupPods(podList.Items, metrics, resource) readyPodCount, ignoredPods, missingPods := groupPods(podList.Items, metrics, resource, c.cpuTaintAfterStart, c.delayOfInitialReadinessStatus)
removeMetricsForPods(metrics, ignoredPods) removeMetricsForPods(metrics, ignoredPods)
requests, err := calculatePodRequests(podList.Items, resource) requests, err := calculatePodRequests(podList.Items, resource)
if err != nil { if err != nil {
@ -174,7 +180,7 @@ func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMet
return 0, 0, fmt.Errorf("no pods returned by selector while calculating replica count") return 0, 0, fmt.Errorf("no pods returned by selector while calculating replica count")
} }
readyPodCount, ignoredPods, missingPods := groupPods(podList.Items, metrics, resource) readyPodCount, ignoredPods, missingPods := groupPods(podList.Items, metrics, resource, c.cpuTaintAfterStart, c.delayOfInitialReadinessStatus)
removeMetricsForPods(metrics, ignoredPods) removeMetricsForPods(metrics, ignoredPods)
if len(metrics) == 0 { if len(metrics) == 0 {
@ -338,9 +344,10 @@ func (c *ReplicaCalculator) GetExternalPerPodMetricReplicas(currentReplicas int3
return replicaCount, utilization, timestamp, nil return replicaCount, utilization, timestamp, nil
} }
func groupPods(pods []v1.Pod, metrics metricsclient.PodMetricsInfo, resource v1.ResourceName) (readyPodCount int, ignoredPods sets.String, missingPods sets.String) { func groupPods(pods []v1.Pod, metrics metricsclient.PodMetricsInfo, resource v1.ResourceName, cpuTaintAfterStart, delayOfInitialReadinessStatus time.Duration) (readyPodCount int, ignoredPods sets.String, missingPods sets.String) {
missingPods = sets.NewString() missingPods = sets.NewString()
ignoredPods = sets.NewString() ignoredPods = sets.NewString()
glog.Errorf("groupPods stack: %v", string(debug.Stack()))
for _, pod := range pods { for _, pod := range pods {
if pod.Status.Phase == v1.PodFailed { if pod.Status.Phase == v1.PodFailed {
continue continue
@ -356,9 +363,9 @@ func groupPods(pods []v1.Pod, metrics metricsclient.PodMetricsInfo, resource v1.
ignorePod = true ignorePod = true
} else { } else {
if condition.Status == v1.ConditionTrue { if condition.Status == v1.ConditionTrue {
ignorePod = pod.Status.StartTime.Add(2 * time.Minute).After(time.Now()) ignorePod = pod.Status.StartTime.Add(cpuTaintAfterStart + cpuSampleWindow).After(time.Now())
} else { } else {
ignorePod = pod.Status.StartTime.Add(maxDelayOfInitialReadinessStatus).After(condition.LastTransitionTime.Time) ignorePod = pod.Status.StartTime.Add(delayOfInitialReadinessStatus).After(condition.LastTransitionTime.Time)
} }
} }
if ignorePod { if ignorePod {

View File

@ -324,11 +324,7 @@ func (tc *replicaCalcTestCase) runTest(t *testing.T) {
testClient, testMetricsClient, testCMClient, testEMClient := tc.prepareTestClient(t) testClient, testMetricsClient, testCMClient, testEMClient := tc.prepareTestClient(t)
metricsClient := metrics.NewRESTMetricsClient(testMetricsClient.MetricsV1beta1(), testCMClient, testEMClient) metricsClient := metrics.NewRESTMetricsClient(testMetricsClient.MetricsV1beta1(), testCMClient, testEMClient)
replicaCalc := &ReplicaCalculator{ replicaCalc := NewReplicaCalculator(metricsClient, testClient.Core(), defaultTestingTolerance, defaultTestingCpuTaintAfterStart, defaultTestingDelayOfInitialReadinessStatus)
metricsClient: metricsClient,
podsGetter: testClient.Core(),
tolerance: defaultTestingTolerance,
}
selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{
MatchLabels: map[string]string{"name": podNamePrefix}, MatchLabels: map[string]string{"name": podNamePrefix},
@ -1339,7 +1335,7 @@ func TestGroupPods(t *testing.T) {
}, },
} }
for _, tc := range tests { for _, tc := range tests {
readyPodCount, unreadyPods, missingPods := groupPods(tc.pods, tc.metrics, tc.resource) readyPodCount, unreadyPods, missingPods := groupPods(tc.pods, tc.metrics, tc.resource, defaultTestingCpuTaintAfterStart, defaultTestingDelayOfInitialReadinessStatus)
if readyPodCount != tc.expectReadyPodCount { if readyPodCount != tc.expectReadyPodCount {
t.Errorf("%s got readyPodCount %d, expected %d", tc.name, readyPodCount, tc.expectReadyPodCount) t.Errorf("%s got readyPodCount %d, expected %d", tc.name, readyPodCount, tc.expectReadyPodCount)
} }