diff --git a/api/api-rules/violation_exceptions.list b/api/api-rules/violation_exceptions.list index 891b48be7e..8129b04508 100644 --- a/api/api-rules/violation_exceptions.list +++ b/api/api-rules/violation_exceptions.list @@ -93,7 +93,7 @@ API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alp API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,HPAControllerConfiguration,HorizontalPodAutoscalerDownscaleForbiddenWindow API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,HPAControllerConfiguration,HorizontalPodAutoscalerTolerance API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,HPAControllerConfiguration,HorizontalPodAutoscalerUseRESTClients -API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,HPAControllerConfiguration,HorizontalPodAutoscalerCPUTaintPeriod +API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,HPAControllerConfiguration,HorizontalPodAutoscalerCPUInitializationPeriod API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,HPAControllerConfiguration,HorizontalPodAutoscalerInitialReadinessDelay API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,JobControllerConfiguration,ConcurrentJobSyncs API rule violation: names_match,k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1,KubeCloudSharedConfiguration,Port diff --git a/cmd/kube-controller-manager/app/autoscaling.go b/cmd/kube-controller-manager/app/autoscaling.go index ece5f9b239..9f86fe7411 100644 --- a/cmd/kube-controller-manager/app/autoscaling.go +++ b/cmd/kube-controller-manager/app/autoscaling.go @@ -84,7 +84,7 @@ func startHPAControllerWithMetricsClient(ctx ControllerContext, metricsClient me metricsClient, hpaClient.CoreV1(), ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerTolerance, - ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerCPUTaintPeriod.Duration, + ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerCPUInitializationPeriod.Duration, ctx.ComponentConfig.HPAController.HorizontalPodAutoscalerInitialReadinessDelay.Duration, ) go podautoscaler.NewHorizontalController( diff --git a/cmd/kube-controller-manager/app/options/hpacontroller.go b/cmd/kube-controller-manager/app/options/hpacontroller.go index f5acf567b1..55d460e2dc 100644 --- a/cmd/kube-controller-manager/app/options/hpacontroller.go +++ b/cmd/kube-controller-manager/app/options/hpacontroller.go @@ -30,7 +30,7 @@ type HPAControllerOptions struct { HorizontalPodAutoscalerDownscaleForbiddenWindow metav1.Duration HorizontalPodAutoscalerUpscaleForbiddenWindow metav1.Duration HorizontalPodAutoscalerSyncPeriod metav1.Duration - HorizontalPodAutoscalerCPUTaintPeriod metav1.Duration + HorizontalPodAutoscalerCPUInitializationPeriod metav1.Duration HorizontalPodAutoscalerInitialReadinessDelay metav1.Duration } @@ -46,7 +46,7 @@ func (o *HPAControllerOptions) AddFlags(fs *pflag.FlagSet) { fs.DurationVar(&o.HorizontalPodAutoscalerDownscaleForbiddenWindow.Duration, "horizontal-pod-autoscaler-downscale-delay", o.HorizontalPodAutoscalerDownscaleForbiddenWindow.Duration, "The period since last downscale, before another downscale can be performed in horizontal pod autoscaler.") fs.Float64Var(&o.HorizontalPodAutoscalerTolerance, "horizontal-pod-autoscaler-tolerance", o.HorizontalPodAutoscalerTolerance, "The minimum change (from 1.0) in the desired-to-actual metrics ratio for the horizontal pod autoscaler to 
consider scaling.") fs.BoolVar(&o.HorizontalPodAutoscalerUseRESTClients, "horizontal-pod-autoscaler-use-rest-clients", o.HorizontalPodAutoscalerUseRESTClients, "If set to true, causes the horizontal pod autoscaler controller to use REST clients through the kube-aggregator, instead of using the legacy metrics client through the API server proxy. This is required for custom metrics support in the horizontal pod autoscaler.") - fs.DurationVar(&o.HorizontalPodAutoscalerCPUTaintPeriod.Duration, "horizontal-pod-autoscaler-cpu-taint-period", o.HorizontalPodAutoscalerCPUTaintPeriod.Duration, "The period after pod start for which CPU samples are considered tainted by initialization.") + fs.DurationVar(&o.HorizontalPodAutoscalerCPUInitializationPeriod.Duration, "horizontal-pod-autoscaler-cpu-initialization-period", o.HorizontalPodAutoscalerCPUInitializationPeriod.Duration, "The period after pod start when CPU samples might be skipped.") fs.DurationVar(&o.HorizontalPodAutoscalerInitialReadinessDelay.Duration, "horizontal-pod-autoscaler-initial-readiness-delay", o.HorizontalPodAutoscalerInitialReadinessDelay.Duration, "The period after pod start during which readiness changes will be treated as initial readiness.") } @@ -60,6 +60,8 @@ func (o *HPAControllerOptions) ApplyTo(cfg *componentconfig.HPAControllerConfigu cfg.HorizontalPodAutoscalerDownscaleForbiddenWindow = o.HorizontalPodAutoscalerDownscaleForbiddenWindow cfg.HorizontalPodAutoscalerTolerance = o.HorizontalPodAutoscalerTolerance cfg.HorizontalPodAutoscalerUseRESTClients = o.HorizontalPodAutoscalerUseRESTClients + cfg.HorizontalPodAutoscalerCPUInitializationPeriod = o.HorizontalPodAutoscalerCPUInitializationPeriod + cfg.HorizontalPodAutoscalerInitialReadinessDelay = o.HorizontalPodAutoscalerInitialReadinessDelay return nil } diff --git a/cmd/kube-controller-manager/app/options/options.go b/cmd/kube-controller-manager/app/options/options.go index 0e19fefc0a..862e0d9e82 100644 --- a/cmd/kube-controller-manager/app/options/options.go +++ b/cmd/kube-controller-manager/app/options/options.go @@ -42,6 +42,7 @@ import ( componentconfigv1alpha1 "k8s.io/kubernetes/pkg/apis/componentconfig/v1alpha1" "k8s.io/kubernetes/pkg/controller/garbagecollector" "k8s.io/kubernetes/pkg/master/ports" + // add the kubernetes feature gates _ "k8s.io/kubernetes/pkg/features" @@ -134,7 +135,7 @@ func NewKubeControllerManagerOptions() (*KubeControllerManagerOptions, error) { HorizontalPodAutoscalerSyncPeriod: componentConfig.HPAController.HorizontalPodAutoscalerSyncPeriod, HorizontalPodAutoscalerUpscaleForbiddenWindow: componentConfig.HPAController.HorizontalPodAutoscalerUpscaleForbiddenWindow, HorizontalPodAutoscalerDownscaleForbiddenWindow: componentConfig.HPAController.HorizontalPodAutoscalerDownscaleForbiddenWindow, - HorizontalPodAutoscalerCPUTaintPeriod: componentConfig.HPAController.HorizontalPodAutoscalerCPUTaintPeriod, + HorizontalPodAutoscalerCPUInitializationPeriod: componentConfig.HPAController.HorizontalPodAutoscalerCPUInitializationPeriod, HorizontalPodAutoscalerInitialReadinessDelay: componentConfig.HPAController.HorizontalPodAutoscalerInitialReadinessDelay, HorizontalPodAutoscalerTolerance: componentConfig.HPAController.HorizontalPodAutoscalerTolerance, HorizontalPodAutoscalerUseRESTClients: componentConfig.HPAController.HorizontalPodAutoscalerUseRESTClients, diff --git a/cmd/kube-controller-manager/app/options/options_test.go b/cmd/kube-controller-manager/app/options/options_test.go index 73ceb19dc8..e44102e663 100644 --- 
a/cmd/kube-controller-manager/app/options/options_test.go +++ b/cmd/kube-controller-manager/app/options/options_test.go @@ -75,7 +75,7 @@ func TestAddFlags(t *testing.T) { "--horizontal-pod-autoscaler-downscale-delay=2m", "--horizontal-pod-autoscaler-sync-period=45s", "--horizontal-pod-autoscaler-upscale-delay=1m", - "--horizontal-pod-autoscaler-cpu-taint-period=90s", + "--horizontal-pod-autoscaler-cpu-initialization-period=90s", "--horizontal-pod-autoscaler-initial-readiness-delay=50s", "--http2-max-streams-per-connection=47", "--kube-api-burst=100", @@ -189,7 +189,7 @@ func TestAddFlags(t *testing.T) { HorizontalPodAutoscalerSyncPeriod: metav1.Duration{Duration: 45 * time.Second}, HorizontalPodAutoscalerUpscaleForbiddenWindow: metav1.Duration{Duration: 1 * time.Minute}, HorizontalPodAutoscalerDownscaleForbiddenWindow: metav1.Duration{Duration: 2 * time.Minute}, - HorizontalPodAutoscalerCPUTaintPeriod: metav1.Duration{Duration: 90 * time.Second}, + HorizontalPodAutoscalerCPUInitializationPeriod: metav1.Duration{Duration: 90 * time.Second}, HorizontalPodAutoscalerInitialReadinessDelay: metav1.Duration{Duration: 50 * time.Second}, HorizontalPodAutoscalerTolerance: 0.1, HorizontalPodAutoscalerUseRESTClients: true, diff --git a/pkg/apis/componentconfig/types.go b/pkg/apis/componentconfig/types.go index 4ac9f51b36..0ca6753770 100644 --- a/pkg/apis/componentconfig/types.go +++ b/pkg/apis/componentconfig/types.go @@ -271,9 +271,9 @@ type HPAControllerConfiguration struct { // through the kube-aggregator when enabled, instead of using the legacy metrics client // through the API server proxy. HorizontalPodAutoscalerUseRESTClients bool - // HorizontalPodAutoscalerCPUTaintPeriod is period after pod start for which HPA will consider CPU - // samples from the pod contaminated by initialization and disregard them. - HorizontalPodAutoscalerCPUTaintPeriod metav1.Duration + // HorizontalPodAutoscalerCPUInitializationPeriod is the period after pod start when CPU samples + // might be skipped. + HorizontalPodAutoscalerCPUInitializationPeriod metav1.Duration // HorizontalPodAutoscalerInitialReadinessDelay is period after pod start during which readiness // changes are treated as readiness being set for the first time. The only effect of this is that // HPA will disregard CPU samples from unready pods that had last readiness change during that diff --git a/pkg/apis/componentconfig/v1alpha1/defaults.go b/pkg/apis/componentconfig/v1alpha1/defaults.go index 1f0de2fcc9..f95097b8e6 100644 --- a/pkg/apis/componentconfig/v1alpha1/defaults.go +++ b/pkg/apis/componentconfig/v1alpha1/defaults.go @@ -89,10 +89,8 @@ func SetDefaults_KubeControllerManagerConfiguration(obj *KubeControllerManagerCo if obj.HPAController.HorizontalPodAutoscalerUpscaleForbiddenWindow == zero { obj.HPAController.HorizontalPodAutoscalerUpscaleForbiddenWindow = metav1.Duration{Duration: 3 * time.Minute} } - if obj.HPAController.HorizontalPodAutoscalerCPUTaintPeriod == zero { - // Assuming CPU is collected every minute and initialization takes another minute HPA should - // disregard samples from first two minutes as contaminated by initialization. 
- obj.HPAController.HorizontalPodAutoscalerCPUTaintPeriod = metav1.Duration{Duration: time.Minute} + if obj.HPAController.HorizontalPodAutoscalerCPUInitializationPeriod == zero { + obj.HPAController.HorizontalPodAutoscalerCPUInitializationPeriod = metav1.Duration{Duration: 5 * time.Minute} } if obj.HPAController.HorizontalPodAutoscalerInitialReadinessDelay == zero { obj.HPAController.HorizontalPodAutoscalerInitialReadinessDelay = metav1.Duration{Duration: 30 * time.Second} diff --git a/pkg/apis/componentconfig/v1alpha1/types.go b/pkg/apis/componentconfig/v1alpha1/types.go index 331de145c1..10877fe5fe 100644 --- a/pkg/apis/componentconfig/v1alpha1/types.go +++ b/pkg/apis/componentconfig/v1alpha1/types.go @@ -320,9 +320,9 @@ type HPAControllerConfiguration struct { // through the kube-aggregator when enabled, instead of using the legacy metrics client // through the API server proxy. HorizontalPodAutoscalerUseRESTClients *bool - // HorizontalPodAutoscalerCPUTaintPeriod is period after pod start for which HPA will consider CPU - // samples from the pod contaminated by initialization and disregard them. - HorizontalPodAutoscalerCPUTaintPeriod metav1.Duration + // HorizontalPodAutoscalerCPUInitializationPeriod is the period after pod start when CPU samples + // might be skipped. + HorizontalPodAutoscalerCPUInitializationPeriod metav1.Duration // HorizontalPodAutoscalerInitialReadinessDelay is period after pod start during which readiness // changes are treated as readiness being set for the first time. The only effect of this is that // HPA will disregard CPU samples from unready pods that had last readiness change during that diff --git a/pkg/apis/componentconfig/v1alpha1/zz_generated.conversion.go b/pkg/apis/componentconfig/v1alpha1/zz_generated.conversion.go index 068e857e4f..ae53a33068 100644 --- a/pkg/apis/componentconfig/v1alpha1/zz_generated.conversion.go +++ b/pkg/apis/componentconfig/v1alpha1/zz_generated.conversion.go @@ -606,7 +606,7 @@ func autoConvert_v1alpha1_HPAControllerConfiguration_To_componentconfig_HPAContr if err := v1.Convert_Pointer_bool_To_bool(&in.HorizontalPodAutoscalerUseRESTClients, &out.HorizontalPodAutoscalerUseRESTClients, s); err != nil { return err } - out.HorizontalPodAutoscalerCPUTaintPeriod = in.HorizontalPodAutoscalerCPUTaintPeriod + out.HorizontalPodAutoscalerCPUInitializationPeriod = in.HorizontalPodAutoscalerCPUInitializationPeriod out.HorizontalPodAutoscalerInitialReadinessDelay = in.HorizontalPodAutoscalerInitialReadinessDelay return nil } @@ -624,7 +624,7 @@ func autoConvert_componentconfig_HPAControllerConfiguration_To_v1alpha1_HPAContr if err := v1.Convert_bool_To_Pointer_bool(&in.HorizontalPodAutoscalerUseRESTClients, &out.HorizontalPodAutoscalerUseRESTClients, s); err != nil { return err } - out.HorizontalPodAutoscalerCPUTaintPeriod = in.HorizontalPodAutoscalerCPUTaintPeriod + out.HorizontalPodAutoscalerCPUInitializationPeriod = in.HorizontalPodAutoscalerCPUInitializationPeriod out.HorizontalPodAutoscalerInitialReadinessDelay = in.HorizontalPodAutoscalerInitialReadinessDelay return nil } diff --git a/pkg/apis/componentconfig/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/componentconfig/v1alpha1/zz_generated.deepcopy.go index 1e2bedd6d9..c8cf52f007 100644 --- a/pkg/apis/componentconfig/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/componentconfig/v1alpha1/zz_generated.deepcopy.go @@ -242,7 +242,7 @@ func (in *HPAControllerConfiguration) DeepCopyInto(out *HPAControllerConfigurati *out = new(bool) **out = **in } - 
out.HorizontalPodAutoscalerCPUTaintPeriod = in.HorizontalPodAutoscalerCPUTaintPeriod + out.HorizontalPodAutoscalerCPUInitializationPeriod = in.HorizontalPodAutoscalerCPUInitializationPeriod out.HorizontalPodAutoscalerInitialReadinessDelay = in.HorizontalPodAutoscalerInitialReadinessDelay return } diff --git a/pkg/apis/componentconfig/zz_generated.deepcopy.go b/pkg/apis/componentconfig/zz_generated.deepcopy.go index 482befef64..cfabafc1f3 100644 --- a/pkg/apis/componentconfig/zz_generated.deepcopy.go +++ b/pkg/apis/componentconfig/zz_generated.deepcopy.go @@ -232,7 +232,7 @@ func (in *HPAControllerConfiguration) DeepCopyInto(out *HPAControllerConfigurati out.HorizontalPodAutoscalerSyncPeriod = in.HorizontalPodAutoscalerSyncPeriod out.HorizontalPodAutoscalerUpscaleForbiddenWindow = in.HorizontalPodAutoscalerUpscaleForbiddenWindow out.HorizontalPodAutoscalerDownscaleForbiddenWindow = in.HorizontalPodAutoscalerDownscaleForbiddenWindow - out.HorizontalPodAutoscalerCPUTaintPeriod = in.HorizontalPodAutoscalerCPUTaintPeriod + out.HorizontalPodAutoscalerCPUInitializationPeriod = in.HorizontalPodAutoscalerCPUInitializationPeriod out.HorizontalPodAutoscalerInitialReadinessDelay = in.HorizontalPodAutoscalerInitialReadinessDelay return } diff --git a/pkg/controller/podautoscaler/horizontal_test.go b/pkg/controller/podautoscaler/horizontal_test.go index f1490e4d7c..dfc6a45ea8 100644 --- a/pkg/controller/podautoscaler/horizontal_test.go +++ b/pkg/controller/podautoscaler/horizontal_test.go @@ -292,8 +292,9 @@ func (tc *testCase) prepareTestClient(t *testing.T) (*fake.Clientset, *metricsfa Phase: podPhase, Conditions: []v1.PodCondition{ { - Type: v1.PodReady, - Status: podReadiness, + Type: v1.PodReady, + Status: podReadiness, + LastTransitionTime: podStartTime, }, }, StartTime: &podStartTime, @@ -474,6 +475,7 @@ func (tc *testCase) prepareTestClient(t *testing.T) (*fake.Clientset, *metricsfa Labels: labelSet, }, Timestamp: metav1.Time{Time: time.Now()}, + Window: metav1.Duration{Duration: time.Minute}, Containers: []metricsapi.ContainerMetrics{ { Name: "container", @@ -657,7 +659,7 @@ func (tc *testCase) setupController(t *testing.T) (*HorizontalController, inform return true, obj, nil }) - replicaCalc := NewReplicaCalculator(metricsClient, testClient.Core(), defaultTestingTolerance, defaultTestingCpuTaintAfterStart, defaultTestingDelayOfInitialReadinessStatus) + replicaCalc := NewReplicaCalculator(metricsClient, testClient.Core(), defaultTestingTolerance, defaultTestingCpuInitializationPeriod, defaultTestingDelayOfInitialReadinessStatus) informerFactory := informers.NewSharedInformerFactory(testClient, controller.NoResyncPeriodFunc()) defaultDownscaleForbiddenWindow := 5 * time.Minute diff --git a/pkg/controller/podautoscaler/legacy_horizontal_test.go b/pkg/controller/podautoscaler/legacy_horizontal_test.go index a327f7f61e..842f363acf 100644 --- a/pkg/controller/podautoscaler/legacy_horizontal_test.go +++ b/pkg/controller/podautoscaler/legacy_horizontal_test.go @@ -485,7 +485,7 @@ func (tc *legacyTestCase) runTest(t *testing.T) { return true, obj, nil }) - replicaCalc := NewReplicaCalculator(metricsClient, testClient.Core(), defaultTestingTolerance, defaultTestingCpuTaintAfterStart, defaultTestingDelayOfInitialReadinessStatus) + replicaCalc := NewReplicaCalculator(metricsClient, testClient.Core(), defaultTestingTolerance, defaultTestingCpuInitializationPeriod, defaultTestingDelayOfInitialReadinessStatus) informerFactory := informers.NewSharedInformerFactory(testClient, controller.NoResyncPeriodFunc()) 
defaultDownscaleForbiddenWindow := 5 * time.Minute diff --git a/pkg/controller/podautoscaler/legacy_replica_calculator_test.go b/pkg/controller/podautoscaler/legacy_replica_calculator_test.go index df42d7133f..486ee5049e 100644 --- a/pkg/controller/podautoscaler/legacy_replica_calculator_test.go +++ b/pkg/controller/podautoscaler/legacy_replica_calculator_test.go @@ -186,7 +186,7 @@ func (tc *legacyReplicaCalcTestCase) runTest(t *testing.T) { testClient := tc.prepareTestClient(t) metricsClient := metrics.NewHeapsterMetricsClient(testClient, metrics.DefaultHeapsterNamespace, metrics.DefaultHeapsterScheme, metrics.DefaultHeapsterService, metrics.DefaultHeapsterPort) - replicaCalc := NewReplicaCalculator(metricsClient, testClient.Core(), defaultTestingTolerance, defaultTestingCpuTaintAfterStart, defaultTestingDelayOfInitialReadinessStatus) + replicaCalc := NewReplicaCalculator(metricsClient, testClient.Core(), defaultTestingTolerance, defaultTestingCpuInitializationPeriod, defaultTestingDelayOfInitialReadinessStatus) selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ MatchLabels: map[string]string{"name": podNamePrefix}, diff --git a/pkg/controller/podautoscaler/metrics/interfaces.go b/pkg/controller/podautoscaler/metrics/interfaces.go index 943a164ae9..a3733371ff 100644 --- a/pkg/controller/podautoscaler/metrics/interfaces.go +++ b/pkg/controller/podautoscaler/metrics/interfaces.go @@ -24,9 +24,15 @@ import ( "k8s.io/apimachinery/pkg/labels" ) -// PodMetricsInfo contains pod metric values as a map from pod names to -// metric values (the metric values are expected to be the metric as a milli-value) -type PodMetricsInfo map[string]int64 +// PodMetric contains a pod metric value (the value is expected to be a milli-value) +type PodMetric struct { + Timestamp time.Time + Window time.Duration + Value int64 +} + +// PodMetricsInfo contains pod metrics as a map from pod names to PodMetric +type PodMetricsInfo map[string]PodMetric // MetricsClient knows how to query a remote interface to retrieve container-level // resource metrics as well as pod-level arbitrary metrics diff --git a/pkg/controller/podautoscaler/metrics/legacy_metrics_client.go b/pkg/controller/podautoscaler/metrics/legacy_metrics_client.go index 11fdb21db5..cd4a1a18ac 100644 --- a/pkg/controller/podautoscaler/metrics/legacy_metrics_client.go +++ b/pkg/controller/podautoscaler/metrics/legacy_metrics_client.go @@ -35,10 +35,11 @@ import ( ) const ( - DefaultHeapsterNamespace = "kube-system" - DefaultHeapsterScheme = "http" - DefaultHeapsterService = "heapster" - DefaultHeapsterPort = "" // use the first exposed port on the service + DefaultHeapsterNamespace = "kube-system" + DefaultHeapsterScheme = "http" + DefaultHeapsterService = "heapster" + DefaultHeapsterPort = "" // use the first exposed port on the service + heapsterDefaultMetricWindow = time.Minute ) var heapsterQueryStart = -5 * time.Minute @@ -100,7 +101,11 @@ func (h *HeapsterMetricsClient) GetResourceMetric(resource v1.ResourceName, name } if !missing { - res[m.Name] = int64(podSum) + res[m.Name] = PodMetric{ + Timestamp: m.Timestamp.Time, + Window: m.Window.Duration, + Value: int64(podSum), + } } } @@ -159,7 +164,12 @@ func (h *HeapsterMetricsClient) GetRawMetric(metricName string, namespace string for i, podMetrics := range metrics.Items { val, podTimestamp, hadMetrics := collapseTimeSamples(podMetrics, time.Minute) if hadMetrics { - res[podNames[i]] = val + res[podNames[i]] = PodMetric{ + Timestamp: podTimestamp, + Window: 
heapsterDefaultMetricWindow, + Value: int64(val), + } + if timestamp == nil || podTimestamp.Before(*timestamp) { timestamp = &podTimestamp } diff --git a/pkg/controller/podautoscaler/metrics/legacy_metrics_client_test.go b/pkg/controller/podautoscaler/metrics/legacy_metrics_client_test.go index fd86e3db3b..1d7a78e066 100644 --- a/pkg/controller/podautoscaler/metrics/legacy_metrics_client_test.go +++ b/pkg/controller/podautoscaler/metrics/legacy_metrics_client_test.go @@ -68,6 +68,7 @@ type testCase struct { replicas int targetTimestamp int + window time.Duration reportedMetricsPoints [][]metricPoint reportedPodMetrics [][]int64 @@ -109,7 +110,8 @@ func (tc *testCase) prepareTestClient(t *testing.T) *fake.Clientset { Name: fmt.Sprintf("%s-%d", podNamePrefix, i), Namespace: namespace, }, - Timestamp: metav1.Time{Time: fixedTimestamp.Add(time.Duration(tc.targetTimestamp) * time.Minute)}, + Timestamp: metav1.Time{Time: offsetTimestampBy(tc.targetTimestamp)}, + Window: metav1.Duration{Duration: tc.window}, Containers: []metricsapi.ContainerMetrics{}, } for j, cpu := range containers { @@ -138,7 +140,7 @@ func (tc *testCase) prepareTestClient(t *testing.T) *fake.Clientset { for _, reportedMetricPoints := range tc.reportedMetricsPoints { var heapsterMetricPoints []heapster.MetricPoint for _, reportedMetricPoint := range reportedMetricPoints { - timestamp := fixedTimestamp.Add(time.Duration(reportedMetricPoint.timestamp) * time.Minute) + timestamp := offsetTimestampBy(reportedMetricPoint.timestamp) if latestTimestamp.Before(timestamp) { latestTimestamp = timestamp } @@ -197,10 +199,20 @@ func (tc *testCase) verifyResults(t *testing.T, metrics PodMetricsInfo, timestam } assert.NoError(t, err, "there should be no error retrieving the metrics") assert.NotNil(t, metrics, "there should be metrics returned") + if len(metrics) != len(tc.desiredMetricValues) { + t.Errorf("Not equal:\nexpected: %v\nactual: %v", tc.desiredMetricValues, metrics) + } else { + for k, m := range metrics { + if !m.Timestamp.Equal(tc.desiredMetricValues[k].Timestamp) || + m.Window != tc.desiredMetricValues[k].Window || + m.Value != tc.desiredMetricValues[k].Value { + t.Errorf("Not equal:\nexpected: %v\nactual: %v", tc.desiredMetricValues, metrics) + break + } + } + } - assert.Equal(t, tc.desiredMetricValues, metrics, "the metrics values should be as expected") - - targetTimestamp := fixedTimestamp.Add(time.Duration(tc.targetTimestamp) * time.Minute) + targetTimestamp := offsetTimestampBy(tc.targetTimestamp) assert.True(t, targetTimestamp.Equal(timestamp), fmt.Sprintf("the timestamp should be as expected (%s) but was %s", targetTimestamp, timestamp)) } @@ -218,65 +230,86 @@ func (tc *testCase) runTest(t *testing.T) { } func TestCPU(t *testing.T) { + targetTimestamp := 1 + window := 30 * time.Second tc := testCase{ replicas: 3, desiredMetricValues: PodMetricsInfo{ - "test-pod-0": 5000, "test-pod-1": 5000, "test-pod-2": 5000, + "test-pod-0": PodMetric{Value: 5000, Timestamp: offsetTimestampBy(targetTimestamp), Window: window}, + "test-pod-1": PodMetric{Value: 5000, Timestamp: offsetTimestampBy(targetTimestamp), Window: window}, + "test-pod-2": PodMetric{Value: 5000, Timestamp: offsetTimestampBy(targetTimestamp), Window: window}, }, resourceName: v1.ResourceCPU, - targetTimestamp: 1, + targetTimestamp: targetTimestamp, + window: window, reportedPodMetrics: [][]int64{{5000}, {5000}, {5000}}, } tc.runTest(t) } func TestQPS(t *testing.T) { + targetTimestamp := 1 tc := testCase{ replicas: 3, desiredMetricValues: PodMetricsInfo{ - 
"test-pod-0": 10000, "test-pod-1": 20000, "test-pod-2": 10000, + "test-pod-0": PodMetric{Value: 10000, Timestamp: offsetTimestampBy(targetTimestamp), Window: heapsterDefaultMetricWindow}, + "test-pod-1": PodMetric{Value: 20000, Timestamp: offsetTimestampBy(targetTimestamp), Window: heapsterDefaultMetricWindow}, + "test-pod-2": PodMetric{Value: 10000, Timestamp: offsetTimestampBy(targetTimestamp), Window: heapsterDefaultMetricWindow}, }, metricName: "qps", - targetTimestamp: 1, + targetTimestamp: targetTimestamp, reportedMetricsPoints: [][]metricPoint{{{10, 1}}, {{20, 1}}, {{10, 1}}}, } tc.runTest(t) } func TestQpsSumEqualZero(t *testing.T) { + targetTimestamp := 0 tc := testCase{ replicas: 3, desiredMetricValues: PodMetricsInfo{ - "test-pod-0": 0, "test-pod-1": 0, "test-pod-2": 0, + "test-pod-0": PodMetric{Value: 0, Timestamp: offsetTimestampBy(targetTimestamp), Window: heapsterDefaultMetricWindow}, + "test-pod-1": PodMetric{Value: 0, Timestamp: offsetTimestampBy(targetTimestamp), Window: heapsterDefaultMetricWindow}, + "test-pod-2": PodMetric{Value: 0, Timestamp: offsetTimestampBy(targetTimestamp), Window: heapsterDefaultMetricWindow}, }, metricName: "qps", - targetTimestamp: 0, + targetTimestamp: targetTimestamp, reportedMetricsPoints: [][]metricPoint{{{0, 0}}, {{0, 0}}, {{0, 0}}}, } tc.runTest(t) } func TestCPUMoreMetrics(t *testing.T) { + targetTimestamp := 10 + window := 30 * time.Second tc := testCase{ replicas: 5, desiredMetricValues: PodMetricsInfo{ - "test-pod-0": 5000, "test-pod-1": 5000, "test-pod-2": 5000, - "test-pod-3": 5000, "test-pod-4": 5000, + "test-pod-0": PodMetric{Value: 5000, Timestamp: offsetTimestampBy(targetTimestamp), Window: window}, + "test-pod-1": PodMetric{Value: 5000, Timestamp: offsetTimestampBy(targetTimestamp), Window: window}, + "test-pod-2": PodMetric{Value: 5000, Timestamp: offsetTimestampBy(targetTimestamp), Window: window}, + "test-pod-3": PodMetric{Value: 5000, Timestamp: offsetTimestampBy(targetTimestamp), Window: window}, + "test-pod-4": PodMetric{Value: 5000, Timestamp: offsetTimestampBy(targetTimestamp), Window: window}, }, resourceName: v1.ResourceCPU, - targetTimestamp: 10, + targetTimestamp: targetTimestamp, + window: window, reportedPodMetrics: [][]int64{{1000, 2000, 2000}, {5000}, {1000, 1000, 1000, 2000}, {4000, 1000}, {5000}}, } tc.runTest(t) } func TestCPUMissingMetrics(t *testing.T) { + targetTimestamp := 0 + window := 30 * time.Second tc := testCase{ replicas: 3, desiredMetricValues: PodMetricsInfo{ - "test-pod-0": 4000, + "test-pod-0": PodMetric{Value: 4000, Timestamp: offsetTimestampBy(targetTimestamp), Window: window}, }, resourceName: v1.ResourceCPU, + targetTimestamp: targetTimestamp, + window: window, reportedPodMetrics: [][]int64{{4000}}, } tc.runTest(t) @@ -315,13 +348,15 @@ func TestCPUEmptyMetrics(t *testing.T) { } func TestQpsEmptyEntries(t *testing.T) { + targetTimestamp := 4 tc := testCase{ replicas: 3, metricName: "qps", desiredMetricValues: PodMetricsInfo{ - "test-pod-0": 4000000, "test-pod-2": 2000000, + "test-pod-0": PodMetric{Value: 4000000, Timestamp: offsetTimestampBy(targetTimestamp), Window: heapsterDefaultMetricWindow}, + "test-pod-2": PodMetric{Value: 2000000, Timestamp: offsetTimestampBy(targetTimestamp), Window: heapsterDefaultMetricWindow}, }, - targetTimestamp: 4, + targetTimestamp: targetTimestamp, reportedMetricsPoints: [][]metricPoint{{{4000, 4}}, {}, {{2000, 4}}}, } tc.runTest(t) @@ -338,12 +373,17 @@ func TestCPUZeroReplicas(t *testing.T) { } func TestCPUEmptyMetricsForOnePod(t *testing.T) { + 
targetTimestamp := 0 + window := 30 * time.Second tc := testCase{ replicas: 3, resourceName: v1.ResourceCPU, desiredMetricValues: PodMetricsInfo{ - "test-pod-0": 100, "test-pod-1": 700, + "test-pod-0": PodMetric{Value: 100, Timestamp: offsetTimestampBy(targetTimestamp), Window: window}, + "test-pod-1": PodMetric{Value: 700, Timestamp: offsetTimestampBy(targetTimestamp), Window: window}, }, + targetTimestamp: targetTimestamp, + window: window, reportedPodMetrics: [][]int64{{100}, {300, 400}, {}}, } tc.runTest(t) @@ -364,3 +404,7 @@ func testCollapseTimeSamples(t *testing.T) { assert.InEpsilon(t, float64(75), val, 0.1, "collapsed sample value should be as expected") assert.True(t, timestamp.Equal(now), "timestamp should be the current time (the newest)") } + +func offsetTimestampBy(t int) time.Time { + return fixedTimestamp.Add(time.Duration(t) * time.Minute) +} diff --git a/pkg/controller/podautoscaler/metrics/rest_metrics_client.go b/pkg/controller/podautoscaler/metrics/rest_metrics_client.go index 0e4b2e583a..138001088e 100644 --- a/pkg/controller/podautoscaler/metrics/rest_metrics_client.go +++ b/pkg/controller/podautoscaler/metrics/rest_metrics_client.go @@ -33,6 +33,10 @@ import ( externalclient "k8s.io/metrics/pkg/client/external_metrics" ) +const ( + metricServerDefaultMetricWindow = time.Minute +) + func NewRESTMetricsClient(resourceClient resourceclient.PodMetricsesGetter, customClient customclient.CustomMetricsClient, externalClient externalclient.ExternalMetricsClient) MetricsClient { return &restMetricsClient{ &resourceMetricsClient{resourceClient}, @@ -84,7 +88,11 @@ func (c *resourceMetricsClient) GetResourceMetric(resource v1.ResourceName, name } if !missing { - res[m.Name] = int64(podSum) + res[m.Name] = PodMetric{ + Timestamp: m.Timestamp.Time, + Window: m.Window.Duration, + Value: int64(podSum), + } } } @@ -113,7 +121,15 @@ func (c *customMetricsClient) GetRawMetric(metricName string, namespace string, res := make(PodMetricsInfo, len(metrics.Items)) for _, m := range metrics.Items { - res[m.DescribedObject.Name] = m.Value.MilliValue() + window := metricServerDefaultMetricWindow + if m.WindowSeconds != nil { + window = time.Duration(*m.WindowSeconds) * time.Second + } + res[m.DescribedObject.Name] = PodMetric{ + Timestamp: m.Timestamp.Time, + Window: window, + Value: int64(m.Value.MilliValue()), + } } timestamp := metrics.Items[0].Timestamp.Time diff --git a/pkg/controller/podautoscaler/metrics/rest_metrics_client_test.go b/pkg/controller/podautoscaler/metrics/rest_metrics_client_test.go index 624f2fd3a4..d8de168fc4 100644 --- a/pkg/controller/podautoscaler/metrics/rest_metrics_client_test.go +++ b/pkg/controller/podautoscaler/metrics/rest_metrics_client_test.go @@ -48,6 +48,7 @@ type restClientTestCase struct { // "timestamps" here are actually the offset in minutes from a base timestamp targetTimestamp int + window time.Duration reportedMetricPoints []metricPoint reportedPodMetrics [][]int64 singleObject *autoscalingapi.CrossVersionObjectReference @@ -86,7 +87,8 @@ func (tc *restClientTestCase) prepareTestClient(t *testing.T) (*metricsfake.Clie Namespace: namespace, Labels: podLabels, }, - Timestamp: metav1.Time{Time: fixedTimestamp.Add(time.Duration(tc.targetTimestamp) * time.Minute)}, + Timestamp: metav1.Time{Time: offsetTimestampBy(tc.targetTimestamp)}, + Window: metav1.Duration{Duration: tc.window}, Containers: []metricsapi.ContainerMetrics{}, } for j, cpu := range containers { @@ -115,7 +117,7 @@ func (tc *restClientTestCase) 
prepareTestClient(t *testing.T) (*metricsfake.Clie metrics := emapi.ExternalMetricValueList{} for _, metricPoint := range tc.reportedMetricPoints { - timestamp := fixedTimestamp.Add(time.Duration(metricPoint.timestamp) * time.Minute) + timestamp := offsetTimestampBy(metricPoint.timestamp) metric := emapi.ExternalMetricValue{ Value: *resource.NewMilliQuantity(int64(metricPoint.level), resource.DecimalSI), Timestamp: metav1.Time{Time: timestamp}, @@ -136,7 +138,7 @@ func (tc *restClientTestCase) prepareTestClient(t *testing.T) (*metricsfake.Clie assert.Equal(t, "pods", getForAction.GetResource().Resource, "type of object that we requested multiple metrics for should have been pods") for i, metricPoint := range tc.reportedMetricPoints { - timestamp := fixedTimestamp.Add(time.Duration(metricPoint.timestamp) * time.Minute) + timestamp := offsetTimestampBy(metricPoint.timestamp) metric := cmapi.MetricValue{ DescribedObject: v1.ObjectReference{ Kind: "Pod", @@ -168,7 +170,7 @@ func (tc *restClientTestCase) prepareTestClient(t *testing.T) (*metricsfake.Clie assert.Equal(t, groupResource.String(), getForAction.GetResource().Resource, "should have requested metrics for the resource matching the GroupKind passed in") assert.Equal(t, tc.singleObject.Name, name, "should have requested metrics for the object matching the name passed in") metricPoint := tc.reportedMetricPoints[0] - timestamp := fixedTimestamp.Add(time.Duration(metricPoint.timestamp) * time.Minute) + timestamp := offsetTimestampBy(metricPoint.timestamp) metrics := &cmapi.MetricValueList{ Items: []cmapi.MetricValue{ @@ -204,9 +206,20 @@ func (tc *restClientTestCase) verifyResults(t *testing.T, metrics PodMetricsInfo assert.NoError(t, err, "there should be no error retrieving the metrics") assert.NotNil(t, metrics, "there should be metrics returned") - assert.Equal(t, tc.desiredMetricValues, metrics, "the metrics values should be as expected") + if len(metrics) != len(tc.desiredMetricValues) { + t.Errorf("Not equal:\nexpected: %v\nactual: %v", tc.desiredMetricValues, metrics) + } else { + for k, m := range metrics { + if !m.Timestamp.Equal(tc.desiredMetricValues[k].Timestamp) || + m.Window != tc.desiredMetricValues[k].Window || + m.Value != tc.desiredMetricValues[k].Value { + t.Errorf("Not equal:\nexpected: %v\nactual: %v", tc.desiredMetricValues, metrics) + break + } + } + } - targetTimestamp := fixedTimestamp.Add(time.Duration(tc.targetTimestamp) * time.Minute) + targetTimestamp := offsetTimestampBy(tc.targetTimestamp) assert.True(t, targetTimestamp.Equal(timestamp), fmt.Sprintf("the timestamp should be as expected (%s) but was %s", targetTimestamp, timestamp)) } @@ -227,7 +240,7 @@ func (tc *restClientTestCase) runTest(t *testing.T) { val, timestamp, err := metricsClient.GetExternalMetric(tc.metricName, tc.namespace, tc.metricLabelSelector) info := make(PodMetricsInfo, len(val)) for i, metricVal := range val { - info[fmt.Sprintf("%v-val-%v", tc.metricName, i)] = metricVal + info[fmt.Sprintf("%v-val-%v", tc.metricName, i)] = PodMetric{Value: metricVal} } tc.verifyResults(t, info, timestamp, err) } else if tc.singleObject == nil { @@ -235,18 +248,23 @@ func (tc *restClientTestCase) runTest(t *testing.T) { tc.verifyResults(t, info, timestamp, err) } else { val, timestamp, err := metricsClient.GetObjectMetric(tc.metricName, tc.namespace, tc.singleObject, tc.metricLabelSelector) - info := PodMetricsInfo{tc.singleObject.Name: val} + info := PodMetricsInfo{tc.singleObject.Name: {Value: val}} tc.verifyResults(t, info, timestamp, err) } } func 
TestRESTClientCPU(t *testing.T) { + targetTimestamp := 1 + window := 30 * time.Second tc := restClientTestCase{ desiredMetricValues: PodMetricsInfo{ - "test-pod-0": 5000, "test-pod-1": 5000, "test-pod-2": 5000, + "test-pod-0": {Value: 5000, Timestamp: offsetTimestampBy(targetTimestamp), Window: window}, + "test-pod-1": {Value: 5000, Timestamp: offsetTimestampBy(targetTimestamp), Window: window}, + "test-pod-2": {Value: 5000, Timestamp: offsetTimestampBy(targetTimestamp), Window: window}, }, resourceName: v1.ResourceCPU, - targetTimestamp: 1, + targetTimestamp: targetTimestamp, + window: window, reportedPodMetrics: [][]int64{{5000}, {5000}, {5000}}, } tc.runTest(t) @@ -255,7 +273,7 @@ func TestRESTClientCPU(t *testing.T) { func TestRESTClientExternal(t *testing.T) { tc := restClientTestCase{ desiredMetricValues: PodMetricsInfo{ - "external-val-0": 10000, "external-val-1": 20000, "external-val-2": 10000, + "external-val-0": {Value: 10000}, "external-val-1": {Value: 20000}, "external-val-2": {Value: 10000}, }, metricSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"label": "value"}}, metricName: "external", @@ -266,12 +284,15 @@ func TestRESTClientExternal(t *testing.T) { } func TestRESTClientQPS(t *testing.T) { + targetTimestamp := 1 tc := restClientTestCase{ desiredMetricValues: PodMetricsInfo{ - "test-pod-0": 10000, "test-pod-1": 20000, "test-pod-2": 10000, + "test-pod-0": {Value: 10000, Timestamp: offsetTimestampBy(targetTimestamp), Window: metricServerDefaultMetricWindow}, + "test-pod-1": {Value: 20000, Timestamp: offsetTimestampBy(targetTimestamp), Window: metricServerDefaultMetricWindow}, + "test-pod-2": {Value: 10000, Timestamp: offsetTimestampBy(targetTimestamp), Window: metricServerDefaultMetricWindow}, }, metricName: "qps", - targetTimestamp: 1, + targetTimestamp: targetTimestamp, reportedMetricPoints: []metricPoint{{10000, 1}, {20000, 1}, {10000, 1}}, } tc.runTest(t) @@ -279,7 +300,7 @@ func TestRESTClientQPS(t *testing.T) { func TestRESTClientSingleObject(t *testing.T) { tc := restClientTestCase{ - desiredMetricValues: PodMetricsInfo{"some-dep": 10}, + desiredMetricValues: PodMetricsInfo{"some-dep": {Value: 10}}, metricName: "queue-length", targetTimestamp: 1, reportedMetricPoints: []metricPoint{{10, 1}}, @@ -293,12 +314,15 @@ func TestRESTClientSingleObject(t *testing.T) { } func TestRESTClientQpsSumEqualZero(t *testing.T) { + targetTimestamp := 0 tc := restClientTestCase{ desiredMetricValues: PodMetricsInfo{ - "test-pod-0": 0, "test-pod-1": 0, "test-pod-2": 0, + "test-pod-0": {Value: 0, Timestamp: offsetTimestampBy(targetTimestamp), Window: metricServerDefaultMetricWindow}, + "test-pod-1": {Value: 0, Timestamp: offsetTimestampBy(targetTimestamp), Window: metricServerDefaultMetricWindow}, + "test-pod-2": {Value: 0, Timestamp: offsetTimestampBy(targetTimestamp), Window: metricServerDefaultMetricWindow}, }, metricName: "qps", - targetTimestamp: 0, + targetTimestamp: targetTimestamp, reportedMetricPoints: []metricPoint{{0, 0}, {0, 0}, {0, 0}}, } tc.runTest(t) @@ -307,7 +331,7 @@ func TestRESTClientQpsSumEqualZero(t *testing.T) { func TestRESTClientExternalSumEqualZero(t *testing.T) { tc := restClientTestCase{ desiredMetricValues: PodMetricsInfo{ - "external-val-0": 0, "external-val-1": 0, "external-val-2": 0, + "external-val-0": {Value: 0}, "external-val-1": {Value: 0}, "external-val-2": {Value: 0}, }, metricSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"label": "value"}}, metricName: "external", @@ -349,11 +373,16 @@ func 
TestRESTClientCPUEmptyMetrics(t *testing.T) { } func TestRESTClientCPUEmptyMetricsForOnePod(t *testing.T) { + targetTimestamp := 1 + window := 30 * time.Second tc := restClientTestCase{ resourceName: v1.ResourceCPU, desiredMetricValues: PodMetricsInfo{ - "test-pod-0": 100, "test-pod-1": 700, + "test-pod-0": {Value: 100, Timestamp: offsetTimestampBy(targetTimestamp), Window: window}, + "test-pod-1": {Value: 700, Timestamp: offsetTimestampBy(targetTimestamp), Window: window}, }, + targetTimestamp: targetTimestamp, + window: window, reportedPodMetrics: [][]int64{{100}, {300, 400}, {}}, } tc.runTest(t) diff --git a/pkg/controller/podautoscaler/metrics/utilization.go b/pkg/controller/podautoscaler/metrics/utilization.go index 0417b9a0e3..8ee53a6c47 100644 --- a/pkg/controller/podautoscaler/metrics/utilization.go +++ b/pkg/controller/podautoscaler/metrics/utilization.go @@ -28,14 +28,14 @@ func GetResourceUtilizationRatio(metrics PodMetricsInfo, requests map[string]int requestsTotal := int64(0) numEntries := 0 - for podName, metricValue := range metrics { + for podName, metric := range metrics { request, hasRequest := requests[podName] if !hasRequest { // we check for missing requests elsewhere, so assuming missing requests == extraneous metrics continue } - metricsTotal += metricValue + metricsTotal += metric.Value requestsTotal += request numEntries++ } @@ -56,8 +56,8 @@ func GetResourceUtilizationRatio(metrics PodMetricsInfo, requests map[string]int // (returning that and the actual utilization) func GetMetricUtilizationRatio(metrics PodMetricsInfo, targetUtilization int64) (utilizationRatio float64, currentUtilization int64) { metricsTotal := int64(0) - for _, metricValue := range metrics { - metricsTotal += metricValue + for _, metric := range metrics { + metricsTotal += metric.Value } currentUtilization = metricsTotal / int64(len(metrics)) diff --git a/pkg/controller/podautoscaler/metrics/utilization_test.go b/pkg/controller/podautoscaler/metrics/utilization_test.go index 35e7df6eda..3d87745780 100644 --- a/pkg/controller/podautoscaler/metrics/utilization_test.go +++ b/pkg/controller/podautoscaler/metrics/utilization_test.go @@ -67,7 +67,7 @@ func (tc *metricUtilizationRatioTestCase) runTest(t *testing.T) { func TestGetResourceUtilizationRatioBaseCase(t *testing.T) { tc := resourceUtilizationRatioTestCase{ metrics: PodMetricsInfo{ - "test-pod-0": 50, "test-pod-1": 76, + "test-pod-0": {Value: 50}, "test-pod-1": {Value: 76}, }, requests: map[string]int64{ "test-pod-0": 100, "test-pod-1": 100, @@ -85,7 +85,7 @@ func TestGetResourceUtilizationRatioBaseCase(t *testing.T) { func TestGetResourceUtilizationRatioIgnorePodsWithNoRequest(t *testing.T) { tc := resourceUtilizationRatioTestCase{ metrics: PodMetricsInfo{ - "test-pod-0": 50, "test-pod-1": 76, "test-pod-no-request": 100, + "test-pod-0": {Value: 50}, "test-pod-1": {Value: 76}, "test-pod-no-request": {Value: 100}, }, requests: map[string]int64{ "test-pod-0": 100, "test-pod-1": 100, @@ -103,7 +103,7 @@ func TestGetResourceUtilizationRatioIgnorePodsWithNoRequest(t *testing.T) { func TestGetResourceUtilizationRatioExtraRequest(t *testing.T) { tc := resourceUtilizationRatioTestCase{ metrics: PodMetricsInfo{ - "test-pod-0": 50, "test-pod-1": 76, + "test-pod-0": {Value: 50}, "test-pod-1": {Value: 76}, }, requests: map[string]int64{ "test-pod-0": 100, "test-pod-1": 100, "test-pod-extra-request": 500, @@ -121,7 +121,7 @@ func TestGetResourceUtilizationRatioExtraRequest(t *testing.T) { func TestGetResourceUtilizationRatioNoRequests(t *testing.T) { tc 
:= resourceUtilizationRatioTestCase{ metrics: PodMetricsInfo{ - "test-pod-0": 50, "test-pod-1": 76, + "test-pod-0": {Value: 50}, "test-pod-1": {Value: 76}, }, requests: map[string]int64{}, targetUtilization: 50, @@ -138,7 +138,7 @@ func TestGetResourceUtilizationRatioNoRequests(t *testing.T) { func TestGetMetricUtilizationRatioBaseCase(t *testing.T) { tc := metricUtilizationRatioTestCase{ metrics: PodMetricsInfo{ - "test-pod-0": 5000, "test-pod-1": 10000, + "test-pod-0": {Value: 5000}, "test-pod-1": {Value: 10000}, }, targetUtilization: 10000, expectedUtilizationRatio: .75, diff --git a/pkg/controller/podautoscaler/replica_calculator.go b/pkg/controller/podautoscaler/replica_calculator.go index de020153dc..6823a838a2 100644 --- a/pkg/controller/podautoscaler/replica_calculator.go +++ b/pkg/controller/podautoscaler/replica_calculator.go @@ -32,12 +32,10 @@ import ( ) const ( - // TODO(jbartosik): use actual value. - cpuSampleWindow = time.Minute // defaultTestingTolerance is default value for calculating when to // scale up/scale down. defaultTestingTolerance = 0.1 - defaultTestingCpuTaintAfterStart = 2 * time.Minute + defaultTestingCpuInitializationPeriod = 2 * time.Minute defaultTestingDelayOfInitialReadinessStatus = 10 * time.Second ) @@ -45,16 +43,16 @@ type ReplicaCalculator struct { metricsClient metricsclient.MetricsClient podsGetter v1coreclient.PodsGetter tolerance float64 - cpuTaintAfterStart time.Duration + cpuInitializationPeriod time.Duration delayOfInitialReadinessStatus time.Duration } -func NewReplicaCalculator(metricsClient metricsclient.MetricsClient, podsGetter v1coreclient.PodsGetter, tolerance float64, cpuTaintAfterStart, delayOfInitialReadinessStatus time.Duration) *ReplicaCalculator { +func NewReplicaCalculator(metricsClient metricsclient.MetricsClient, podsGetter v1coreclient.PodsGetter, tolerance float64, cpuInitializationPeriod, delayOfInitialReadinessStatus time.Duration) *ReplicaCalculator { return &ReplicaCalculator{ metricsClient: metricsClient, podsGetter: podsGetter, tolerance: tolerance, - cpuTaintAfterStart: cpuTaintAfterStart, + cpuInitializationPeriod: cpuInitializationPeriod, delayOfInitialReadinessStatus: delayOfInitialReadinessStatus, } } @@ -77,7 +75,7 @@ func (c *ReplicaCalculator) GetResourceReplicas(currentReplicas int32, targetUti return 0, 0, 0, time.Time{}, fmt.Errorf("no pods returned by selector while calculating replica count") } - readyPodCount, ignoredPods, missingPods := groupPods(podList.Items, metrics, resource, c.cpuTaintAfterStart, c.delayOfInitialReadinessStatus) + readyPodCount, ignoredPods, missingPods := groupPods(podList.Items, metrics, resource, c.cpuInitializationPeriod, c.delayOfInitialReadinessStatus) removeMetricsForPods(metrics, ignoredPods) requests, err := calculatePodRequests(podList.Items, resource) if err != nil { @@ -108,12 +106,12 @@ func (c *ReplicaCalculator) GetResourceReplicas(currentReplicas int32, targetUti if usageRatio < 1.0 { // on a scale-down, treat missing pods as using 100% of the resource request for podName := range missingPods { - metrics[podName] = requests[podName] + metrics[podName] = metricsclient.PodMetric{Value: requests[podName]} } } else if usageRatio > 1.0 { // on a scale-up, treat missing pods as using 0% of the resource request for podName := range missingPods { - metrics[podName] = 0 + metrics[podName] = metricsclient.PodMetric{Value: 0} } } } @@ -121,7 +119,7 @@ func (c *ReplicaCalculator) GetResourceReplicas(currentReplicas int32, targetUti if rebalanceIgnored { // on a scale-up, treat 
unready pods as using 0% of the resource request for podName := range ignoredPods { - metrics[podName] = 0 + metrics[podName] = metricsclient.PodMetric{Value: 0} } } @@ -178,7 +176,7 @@ func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMet return 0, 0, fmt.Errorf("no pods returned by selector while calculating replica count") } - readyPodCount, ignoredPods, missingPods := groupPods(podList.Items, metrics, resource, c.cpuTaintAfterStart, c.delayOfInitialReadinessStatus) + readyPodCount, ignoredPods, missingPods := groupPods(podList.Items, metrics, resource, c.cpuInitializationPeriod, c.delayOfInitialReadinessStatus) removeMetricsForPods(metrics, ignoredPods) if len(metrics) == 0 { @@ -203,12 +201,12 @@ func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMet if usageRatio < 1.0 { // on a scale-down, treat missing pods as using 100% of the resource request for podName := range missingPods { - metrics[podName] = targetUtilization + metrics[podName] = metricsclient.PodMetric{Value: targetUtilization} } } else { // on a scale-up, treat missing pods as using 0% of the resource request for podName := range missingPods { - metrics[podName] = 0 + metrics[podName] = metricsclient.PodMetric{Value: 0} } } } @@ -216,7 +214,7 @@ func (c *ReplicaCalculator) calcPlainMetricReplicas(metrics metricsclient.PodMet if rebalanceIgnored { // on a scale-up, treat unready pods as using 0% of the resource request for podName := range ignoredPods { - metrics[podName] = 0 + metrics[podName] = metricsclient.PodMetric{Value: 0} } } @@ -342,14 +340,15 @@ func (c *ReplicaCalculator) GetExternalPerPodMetricReplicas(currentReplicas int3 return replicaCount, utilization, timestamp, nil } -func groupPods(pods []v1.Pod, metrics metricsclient.PodMetricsInfo, resource v1.ResourceName, cpuTaintAfterStart, delayOfInitialReadinessStatus time.Duration) (readyPodCount int, ignoredPods sets.String, missingPods sets.String) { +func groupPods(pods []v1.Pod, metrics metricsclient.PodMetricsInfo, resource v1.ResourceName, cpuInitializationPeriod, delayOfInitialReadinessStatus time.Duration) (readyPodCount int, ignoredPods sets.String, missingPods sets.String) { missingPods = sets.NewString() ignoredPods = sets.NewString() for _, pod := range pods { if pod.DeletionTimestamp != nil || pod.Status.Phase == v1.PodFailed { continue } - if _, found := metrics[pod.Name]; !found { + metric, found := metrics[pod.Name] + if !found { missingPods.Insert(pod.Name) continue } @@ -359,10 +358,13 @@ func groupPods(pods []v1.Pod, metrics metricsclient.PodMetricsInfo, resource v1. if condition == nil || pod.Status.StartTime == nil { ignorePod = true } else { - if condition.Status == v1.ConditionTrue { - ignorePod = pod.Status.StartTime.Add(cpuTaintAfterStart + cpuSampleWindow).After(time.Now()) + // Pod still within possible initialisation period. + if pod.Status.StartTime.Add(cpuInitializationPeriod).After(time.Now()) { + // Ignore sample if pod is unready or one window of metric wasn't collected since last state transition. + ignorePod = condition.Status == v1.ConditionFalse || metric.Timestamp.Before(condition.LastTransitionTime.Time.Add(metric.Window)) } else { - ignorePod = pod.Status.StartTime.Add(delayOfInitialReadinessStatus).After(condition.LastTransitionTime.Time) + // Ignore metric if pod is unready and it has never been ready. 
+ ignorePod = condition.Status == v1.ConditionFalse && pod.Status.StartTime.Add(delayOfInitialReadinessStatus).After(condition.LastTransitionTime.Time) } } if ignorePod { @@ -370,7 +372,6 @@ func groupPods(pods []v1.Pod, metrics metricsclient.PodMetricsInfo, resource v1. continue } } - readyPodCount++ } return diff --git a/pkg/controller/podautoscaler/replica_calculator_test.go b/pkg/controller/podautoscaler/replica_calculator_test.go index 7c56f69f49..d8492c1c05 100644 --- a/pkg/controller/podautoscaler/replica_calculator_test.go +++ b/pkg/controller/podautoscaler/replica_calculator_test.go @@ -194,6 +194,7 @@ func (tc *replicaCalcTestCase) prepareTestMetricsClient() *metricsfake.Clientset Labels: map[string]string{"name": podNamePrefix}, }, Timestamp: metav1.Time{Time: tc.timestamp}, + Window: metav1.Duration{Duration: time.Minute}, Containers: make([]metricsapi.ContainerMetrics, numContainersPerPod), } @@ -337,7 +338,7 @@ func (tc *replicaCalcTestCase) runTest(t *testing.T) { testClient, testMetricsClient, testCMClient, testEMClient := tc.prepareTestClient(t) metricsClient := metricsclient.NewRESTMetricsClient(testMetricsClient.MetricsV1beta1(), testCMClient, testEMClient) - replicaCalc := NewReplicaCalculator(metricsClient, testClient.Core(), defaultTestingTolerance, defaultTestingCpuTaintAfterStart, defaultTestingDelayOfInitialReadinessStatus) + replicaCalc := NewReplicaCalculator(metricsClient, testClient.Core(), defaultTestingTolerance, defaultTestingCpuInitializationPeriod, defaultTestingDelayOfInitialReadinessStatus) selector, err := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ MatchLabels: map[string]string{"name": podNamePrefix}, @@ -1227,20 +1228,20 @@ func TestGroupPods(t *testing.T) { metrics metricsclient.PodMetricsInfo resource v1.ResourceName expectReadyPodCount int - expectUnreadyPods sets.String + expectIgnoredPods sets.String expectMissingPods sets.String }{ { "void", []v1.Pod{}, metricsclient.PodMetricsInfo{}, - v1.ResourceName(""), + v1.ResourceCPU, 0, sets.NewString(), sets.NewString(), }, { - "a ready pod", + "count in a ready pod - memory", []v1.Pod{ { ObjectMeta: metav1.ObjectMeta{ @@ -1252,15 +1253,15 @@ func TestGroupPods(t *testing.T) { }, }, metricsclient.PodMetricsInfo{ - "bentham": 1, + "bentham": metricsclient.PodMetric{Value: 1, Timestamp: time.Now(), Window: time.Minute}, }, - v1.ResourceName("hedons"), + v1.ResourceMemory, 1, sets.NewString(), sets.NewString(), }, { - "an unready pod", + "ignore a pod without ready condition - CPU", []v1.Pod{ { ObjectMeta: metav1.ObjectMeta{ @@ -1275,7 +1276,7 @@ func TestGroupPods(t *testing.T) { }, }, metricsclient.PodMetricsInfo{ - "lucretius": 1, + "lucretius": metricsclient.PodMetric{Value: 1}, }, v1.ResourceCPU, 0, @@ -1283,11 +1284,101 @@ func TestGroupPods(t *testing.T) { sets.NewString(), }, { - "a ready cpu pod", + "count in a ready pod with fresh metrics during initialization period - CPU", []v1.Pod{ { ObjectMeta: metav1.ObjectMeta{ - Name: "niccolo", + Name: "bentham", + }, + Status: v1.PodStatus{ + Phase: v1.PodSucceeded, + StartTime: &metav1.Time{ + Time: time.Now().Add(-1 * time.Minute), + }, + Conditions: []v1.PodCondition{ + { + Type: v1.PodReady, + LastTransitionTime: metav1.Time{Time: time.Now().Add(-30 * time.Second)}, + Status: v1.ConditionTrue, + }, + }, + }, + }, + }, + metricsclient.PodMetricsInfo{ + "bentham": metricsclient.PodMetric{Value: 1, Timestamp: time.Now(), Window: 30 * time.Second}, + }, + v1.ResourceCPU, + 1, + sets.NewString(), + sets.NewString(), + }, + { + "ignore a ready 
pod without fresh metrics during initialization period - CPU", + []v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "bentham", + }, + Status: v1.PodStatus{ + Phase: v1.PodSucceeded, + StartTime: &metav1.Time{ + Time: time.Now().Add(-1 * time.Minute), + }, + Conditions: []v1.PodCondition{ + { + Type: v1.PodReady, + LastTransitionTime: metav1.Time{Time: time.Now().Add(-30 * time.Second)}, + Status: v1.ConditionTrue, + }, + }, + }, + }, + }, + metricsclient.PodMetricsInfo{ + "bentham": metricsclient.PodMetric{Value: 1, Timestamp: time.Now(), Window: 60 * time.Second}, + }, + v1.ResourceCPU, + 0, + sets.NewString("bentham"), + sets.NewString(), + }, + { + "ignore pod that has never been ready after initialization period - CPU", + []v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "lucretius", + }, + Status: v1.PodStatus{ + Phase: v1.PodSucceeded, + StartTime: &metav1.Time{ + Time: time.Now().Add(-10 * time.Minute), + }, + Conditions: []v1.PodCondition{ + { + Type: v1.PodReady, + LastTransitionTime: metav1.Time{Time: time.Now().Add(-9*time.Minute - 54*time.Second)}, + Status: v1.ConditionFalse, + }, + }, + }, + }, + }, + metricsclient.PodMetricsInfo{ + "lucretius": metricsclient.PodMetric{Value: 1}, + }, + v1.ResourceCPU, + 0, + sets.NewString("lucretius"), + sets.NewString(), + }, + { + "count in a ready pod without fresh metrics after initialization period - CPU", + []v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "bentham", }, Status: v1.PodStatus{ Phase: v1.PodSucceeded, @@ -1305,7 +1396,68 @@ func TestGroupPods(t *testing.T) { }, }, metricsclient.PodMetricsInfo{ - "niccolo": 1, + "bentham": metricsclient.PodMetric{Value: 1, Timestamp: time.Now().Add(-2 * time.Minute), Window: time.Minute}, + }, + v1.ResourceCPU, + 1, + sets.NewString(), + sets.NewString(), + }, + + { + "count in an unready pod that was ready after initialization period - CPU", + []v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "lucretius", + }, + Status: v1.PodStatus{ + Phase: v1.PodSucceeded, + StartTime: &metav1.Time{ + Time: time.Now().Add(-10 * time.Minute), + }, + Conditions: []v1.PodCondition{ + { + Type: v1.PodReady, + LastTransitionTime: metav1.Time{Time: time.Now().Add(-9 * time.Minute)}, + Status: v1.ConditionFalse, + }, + }, + }, + }, + }, + metricsclient.PodMetricsInfo{ + "lucretius": metricsclient.PodMetric{Value: 1}, + }, + v1.ResourceCPU, + 1, + sets.NewString(), + sets.NewString(), + }, + { + "count in an unready pod that became unready exactly at the end of the initial readiness delay - CPU", + []v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "lucretius", + }, + Status: v1.PodStatus{ + Phase: v1.PodSucceeded, + StartTime: &metav1.Time{ + Time: time.Now().Add(-10 * time.Minute), + }, + Conditions: []v1.PodCondition{ + { + Type: v1.PodReady, + LastTransitionTime: metav1.Time{Time: time.Now().Add(-9*time.Minute - 50*time.Second)}, + Status: v1.ConditionFalse, + }, + }, + }, + }, + }, + metricsclient.PodMetricsInfo{ + "lucretius": metricsclient.PodMetric{Value: 1}, }, v1.ResourceCPU, 1, @@ -1334,7 +1486,7 @@ sets.NewString("epicurus"), }, { - "all together", + "several pods", []v1.Pod{ { ObjectMeta: metav1.ObjectMeta{ @@ -1378,8 +1530,8 @@ }, }, metricsclient.PodMetricsInfo{ - "lucretius": 1, - "niccolo": 1, + "lucretius": metricsclient.PodMetric{Value: 1}, + "niccolo": metricsclient.PodMetric{Value: 1}, }, v1.ResourceCPU, 1, @@ -1388,12 +1540,12 @@ }, } for _, tc := range tests { - readyPodCount, unreadyPods, missingPods 
:= groupPods(tc.pods, tc.metrics, tc.resource, defaultTestingCpuTaintAfterStart, defaultTestingDelayOfInitialReadinessStatus) + readyPodCount, ignoredPods, missingPods := groupPods(tc.pods, tc.metrics, tc.resource, defaultTestingCpuInitializationPeriod, defaultTestingDelayOfInitialReadinessStatus) if readyPodCount != tc.expectReadyPodCount { t.Errorf("%s got readyPodCount %d, expected %d", tc.name, readyPodCount, tc.expectReadyPodCount) } - if !unreadyPods.Equal(tc.expectUnreadyPods) { - t.Errorf("%s got unreadyPods %v, expected %v", tc.name, unreadyPods, tc.expectUnreadyPods) + if !ignoredPods.Equal(tc.expectIgnoredPods) { + t.Errorf("%s got ignoredPods %v, expected %v", tc.name, ignoredPods, tc.expectIgnoredPods) } if !missingPods.Equal(tc.expectMissingPods) { t.Errorf("%s got missingPods %v, expected %v", tc.name, missingPods, tc.expectMissingPods)
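
Reviewer notes (illustrative, not part of the diff):

The groupPods change above is the behavioral core of this PR. Here is a self-contained sketch of the new CPU-sample classification, using local stand-in types instead of the real k8s.io imports; podSample and ignoreCPUSample are made-up names for illustration, not code from this change.

```go
package main

import (
	"fmt"
	"time"
)

// podSample collects the fields groupPods consults; illustrative only.
type podSample struct {
	startTime      time.Time     // pod .status.startTime
	ready          bool          // PodReady condition status
	lastTransition time.Time     // PodReady condition's LastTransitionTime
	metricTime     time.Time     // PodMetric.Timestamp
	metricWindow   time.Duration // PodMetric.Window
}

// ignoreCPUSample mirrors the CPU branch of the new groupPods logic.
func ignoreCPUSample(p podSample, now time.Time, cpuInitializationPeriod, delayOfInitialReadinessStatus time.Duration) bool {
	if p.startTime.Add(cpuInitializationPeriod).After(now) {
		// Within the initialization period: drop the sample if the pod is
		// unready, or if a full metric window has not elapsed since the
		// last readiness transition.
		return !p.ready || p.metricTime.Before(p.lastTransition.Add(p.metricWindow))
	}
	// After the initialization period: drop the sample only if the pod is
	// unready and has never been ready (its only readiness change happened
	// within delayOfInitialReadinessStatus of startup).
	return !p.ready && p.startTime.Add(delayOfInitialReadinessStatus).After(p.lastTransition)
}

func main() {
	now := time.Now()
	p := podSample{
		startTime:      now.Add(-1 * time.Minute),
		ready:          true,
		lastTransition: now.Add(-30 * time.Second),
		metricTime:     now,
		metricWindow:   30 * time.Second,
	}
	// A ready pod, 1m old, with the new 5m default initialization period and
	// a full 30s window collected since its last transition: sample kept.
	fmt.Println(ignoreCPUSample(p, now, 5*time.Minute, 10*time.Second)) // false
}
```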
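The last two renamed TestGroupPods cases hinge on time.Time.After being strict: an unready pod whose only readiness transition happened exactly delayOfInitialReadinessStatus after startup is not treated as "never ready", so its sample is counted, while a transition inside that window is ignored. A minimal check of that boundary (the offsets mirror the -9m50s and -9m54s cases; neverReady is a hypothetical helper, not part of the change):

```go
package main

import (
	"fmt"
	"time"
)

// neverReady reports whether an unready pod's only readiness transition
// happened within delay of startup, per the new groupPods else-branch.
func neverReady(start, lastTransition time.Time, delay time.Duration) bool {
	return start.Add(delay).After(lastTransition)
}

func main() {
	delay := 10 * time.Second
	start := time.Now().Add(-10 * time.Minute)

	atBoundary := start.Add(10 * time.Second) // like LastTransitionTime at -9m50s
	within := start.Add(6 * time.Second)      // like LastTransitionTime at -9m54s

	fmt.Println(neverReady(start, atBoundary, delay)) // false -> sample counted
	fmt.Println(neverReady(start, within, delay))     // true  -> sample ignored
}
```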
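Finally, a quick illustration of the PodMetricsInfo shape change: map values are now PodMetric structs carrying the sample's timestamp and collection window alongside the milli-value, so callers read .Value rather than the map value itself. The types are copied locally only to keep the snippet self-contained; pod names and numbers are invented.

```go
package main

import (
	"fmt"
	"time"
)

// Local copies of the new shapes in pkg/controller/podautoscaler/metrics.
type PodMetric struct {
	Timestamp time.Time
	Window    time.Duration
	Value     int64 // still a milli-value, as before
}

type PodMetricsInfo map[string]PodMetric

func main() {
	now := time.Now()
	metrics := PodMetricsInfo{
		// Previously plain int64s, e.g. "frontend-0": 400; the value now
		// travels with when and over what window it was collected.
		"frontend-0": {Timestamp: now, Window: 30 * time.Second, Value: 400},
		"frontend-1": {Timestamp: now, Window: 30 * time.Second, Value: 600},
	}

	total := int64(0)
	for _, m := range metrics {
		total += m.Value
	}
	fmt.Printf("average: %dm\n", total/int64(len(metrics))) // average: 500m
}
```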