Merge pull request #18458 from hongchaodeng/perf

Auto commit by PR queue bot
k8s-merge-robot 2015-12-21 01:54:09 -08:00
commit 2efc738d5b
7 changed files with 420 additions and 131 deletions

View File

@@ -39,6 +39,7 @@ kube::test::find_dirs() {
-o -path './test/e2e/*' \
-o -path './test/e2e_node/*' \
-o -path './test/integration/*' \
-o -path './test/component/scheduler/perf/*' \
\) -prune \
\) -name '*_test.go' -print0 | xargs -0n1 dirname | sed 's|^\./||' | sort -u
)

View File

@@ -0,0 +1,75 @@
Scheduler Performance Test
======
Motivation
------
We already have a performance testing system, Kubemark. However, Kubemark requires setting up and bootstrapping a whole cluster, which takes a lot of time.
We want a standard way to reproduce scheduling-latency metrics and to benchmark the scheduler as simply and quickly as possible. We have the following goals:
- Save time on testing
  - The test and benchmark can be run on a single box.
    We set up only the components necessary for scheduling, without booting up a whole cluster.
- Profile runtime metrics to find bottlenecks
  - Write scheduler integration tests that focus on performance measurement.
    Take advantage of Go's profiling tools to collect fine-grained metrics,
    such as CPU, memory, and block profiles.
- Reproduce test results easily
  - We want a known place for performance-related scheduler tests.
    Developers should be able to run one script to collect all the information they need.
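The profiles come from the standard `go test` flags (`-cpuprofile`, `-memprofile`, `-blockprofile`); the `test-performance.sh` script below passes `-test.cpuprofile` to the compiled test binary.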
Currently the test suite has the following:

- density test (a new Go test)
  - schedules 30k pods on 1000 (fake) nodes and 3k pods on 100 (fake) nodes
  - prints the scheduling rate every second,
    so you can see how the rate changes as the number of scheduled pods grows
- benchmark
  - uses `go test -bench` and reports nanoseconds/op
  - schedules b.N pods when the cluster has N nodes and P scheduled pods; since one round takes a relatively long time to finish, b.N is kept small: 10 - 100
How To Run
------
```
cd kubernetes/test/component/scheduler/perf
./test-performance.sh
```
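To run only the benchmarks, you can reproduce the script's invocation directly, e.g. `go test -bench=. -run=xxxx` in the same directory.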

View File

@@ -0,0 +1,79 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package benchmark
import (
"testing"
"time"
)
// BenchmarkScheduling100Nodes0Pods benchmarks the scheduling rate
// when the cluster has 100 nodes and 0 scheduled pods
func BenchmarkScheduling100Nodes0Pods(b *testing.B) {
benchmarkScheduling(100, 0, b)
}
// BenchmarkScheduling100Nodes1000Pods benchmarks the scheduling rate
// when the cluster has 100 nodes and 1000 scheduled pods
func BenchmarkScheduling100Nodes1000Pods(b *testing.B) {
benchmarkScheduling(100, 1000, b)
}
// BenchmarkScheduling1000Nodes0Pods benchmarks the scheduling rate
// when the cluster has 1000 nodes and 0 scheduled pods
func BenchmarkScheduling1000Nodes0Pods(b *testing.B) {
benchmarkScheduling(1000, 0, b)
}
// BenchmarkScheduling1000Nodes1000Pods benchmarks the scheduling rate
// when the cluster has 1000 nodes and 1000 scheduled pods
func BenchmarkScheduling1000Nodes1000Pods(b *testing.B) {
benchmarkScheduling(1000, 1000, b)
}
// benchmarkScheduling benchmarks the scheduling rate with a specific number of nodes
// and a specific number of pods already scheduled. Since one operation takes a relatively
// long time, b.N should be small: 10 - 100.
func benchmarkScheduling(numNodes, numScheduledPods int, b *testing.B) {
schedulerConfigFactory, finalFunc := mustSetupScheduler()
defer finalFunc()
c := schedulerConfigFactory.Client
makeNodes(c, numNodes)
makePods(c, numScheduledPods)
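// Wait until all the initial pods are scheduled; the timed section below should only
// measure scheduling of the b.N additional pods.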
for {
scheduled := schedulerConfigFactory.ScheduledPodLister.Store.List()
if len(scheduled) >= numScheduledPods {
break
}
time.Sleep(1 * time.Second)
}
// start benchmark
b.ResetTimer()
makePods(c, b.N)
for {
// This can potentially affect the performance of the scheduler, since List() is done under a mutex.
// TODO: Setup watch on apiserver and wait until all pods scheduled.
scheduled := schedulerConfigFactory.ScheduledPodLister.Store.List()
if len(scheduled) >= numScheduledPods+b.N {
break
}
// Note: the sleep might introduce a slight inaccuracy in the benchmark results;
// since the total run time is relatively long, this should not be a concern.
time.Sleep(100 * time.Millisecond)
}
}

View File

@@ -0,0 +1,61 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package benchmark
import (
"fmt"
"testing"
"time"
)
// TestSchedule100Node3KPods schedules 3k pods on 100 nodes.
func TestSchedule100Node3KPods(t *testing.T) {
schedulePods(100, 3000)
}
// TestSchedule1000Node30KPods schedules 30k pods on 1000 nodes.
func TestSchedule1000Node30KPods(t *testing.T) {
schedulePods(1000, 30000)
}
// schedulePods schedules a specific number of pods on a specific number of nodes.
// It is used to learn the scheduling throughput on various cluster sizes, and how the
// throughput changes as more and more pods are scheduled.
// It does not return until all pods are scheduled.
func schedulePods(numNodes, numPods int) {
schedulerConfigFactory, destroyFunc := mustSetupScheduler()
defer destroyFunc()
c := schedulerConfigFactory.Client
makeNodes(c, numNodes)
makePods(c, numPods)
prev := 0
start := time.Now()
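// Poll once a second, printing the per-second scheduling rate and the running total.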
for {
// This can potentially affect the performance of the scheduler, since List() is done under a mutex.
// Listing 10000 pods is an expensive operation, so running it frequently may impact the scheduler.
// TODO: Setup watch on apiserver and wait until all pods scheduled.
scheduled := schedulerConfigFactory.ScheduledPodLister.Store.List()
fmt.Printf("%ds\trate: %d\ttotal: %d\n", time.Since(start)/time.Second, len(scheduled)-prev, len(scheduled))
if len(scheduled) >= numPods {
return
}
prev = len(scheduled)
time.Sleep(1 * time.Second)
}
}

View File

@@ -0,0 +1,46 @@
#!/usr/bin/env bash
# Copyright 2014 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -o errexit
set -o nounset
set -o pipefail
pushd "../../../.."
source "./hack/lib/util.sh"
source "./hack/lib/logging.sh"
source "./hack/lib/etcd.sh"
popd
cleanup() {
kube::etcd::cleanup
kube::log::status "performance test cleanup complete"
}
trap cleanup EXIT
kube::etcd::start
kube::log::status "performance test start"
# TODO: set log-dir and prof output dir.
DIR_BASENAME=$(basename "$(pwd)")
go test -c -o "${DIR_BASENAME}.test"
# We use the benchmark suite for profiling, because it only runs a few pods and
# should therefore have less variance.
"./${DIR_BASENAME}.test" -test.bench=. -test.run=xxxx -test.cpuprofile=prof.out -logtostderr=false
kube::log::status "benchmark tests finished"
# Run the density tests. They might take a long time.
"./${DIR_BASENAME}.test" -test.run=. -test.timeout=60m
kube::log::status "density tests finished"

View File

@@ -0,0 +1,158 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package benchmark
import (
"net/http"
"net/http/httptest"
"github.com/golang/glog"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/resource"
"k8s.io/kubernetes/pkg/api/testapi"
"k8s.io/kubernetes/pkg/client/record"
client "k8s.io/kubernetes/pkg/client/unversioned"
"k8s.io/kubernetes/pkg/master"
"k8s.io/kubernetes/plugin/pkg/scheduler"
_ "k8s.io/kubernetes/plugin/pkg/scheduler/algorithmprovider"
"k8s.io/kubernetes/plugin/pkg/scheduler/factory"
"k8s.io/kubernetes/test/integration/framework"
)
// mustSetupScheduler starts the following components:
// - k8s api server (a.k.a. master)
// - scheduler
// It returns the scheduler config factory and a destroyFunc which should be used to
// clean up resources when finished.
// Notes on rate limiting:
// - The BindPodsRateLimiter is nil, meaning no rate limiting.
// - The client rate limit (QPS and burst) is set to 5000.
func mustSetupScheduler() (schedulerConfigFactory *factory.ConfigFactory, destroyFunc func()) {
framework.DeleteAllEtcdKeys()
var m *master.Master
masterConfig := framework.NewIntegrationTestMasterConfig()
m = master.New(masterConfig)
s := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
m.Handler.ServeHTTP(w, req)
}))
c := client.NewOrDie(&client.Config{
Host: s.URL,
GroupVersion: testapi.Default.GroupVersion(),
QPS: 5000.0,
Burst: 5000,
})
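// Passing nil for the rate limiter means no BindPodsRateLimiter (see the note above).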
schedulerConfigFactory = factory.NewConfigFactory(c, nil)
schedulerConfig, err := schedulerConfigFactory.Create()
if err != nil {
panic("Couldn't create scheduler config: " + err.Error())
}
eventBroadcaster := record.NewBroadcaster()
schedulerConfig.Recorder = eventBroadcaster.NewRecorder(api.EventSource{Component: "scheduler"})
eventBroadcaster.StartRecordingToSink(c.Events(""))
scheduler.New(schedulerConfig).Run()
destroyFunc = func() {
glog.Infof("destroying")
close(schedulerConfig.StopEverything)
s.Close()
glog.Infof("destroyed")
}
return
}
func makeNodes(c client.Interface, nodeCount int) {
glog.Infof("making %d nodes", nodeCount)
baseNode := &api.Node{
ObjectMeta: api.ObjectMeta{
GenerateName: "scheduler-test-node-",
},
Spec: api.NodeSpec{
ExternalID: "foobar",
},
Status: api.NodeStatus{
Capacity: api.ResourceList{
api.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
api.ResourceCPU: resource.MustParse("4"),
api.ResourceMemory: resource.MustParse("32Gi"),
},
Phase: api.NodeRunning,
Conditions: []api.NodeCondition{
{Type: api.NodeReady, Status: api.ConditionTrue},
},
},
}
for i := 0; i < nodeCount; i++ {
if _, err := c.Nodes().Create(baseNode); err != nil {
panic("error creating node: " + err.Error())
}
}
}
// makePods sets up the specified number of scheduled pods.
// Currently each pod goes through the full scheduling path, so setting up a large
// number of pods is very slow.
// TODO: Set up pods evenly on all nodes and quickly/non-linearly.
func makePods(c client.Interface, podCount int) {
glog.Infof("making %d pods", podCount)
basePod := &api.Pod{
ObjectMeta: api.ObjectMeta{
GenerateName: "scheduler-test-pod-",
},
Spec: api.PodSpec{
Containers: []api.Container{{
Name: "pause",
Image: "gcr.io/google_containers/pause:1.0",
Resources: api.ResourceRequirements{
Limits: api.ResourceList{
api.ResourceCPU: resource.MustParse("100m"),
api.ResourceMemory: resource.MustParse("500Mi"),
},
Requests: api.ResourceList{
api.ResourceCPU: resource.MustParse("100m"),
api.ResourceMemory: resource.MustParse("500Mi"),
},
},
}},
},
}
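// Fan pod creation out over a fixed pool of worker goroutines. Note that makePods
// returns without waiting for the workers to finish; callers poll the scheduled-pod
// count to detect completion.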
threads := 30
remaining := make(chan int, 1000)
go func() {
for i := 0; i < podCount; i++ {
remaining <- i
}
close(remaining)
}()
for i := 0; i < threads; i++ {
go func() {
for {
_, ok := <-remaining
if !ok {
return
}
for {
_, err := c.Pods("default").Create(basePod)
if err == nil {
break
}
}
}
}()
}
}

View File

@@ -24,7 +24,6 @@ import (
"fmt"
"net/http"
"net/http/httptest"
"sync"
"testing"
"time"
@@ -274,133 +273,3 @@ func DoTestUnschedulableNodes(t *testing.T, restClient *client.Client, nodeStore
}
}
}
func BenchmarkScheduling(b *testing.B) {
framework.DeleteAllEtcdKeys()
var m *master.Master
s := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
m.Handler.ServeHTTP(w, req)
}))
defer s.Close()
masterConfig := framework.NewIntegrationTestMasterConfig()
m = master.New(masterConfig)
c := client.NewOrDie(&client.Config{
Host: s.URL,
GroupVersion: testapi.Default.GroupVersion(),
QPS: 5000.0,
Burst: 5000,
})
schedulerConfigFactory := factory.NewConfigFactory(c, nil)
schedulerConfig, err := schedulerConfigFactory.Create()
if err != nil {
b.Fatalf("Couldn't create scheduler config: %v", err)
}
eventBroadcaster := record.NewBroadcaster()
schedulerConfig.Recorder = eventBroadcaster.NewRecorder(api.EventSource{Component: "scheduler"})
eventBroadcaster.StartRecordingToSink(c.Events(""))
scheduler.New(schedulerConfig).Run()
defer close(schedulerConfig.StopEverything)
makeNNodes(c, 1000)
N := b.N
b.ResetTimer()
makeNPods(c, N)
for {
objs := schedulerConfigFactory.ScheduledPodLister.Store.List()
if len(objs) >= N {
fmt.Printf("%v pods scheduled.\n", len(objs))
/* // To prove that this actually works:
for _, o := range objs {
fmt.Printf("%s\n", o.(*api.Pod).Spec.NodeName)
}
*/
break
}
time.Sleep(time.Millisecond)
}
b.StopTimer()
}
func makeNNodes(c client.Interface, N int) {
baseNode := &api.Node{
ObjectMeta: api.ObjectMeta{
GenerateName: "scheduler-test-node-",
},
Spec: api.NodeSpec{
ExternalID: "foobar",
},
Status: api.NodeStatus{
Capacity: api.ResourceList{
api.ResourcePods: *resource.NewQuantity(32, resource.DecimalSI),
api.ResourceCPU: resource.MustParse("4"),
api.ResourceMemory: resource.MustParse("32Gi"),
},
Phase: api.NodeRunning,
Conditions: []api.NodeCondition{
{Type: api.NodeReady, Status: api.ConditionTrue},
},
},
}
for i := 0; i < N; i++ {
if _, err := c.Nodes().Create(baseNode); err != nil {
panic("error creating node: " + err.Error())
}
}
}
func makeNPods(c client.Interface, N int) {
basePod := &api.Pod{
ObjectMeta: api.ObjectMeta{
GenerateName: "scheduler-test-pod-",
},
Spec: api.PodSpec{
Containers: []api.Container{{
Name: "pause",
Image: "gcr.io/google_containers/pause:1.0",
Resources: api.ResourceRequirements{
Limits: api.ResourceList{
api.ResourceCPU: resource.MustParse("100m"),
api.ResourceMemory: resource.MustParse("500Mi"),
},
Requests: api.ResourceList{
api.ResourceCPU: resource.MustParse("100m"),
api.ResourceMemory: resource.MustParse("500Mi"),
},
},
}},
},
}
wg := sync.WaitGroup{}
threads := 30
wg.Add(threads)
remaining := make(chan int, N)
go func() {
for i := 0; i < N; i++ {
remaining <- i
}
close(remaining)
}()
for i := 0; i < threads; i++ {
go func() {
defer wg.Done()
for {
_, ok := <-remaining
if !ok {
return
}
for {
_, err := c.Pods("default").Create(basePod)
if err == nil {
break
}
}
}
}()
}
wg.Wait()
}