Refactor package controller

pull/6/head
Ananya Kumar 2015-07-27 18:21:37 -07:00 committed by System Administrator
parent a89aeeb55b
commit 47dd0bc6f9
12 changed files with 612 additions and 415 deletions

View File

@@ -41,7 +41,7 @@ import (
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/record"
"github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider/nodecontroller"
replicationControllerPkg "github.com/GoogleCloudPlatform/kubernetes/pkg/controller"
replicationControllerPkg "github.com/GoogleCloudPlatform/kubernetes/pkg/controller/replication"
"github.com/GoogleCloudPlatform/kubernetes/pkg/fields"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/cadvisor"

View File

@@ -38,7 +38,7 @@ import (
"github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider/nodecontroller"
"github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider/routecontroller"
"github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider/servicecontroller"
replicationControllerPkg "github.com/GoogleCloudPlatform/kubernetes/pkg/controller"
replicationControllerPkg "github.com/GoogleCloudPlatform/kubernetes/pkg/controller/replication"
"github.com/GoogleCloudPlatform/kubernetes/pkg/healthz"
"github.com/GoogleCloudPlatform/kubernetes/pkg/master/ports"
"github.com/GoogleCloudPlatform/kubernetes/pkg/namespace"

View File

@@ -35,7 +35,7 @@ import (
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider/nodecontroller"
"github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider/servicecontroller"
"github.com/GoogleCloudPlatform/kubernetes/pkg/controller"
"github.com/GoogleCloudPlatform/kubernetes/pkg/controller/replication"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/cadvisor"
kubecontainer "github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/container"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubelet/dockertools"
@@ -134,7 +134,7 @@ func runControllerManager(cl *client.Client) {
endpoints := service.NewEndpointController(cl)
go endpoints.Run(5, util.NeverStop)
controllerManager := controller.NewReplicationManager(cl, controller.BurstReplicas)
controllerManager := replication.NewReplicationManager(cl, replication.BurstReplicas)
go controllerManager.Run(5, util.NeverStop)
}

View File

@@ -32,7 +32,7 @@ import (
"github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider/nodecontroller"
"github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider/routecontroller"
"github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider/servicecontroller"
"github.com/GoogleCloudPlatform/kubernetes/pkg/controller"
"github.com/GoogleCloudPlatform/kubernetes/pkg/controller/replication"
"github.com/GoogleCloudPlatform/kubernetes/pkg/healthz"
"github.com/GoogleCloudPlatform/kubernetes/pkg/namespace"
"github.com/GoogleCloudPlatform/kubernetes/pkg/resourcequota"
@@ -110,7 +110,7 @@ func (s *CMServer) Run(_ []string) error {
endpoints := s.createEndpointController(kubeClient)
go endpoints.Run(s.ConcurrentEndpointSyncs, util.NeverStop)
controllerManager := controller.NewReplicationManager(kubeClient, controller.BurstReplicas)
controllerManager := replication.NewReplicationManager(kubeClient, replication.BurstReplicas)
go controllerManager.Run(s.ConcurrentRCSyncs, util.NeverStop)
//TODO(jdef) should eventually support more cloud providers here

View File

@@ -18,6 +18,7 @@ package controller
import (
"fmt"
"time"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api/latest"
@@ -25,83 +26,99 @@ import (
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/record"
"github.com/GoogleCloudPlatform/kubernetes/pkg/controller/framework"
"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
"github.com/GoogleCloudPlatform/kubernetes/pkg/runtime"
"github.com/golang/glog"
"sync/atomic"
)
const (
CreatedByAnnotation = "kubernetes.io/created-by"
updateRetries = 1
// If a watch drops a delete event for a pod, it'll take this long
// before a dormant controller waiting for those packets is woken up anyway. It is
// specifically targeted at the case where some problem prevents an update
// of expectations, without it the controller could stay asleep forever. This should
// be set based on the expected latency of watch events.
//
// Currently a controller can service (create *and* observe the watch events for said
// creation) about 10-20 pods a second, so it takes about 1 min to service
// 500 pods. Just creation is limited to 20qps, and watching happens with ~10-30s
// latency/pod at the scale of 3000 pods over 100 nodes.
ExpectationsTimeout = 3 * time.Minute
)
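
To make the comment's arithmetic concrete: at the lower bound of ~10 pods/s, servicing 500 pods takes 500 / 10 = 50s, i.e. about a minute, so the 3-minute TTL leaves roughly a 3x margin over the expected worst-case watch latency.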
// Expectations are a way for replication controllers to tell the rc manager what they expect. eg:
// RCExpectations: {
// rc1: expects 2 adds in 2 minutes
// rc2: expects 2 dels in 2 minutes
// rc3: expects -1 adds in 2 minutes => rc3's expectations have already been met
var (
KeyFunc = framework.DeletionHandlingMetaNamespaceKeyFunc
)
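
DeletionHandlingMetaNamespaceKeyFunc produces the usual namespace/name cache keys, unwrapping DeletedFinalStateUnknown tombstones first. A quick sketch of the expected output (assumed from the cache package's key conventions):

    key, err := KeyFunc(rc) // rc named "foobar" in namespace "default"
    // err == nil, key == "default/foobar"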
// Expectations are a way for controllers to tell the controller manager what they expect. eg:
// ControllerExpectations: {
// controller1: expects 2 adds in 2 minutes
// controller2: expects 2 dels in 2 minutes
// controller3: expects -1 adds in 2 minutes => controller3's expectations have already been met
// }
//
// Implementation:
// PodExpectation = pair of atomic counters to track pod creation/deletion
// RCExpectationsStore = TTLStore + a PodExpectation per rc
// ControllerExpectationsStore = TTLStore + a PodExpectation per controller
//
// * Once set expectations can only be lowered
// * An RC isn't synced till its expectations are either fulfilled, or expire
// * Rcs that don't set expectations will get woken up for every matching pod
// * A controller isn't synced till its expectations are either fulfilled, or expire
// * Controllers that don't set expectations will get woken up for every matching pod
// expKeyFunc to parse out the key from a PodExpectation
var expKeyFunc = func(obj interface{}) (string, error) {
// ExpKeyFunc to parse out the key from a PodExpectation
var ExpKeyFunc = func(obj interface{}) (string, error) {
if e, ok := obj.(*PodExpectations); ok {
return e.key, nil
}
return "", fmt.Errorf("Could not find key for obj %#v", obj)
}
// RCExpectationsManager is an interface that allows users to set and wait on expectations.
// ControllerExpectationsInterface is an interface that allows users to set and wait on expectations.
// Only abstracted out for testing.
type RCExpectationsManager interface {
GetExpectations(rc *api.ReplicationController) (*PodExpectations, bool, error)
SatisfiedExpectations(rc *api.ReplicationController) bool
DeleteExpectations(rcKey string)
ExpectCreations(rc *api.ReplicationController, adds int) error
ExpectDeletions(rc *api.ReplicationController, dels int) error
CreationObserved(rc *api.ReplicationController)
DeletionObserved(rc *api.ReplicationController)
// Warning: if using KeyFunc it is not safe to use a single ControllerExpectationsInterface with different
// types of controllers, because the keys might conflict across types.
type ControllerExpectationsInterface interface {
GetExpectations(controllerKey string) (*PodExpectations, bool, error)
SatisfiedExpectations(controllerKey string) bool
DeleteExpectations(controllerKey string)
SetExpectations(controllerKey string, add, del int) error
ExpectCreations(controllerKey string, adds int) error
ExpectDeletions(controllerKey string, dels int) error
CreationObserved(controllerKey string)
DeletionObserved(controllerKey string)
}
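
A rough lifecycle sketch of the interface above, using only names from this file (the exact call sequence is illustrative, not code from this commit):

    exp := NewControllerExpectations()
    key, _ := KeyFunc(rc)          // e.g. "default/foobar"
    exp.SetExpectations(key, 2, 0) // about to create 2 pods
    // ...fire the 2 creates at the apiserver...
    exp.CreationObserved(key)      // informer saw pod 1
    exp.CreationObserved(key)      // informer saw pod 2
    if exp.SatisfiedExpectations(key) {
        // safe to run another sync for this controller
    }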
// RCExpectations is a ttl cache mapping rcs to what they expect to see before being woken up for a sync.
type RCExpectations struct {
// ControllerExpectations is a ttl cache mapping controllers to what they expect to see before being woken up for a sync.
type ControllerExpectations struct {
cache.Store
}
// GetExpectations returns the PodExpectations of the given rc.
func (r *RCExpectations) GetExpectations(rc *api.ReplicationController) (*PodExpectations, bool, error) {
rcKey, err := rcKeyFunc(rc)
if err != nil {
return nil, false, err
}
if podExp, exists, err := r.GetByKey(rcKey); err == nil && exists {
// GetExpectations returns the PodExpectations of the given controller.
func (r *ControllerExpectations) GetExpectations(controllerKey string) (*PodExpectations, bool, error) {
if podExp, exists, err := r.GetByKey(controllerKey); err == nil && exists {
return podExp.(*PodExpectations), true, nil
} else {
return nil, false, err
}
}
// DeleteExpectations deletes the expectations of the given RC from the TTLStore.
func (r *RCExpectations) DeleteExpectations(rcKey string) {
if podExp, exists, err := r.GetByKey(rcKey); err == nil && exists {
// DeleteExpectations deletes the expectations of the given controller from the TTLStore.
func (r *ControllerExpectations) DeleteExpectations(controllerKey string) {
if podExp, exists, err := r.GetByKey(controllerKey); err == nil && exists {
if err := r.Delete(podExp); err != nil {
glog.V(2).Infof("Error deleting expectations for rc %v: %v", rcKey, err)
glog.V(2).Infof("Error deleting expectations for controller %v: %v", controllerKey, err)
}
}
}
// SatisfiedExpectations returns true if the replication manager has observed the required adds/dels
// for the given rc. Add/del counts are established by the rc at sync time, and updated as pods
// are observed by the replication manager's podController.
func (r *RCExpectations) SatisfiedExpectations(rc *api.ReplicationController) bool {
if podExp, exists, err := r.GetExpectations(rc); exists {
// SatisfiedExpectations returns true if the required adds/dels for the given controller have been observed.
// Add/del counts are established by the controller at sync time, and updated as pods are observed by the controller
// manager.
func (r *ControllerExpectations) SatisfiedExpectations(controllerKey string) bool {
if podExp, exists, err := r.GetExpectations(controllerKey); exists {
if podExp.Fulfilled() {
return true
} else {
@@ -111,54 +128,50 @@ func (r *RCExpectations) SatisfiedExpectations(rc *api.ReplicationController) bo
} else if err != nil {
glog.V(2).Infof("Error encountered while checking expectations %#v, forcing sync", err)
} else {
// When a new rc is created, it doesn't have expectations.
// When a new controller is created, it doesn't have expectations.
// When it doesn't see expected watch events for > TTL, the expectations expire.
// - In this case it wakes up, creates/deletes pods, and sets expectations again.
// When it has satisfied expectations and no pods need to be created/destroyed > TTL, the expectations expire.
// - In this case it continues without setting expectations till it needs to create/delete pods.
glog.V(4).Infof("Controller %v either never recorded expectations, or the ttl expired.", rc.Name)
glog.V(4).Infof("Controller %v either never recorded expectations, or the ttl expired.", controllerKey)
}
// Trigger a sync if we either encountered an error (which shouldn't happen since we're
// getting from local store) or this rc hasn't established expectations.
// getting from local store) or this controller hasn't established expectations.
return true
}
// setExpectations registers new expectations for the given rc. Forgets existing expectations.
func (r *RCExpectations) setExpectations(rc *api.ReplicationController, add, del int) error {
rcKey, err := rcKeyFunc(rc)
if err != nil {
return err
}
podExp := &PodExpectations{add: int64(add), del: int64(del), key: rcKey}
// SetExpectations registers new expectations for the given controller. Forgets existing expectations.
func (r *ControllerExpectations) SetExpectations(controllerKey string, add, del int) error {
podExp := &PodExpectations{add: int64(add), del: int64(del), key: controllerKey}
glog.V(4).Infof("Setting expectations %+v", podExp)
return r.Add(podExp)
}
func (r *RCExpectations) ExpectCreations(rc *api.ReplicationController, adds int) error {
return r.setExpectations(rc, adds, 0)
func (r *ControllerExpectations) ExpectCreations(controllerKey string, adds int) error {
return r.SetExpectations(controllerKey, adds, 0)
}
func (r *RCExpectations) ExpectDeletions(rc *api.ReplicationController, dels int) error {
return r.setExpectations(rc, 0, dels)
func (r *ControllerExpectations) ExpectDeletions(controllerKey string, dels int) error {
return r.SetExpectations(controllerKey, 0, dels)
}
// Decrements the expectation counts of the given rc.
func (r *RCExpectations) lowerExpectations(rc *api.ReplicationController, add, del int) {
if podExp, exists, err := r.GetExpectations(rc); err == nil && exists {
// Decrements the expectation counts of the given controller.
func (r *ControllerExpectations) lowerExpectations(controllerKey string, add, del int) {
if podExp, exists, err := r.GetExpectations(controllerKey); err == nil && exists {
podExp.Seen(int64(add), int64(del))
// The expectations might've been modified since the update on the previous line.
glog.V(4).Infof("Lowering expectations %+v", podExp)
}
}
// CreationObserved atomically decrements the `add` expectation count of the given replication controller.
func (r *RCExpectations) CreationObserved(rc *api.ReplicationController) {
r.lowerExpectations(rc, 1, 0)
// CreationObserved atomically decrements the `add` expectation count of the given controller.
func (r *ControllerExpectations) CreationObserved(controllerKey string) {
r.lowerExpectations(controllerKey, 1, 0)
}
// DeletionObserved atomically decrements the `del` expectation count of the given replication controller.
func (r *RCExpectations) DeletionObserved(rc *api.ReplicationController) {
r.lowerExpectations(rc, 0, 1)
// DeletionObserved atomically decrements the `del` expectation count of the given controller.
func (r *ControllerExpectations) DeletionObserved(controllerKey string) {
r.lowerExpectations(controllerKey, 0, 1)
}
// Expectations are either fulfilled, or expire naturally.
@@ -185,59 +198,74 @@ func (e *PodExpectations) Fulfilled() bool {
return atomic.LoadInt64(&e.add) <= 0 && atomic.LoadInt64(&e.del) <= 0
}
// getExpectations returns the add and del expectations of the pod.
func (e *PodExpectations) getExpectations() (int64, int64) {
// GetExpectations returns the add and del expectations of the pod.
func (e *PodExpectations) GetExpectations() (int64, int64) {
return atomic.LoadInt64(&e.add), atomic.LoadInt64(&e.del)
}
// NewRCExpectations returns a store for PodExpectations.
func NewRCExpectations() *RCExpectations {
return &RCExpectations{cache.NewTTLStore(expKeyFunc, ExpectationsTimeout)}
// NewControllerExpectations returns a store for PodExpectations.
func NewControllerExpectations() *ControllerExpectations {
return &ControllerExpectations{cache.NewTTLStore(ExpKeyFunc, ExpectationsTimeout)}
}
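
PodExpectations.Seen is called by lowerExpectations above but its body falls outside this hunk; a minimal sketch consistent with Fulfilled's atomic loads (assumed implementation, not part of the diff):

    // Seen atomically decrements the add/del counters.
    func (e *PodExpectations) Seen(add, del int64) {
        atomic.AddInt64(&e.add, -add)
        atomic.AddInt64(&e.del, -del)
    }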
// PodControlInterface is an interface that knows how to add or delete pods
// created as an interface to allow testing.
type PodControlInterface interface {
// createReplica creates new replicated pods according to the spec.
createReplica(namespace string, controller *api.ReplicationController) error
// deletePod deletes the pod identified by podID.
deletePod(namespace string, podID string) error
// CreateReplica creates new replicated pods according to the spec.
CreateReplica(namespace string, controller *api.ReplicationController) error
// DeletePod deletes the pod identified by podID.
DeletePod(namespace string, podID string) error
}
// RealPodControl is the default implementation of PodControlInterface.
type RealPodControl struct {
kubeClient client.Interface
recorder record.EventRecorder
KubeClient client.Interface
Recorder record.EventRecorder
}
func (r RealPodControl) createReplica(namespace string, controller *api.ReplicationController) error {
func getReplicaLabelSet(template *api.PodTemplateSpec) labels.Set {
desiredLabels := make(labels.Set)
for k, v := range controller.Spec.Template.Labels {
for k, v := range template.Labels {
desiredLabels[k] = v
}
return desiredLabels
}
func getReplicaAnnotationSet(template *api.PodTemplateSpec, object runtime.Object) (labels.Set, error) {
desiredAnnotations := make(labels.Set)
for k, v := range controller.Spec.Template.Annotations {
for k, v := range template.Annotations {
desiredAnnotations[k] = v
}
createdByRef, err := api.GetReference(controller)
createdByRef, err := api.GetReference(object)
if err != nil {
return fmt.Errorf("unable to get controller reference: %v", err)
return desiredAnnotations, fmt.Errorf("unable to get controller reference: %v", err)
}
createdByRefJson, err := latest.Codec.Encode(&api.SerializedReference{
Reference: *createdByRef,
})
if err != nil {
return fmt.Errorf("unable to serialize controller reference: %v", err)
return desiredAnnotations, fmt.Errorf("unable to serialize controller reference: %v", err)
}
desiredAnnotations[CreatedByAnnotation] = string(createdByRefJson)
return desiredAnnotations, nil
}
func getReplicaPrefix(controllerName string) string {
// use the dash (if the name isn't too long) to make the pod name a bit prettier
prefix := fmt.Sprintf("%s-", controller.Name)
prefix := fmt.Sprintf("%s-", controllerName)
if ok, _ := validation.ValidatePodName(prefix, true); !ok {
prefix = controller.Name
prefix = controllerName
}
return prefix
}
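
For illustration, how the prefix feeds the pod's GenerateName (the suffix below is hypothetical; the apiserver appends a random one on create):

    prefix := getReplicaPrefix("frontend") // "frontend-"
    pod := &api.Pod{ObjectMeta: api.ObjectMeta{GenerateName: prefix}}
    // After creation the server assigns a name such as "frontend-x8k2p".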
func (r RealPodControl) CreateReplica(namespace string, controller *api.ReplicationController) error {
desiredLabels := getReplicaLabelSet(controller.Spec.Template)
desiredAnnotations, err := getReplicaAnnotationSet(controller.Spec.Template, controller)
if err != nil {
return err
}
prefix := getReplicaPrefix(controller.Name)
pod := &api.Pod{
ObjectMeta: api.ObjectMeta{
@@ -252,27 +280,27 @@ func (r RealPodControl) createReplica(namespace string, controller *api.Replicat
if labels.Set(pod.Labels).AsSelector().Empty() {
return fmt.Errorf("unable to create pod replica, no labels")
}
if newPod, err := r.kubeClient.Pods(namespace).Create(pod); err != nil {
r.recorder.Eventf(controller, "failedCreate", "Error creating: %v", err)
if newPod, err := r.KubeClient.Pods(namespace).Create(pod); err != nil {
r.Recorder.Eventf(controller, "failedCreate", "Error creating: %v", err)
return fmt.Errorf("unable to create pod replica: %v", err)
} else {
glog.V(4).Infof("Controller %v created pod %v", controller.Name, newPod.Name)
r.recorder.Eventf(controller, "successfulCreate", "Created pod: %v", newPod.Name)
r.Recorder.Eventf(controller, "successfulCreate", "Created pod: %v", newPod.Name)
}
return nil
}
func (r RealPodControl) deletePod(namespace, podID string) error {
return r.kubeClient.Pods(namespace).Delete(podID, nil)
func (r RealPodControl) DeletePod(namespace, podID string) error {
return r.KubeClient.Pods(namespace).Delete(podID, nil)
}
// activePods type allows custom sorting of pods so an rc can pick the best ones to delete.
type activePods []*api.Pod
// ActivePods type allows custom sorting of pods so a controller can pick the best ones to delete.
type ActivePods []*api.Pod
func (s activePods) Len() int { return len(s) }
func (s activePods) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s ActivePods) Len() int { return len(s) }
func (s ActivePods) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s activePods) Less(i, j int) bool {
func (s ActivePods) Less(i, j int) bool {
// Unassigned < assigned
if s[i].Spec.NodeName == "" && s[j].Spec.NodeName != "" {
return true
@@ -289,21 +317,8 @@ func (s activePods) Less(i, j int) bool {
return false
}
// overlappingControllers sorts a list of controllers by creation timestamp, using their names as a tie breaker.
type overlappingControllers []api.ReplicationController
func (o overlappingControllers) Len() int { return len(o) }
func (o overlappingControllers) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
func (o overlappingControllers) Less(i, j int) bool {
if o[i].CreationTimestamp.Equal(o[j].CreationTimestamp) {
return o[i].Name < o[j].Name
}
return o[i].CreationTimestamp.Before(o[j].CreationTimestamp)
}
// filterActivePods returns pods that have not terminated.
func filterActivePods(pods []api.Pod) []*api.Pod {
// FilterActivePods returns pods that have not terminated.
func FilterActivePods(pods []api.Pod) []*api.Pod {
var result []*api.Pod
for i := range pods {
if api.PodSucceeded != pods[i].Status.Phase &&
@@ -313,39 +328,3 @@ func filterActivePods(pods []api.Pod) []*api.Pod {
}
return result
}
// updateReplicaCount attempts to update the Status.Replicas of the given controller, with a single GET/PUT retry.
func updateReplicaCount(rcClient client.ReplicationControllerInterface, controller api.ReplicationController, numReplicas int) (updateErr error) {
// This is the steady state. It happens when the rc doesn't have any expectations, since
// we do a periodic relist every 30s. If the generations differ but the replicas are
// the same, a caller might've resized to the same replica count.
if controller.Status.Replicas == numReplicas &&
controller.Generation == controller.Status.ObservedGeneration {
return nil
}
// Save the generation number we acted on, otherwise we might wrongfully indicate
// that we've seen a spec update when we retry.
// TODO: This can clobber an update if we allow multiple agents to write to the
// same status.
generation := controller.Generation
var getErr error
for i, rc := 0, &controller; ; i++ {
glog.V(4).Infof("Updating replica count for rc: %v, %d->%d (need %d), sequence No: %v->%v",
controller.Name, controller.Status.Replicas, numReplicas, controller.Spec.Replicas, controller.Status.ObservedGeneration, generation)
rc.Status = api.ReplicationControllerStatus{Replicas: numReplicas, ObservedGeneration: generation}
_, updateErr = rcClient.Update(rc)
if updateErr == nil || i >= updateRetries {
return updateErr
}
// Update the controller with the latest resource version for the next poll
if rc, getErr = rcClient.Get(controller.Name); getErr != nil {
// If the GET fails we can't trust status.Replicas anymore. This error
// is bound to be more interesting than the update failure.
return getErr
}
}
// Failed 2 updates, one of which was with the latest controller; return the update error
return
}

View File

@@ -0,0 +1,292 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package controller
import (
"fmt"
"math/rand"
"net/http/httptest"
"reflect"
"sort"
"sync"
"testing"
"time"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api/testapi"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/record"
"github.com/GoogleCloudPlatform/kubernetes/pkg/runtime"
"github.com/GoogleCloudPlatform/kubernetes/pkg/securitycontext"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
)
// NewFakeControllerExpectationsLookup creates a fake store for PodExpectations.
func NewFakeControllerExpectationsLookup(ttl time.Duration) (*ControllerExpectations, *util.FakeClock) {
fakeTime := time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC)
fakeClock := &util.FakeClock{fakeTime}
ttlPolicy := &cache.TTLPolicy{ttl, fakeClock}
ttlStore := cache.NewFakeExpirationStore(
ExpKeyFunc, nil, ttlPolicy, fakeClock)
return &ControllerExpectations{ttlStore}, fakeClock
}
func newReplicationController(replicas int) *api.ReplicationController {
rc := &api.ReplicationController{
TypeMeta: api.TypeMeta{APIVersion: testapi.Version()},
ObjectMeta: api.ObjectMeta{
UID: util.NewUUID(),
Name: "foobar",
Namespace: api.NamespaceDefault,
ResourceVersion: "18",
},
Spec: api.ReplicationControllerSpec{
Replicas: replicas,
Selector: map[string]string{"foo": "bar"},
Template: &api.PodTemplateSpec{
ObjectMeta: api.ObjectMeta{
Labels: map[string]string{
"name": "foo",
"type": "production",
},
},
Spec: api.PodSpec{
Containers: []api.Container{
{
Image: "foo/bar",
TerminationMessagePath: api.TerminationMessagePathDefault,
ImagePullPolicy: api.PullIfNotPresent,
SecurityContext: securitycontext.ValidSecurityContextWithContainerDefaults(),
},
},
RestartPolicy: api.RestartPolicyAlways,
DNSPolicy: api.DNSDefault,
NodeSelector: map[string]string{
"baz": "blah",
},
},
},
},
}
return rc
}
// create count pods with the given phase for the given rc (same selectors and namespace), and add them to the store.
func newPodList(store cache.Store, count int, status api.PodPhase, rc *api.ReplicationController) *api.PodList {
pods := []api.Pod{}
for i := 0; i < count; i++ {
newPod := api.Pod{
ObjectMeta: api.ObjectMeta{
Name: fmt.Sprintf("pod%d", i),
Labels: rc.Spec.Selector,
Namespace: rc.Namespace,
},
Status: api.PodStatus{Phase: status},
}
if store != nil {
store.Add(&newPod)
}
pods = append(pods, newPod)
}
return &api.PodList{
Items: pods,
}
}
func TestControllerExpectations(t *testing.T) {
ttl := 30 * time.Second
e, fakeClock := NewFakeControllerExpectationsLookup(ttl)
// In practice we can't really have add and delete expectations since we only either create or
// delete replicas in one rc pass, and the rc goes to sleep soon after until the expectations are
// either fulfilled or time out.
adds, dels := 10, 30
rc := newReplicationController(1)
// RC fires off adds and deletes at apiserver, then sets expectations
rcKey, err := KeyFunc(rc)
if err != nil {
t.Errorf("Couldn't get key for object %+v: %v", rc, err)
}
e.SetExpectations(rcKey, adds, dels)
var wg sync.WaitGroup
for i := 0; i < adds+1; i++ {
wg.Add(1)
go func() {
// In prod this can happen either because of a failed create by the rc
// or after having observed a create via informer
e.CreationObserved(rcKey)
wg.Done()
}()
}
wg.Wait()
// There are still delete expectations
if e.SatisfiedExpectations(rcKey) {
t.Errorf("Rc will sync before expectations are met")
}
for i := 0; i < dels+1; i++ {
wg.Add(1)
go func() {
e.DeletionObserved(rcKey)
wg.Done()
}()
}
wg.Wait()
// Expectations have been surpassed
if podExp, exists, err := e.GetExpectations(rcKey); err == nil && exists {
add, del := podExp.GetExpectations()
if add != -1 || del != -1 {
t.Errorf("Unexpected pod expectations %#v", podExp)
}
} else {
t.Errorf("Could not get expectations for rc, exists %v and err %v", exists, err)
}
if !e.SatisfiedExpectations(rcKey) {
t.Errorf("Expectations are met but the rc will not sync")
}
// Next round of rc sync, old expectations are cleared
e.SetExpectations(rcKey, 1, 2)
if podExp, exists, err := e.GetExpectations(rcKey); err == nil && exists {
add, del := podExp.GetExpectations()
if add != 1 || del != 2 {
t.Errorf("Unexpected pod expectations %#v", podExp)
}
} else {
t.Errorf("Could not get expectations for rc, exists %v and err %v", exists, err)
}
// Expectations have expired because of ttl
fakeClock.Time = fakeClock.Time.Add(ttl + 1)
if !e.SatisfiedExpectations(rcKey) {
t.Errorf("Expectations should have expired but didn't")
}
}
func TestCreateReplica(t *testing.T) {
ns := api.NamespaceDefault
body := runtime.EncodeOrDie(testapi.Codec(), &api.Pod{ObjectMeta: api.ObjectMeta{Name: "empty_pod"}})
fakeHandler := util.FakeHandler{
StatusCode: 200,
ResponseBody: string(body),
}
testServer := httptest.NewServer(&fakeHandler)
defer testServer.Close()
client := client.NewOrDie(&client.Config{Host: testServer.URL, Version: testapi.Version()})
podControl := RealPodControl{
KubeClient: client,
Recorder: &record.FakeRecorder{},
}
controllerSpec := newReplicationController(1)
// Make sure CreateReplica sends a POST to the apiserver with a pod from the controller's pod template
podControl.CreateReplica(ns, controllerSpec)
expectedPod := api.Pod{
ObjectMeta: api.ObjectMeta{
Labels: controllerSpec.Spec.Template.Labels,
GenerateName: fmt.Sprintf("%s-", controllerSpec.Name),
},
Spec: controllerSpec.Spec.Template.Spec,
}
fakeHandler.ValidateRequest(t, testapi.ResourcePath("pods", api.NamespaceDefault, ""), "POST", nil)
actualPod, err := client.Codec.Decode([]byte(fakeHandler.RequestBody))
if err != nil {
t.Errorf("Unexpected error: %#v", err)
}
if !api.Semantic.DeepDerivative(&expectedPod, actualPod) {
t.Logf("Body: %s", fakeHandler.RequestBody)
t.Errorf("Unexpected mismatch. Expected\n %#v,\n Got:\n %#v", &expectedPod, actualPod)
}
}
func TestActivePodFiltering(t *testing.T) {
// This rc is not needed by the test, only the newPodList to give the pods labels/a namespace.
rc := newReplicationController(0)
podList := newPodList(nil, 5, api.PodRunning, rc)
podList.Items[0].Status.Phase = api.PodSucceeded
podList.Items[1].Status.Phase = api.PodFailed
expectedNames := util.NewStringSet()
for _, pod := range podList.Items[2:] {
expectedNames.Insert(pod.Name)
}
got := FilterActivePods(podList.Items)
gotNames := util.NewStringSet()
for _, pod := range got {
gotNames.Insert(pod.Name)
}
if expectedNames.Difference(gotNames).Len() != 0 || gotNames.Difference(expectedNames).Len() != 0 {
t.Errorf("expected %v, got %v", expectedNames.List(), gotNames.List())
}
}
func TestSortingActivePods(t *testing.T) {
numPods := 5
// This rc is not needed by the test, only the newPodList to give the pods labels/a namespace.
rc := newReplicationController(0)
podList := newPodList(nil, numPods, api.PodRunning, rc)
pods := make([]*api.Pod, len(podList.Items))
for i := range podList.Items {
pods[i] = &podList.Items[i]
}
// pods[0] is not scheduled yet.
pods[0].Spec.NodeName = ""
pods[0].Status.Phase = api.PodPending
// pods[1] is scheduled but pending.
pods[1].Spec.NodeName = "bar"
pods[1].Status.Phase = api.PodPending
// pods[2] is unknown.
pods[2].Spec.NodeName = "foo"
pods[2].Status.Phase = api.PodUnknown
// pods[3] is running but not ready.
pods[3].Spec.NodeName = "foo"
pods[3].Status.Phase = api.PodRunning
// pods[4] is running and ready.
pods[4].Spec.NodeName = "foo"
pods[4].Status.Phase = api.PodRunning
pods[4].Status.Conditions = []api.PodCondition{{Type: api.PodReady, Status: api.ConditionTrue}}
getOrder := func(pods []*api.Pod) []string {
names := make([]string, len(pods))
for i := range pods {
names[i] = pods[i].Name
}
return names
}
expected := getOrder(pods)
for i := 0; i < 20; i++ {
idx := rand.Perm(numPods)
randomizedPods := make([]*api.Pod, numPods)
for j := 0; j < numPods; j++ {
randomizedPods[j] = pods[idx[j]]
}
sort.Sort(ActivePods(randomizedPods))
actual := getOrder(randomizedPods)
if !reflect.DeepEqual(actual, expected) {
t.Errorf("expected %v, got %v", expected, actual)
}
}
}

View File

@@ -1,5 +1,5 @@
/*
Copyright 2014 The Kubernetes Authors All rights reserved.
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -14,6 +14,6 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
// Package controller contains logic for watching and synchronizing
// replicationControllers.
// Package controller contains code for controllers (like the replication
// controller).
package controller

View File

@@ -0,0 +1,19 @@
/*
Copyright 2014 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package replication contains logic for watching and synchronizing
// replication controllers.
package replication

View File

@@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
package controller
package replication
import (
"reflect"
@@ -26,6 +26,7 @@ import (
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/record"
"github.com/GoogleCloudPlatform/kubernetes/pkg/controller"
"github.com/GoogleCloudPlatform/kubernetes/pkg/controller/framework"
"github.com/GoogleCloudPlatform/kubernetes/pkg/fields"
"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
@@ -36,10 +37,6 @@ import (
"github.com/golang/glog"
)
var (
rcKeyFunc = framework.DeletionHandlingMetaNamespaceKeyFunc
)
const (
// We'll attempt to recompute the required replicas of all replication controllers
// that have fulfilled their expectations at least this often. This recomputation
@@ -52,18 +49,6 @@ const (
// final resting state of the pod.
PodRelistPeriod = 5 * time.Minute
// If a watch drops a delete event for a pod, it'll take this long
// before a dormant rc waiting for those packets is woken up anyway. It is
// specifically targeted at the case where some problem prevents an update
// of expectations, without it the RC could stay asleep forever. This should
// be set based on the expected latency of watch events.
//
// Currently an rc can service (create *and* observe the watch events for said
// creation) about 10-20 pods a second, so it takes about 1 min to service
// 500 pods. Just creation is limited to 20qps, and watching happens with ~10-30s
// latency/pod at the scale of 3000 pods over 100 nodes.
ExpectationsTimeout = 3 * time.Minute
// Realistic value of the burstReplica field for the replication manager based off
// performance requirements for kubernetes 1.0.
BurstReplicas = 500
@@ -71,13 +56,16 @@ const (
// We must avoid counting pods until the pod store has synced. If it hasn't synced, to
// avoid a hot loop, we'll wait this long between checks.
PodStoreSyncedPollPeriod = 100 * time.Millisecond
// The number of times we retry updating a replication controller's status.
statusUpdateRetries = 1
)
// ReplicationManager is responsible for synchronizing ReplicationController objects stored
// in the system with actual running pods.
type ReplicationManager struct {
kubeClient client.Interface
podControl PodControlInterface
podControl controller.PodControlInterface
// An rc is temporarily suspended after creating/deleting this many replicas.
// It resumes normal action after observing the watch events for them.
@@ -90,9 +78,11 @@ type ReplicationManager struct {
podStoreSynced func() bool
// A TTLCache of pod creates/deletes each rc expects to see
expectations RCExpectationsManager
expectations controller.ControllerExpectationsInterface
// A store of replication controllers, populated by the rcController
rcStore cache.StoreToReplicationControllerLister
// A store of pods, populated by the podController
podStore cache.StoreToPodLister
// Watches changes to all replication controllers
@@ -111,12 +101,12 @@ func NewReplicationManager(kubeClient client.Interface, burstReplicas int) *Repl
rm := &ReplicationManager{
kubeClient: kubeClient,
podControl: RealPodControl{
kubeClient: kubeClient,
recorder: eventBroadcaster.NewRecorder(api.EventSource{Component: "replication-controller"}),
podControl: controller.RealPodControl{
KubeClient: kubeClient,
Recorder: eventBroadcaster.NewRecorder(api.EventSource{Component: "replication-controller"}),
},
burstReplicas: burstReplicas,
expectations: NewRCExpectations(),
expectations: controller.NewControllerExpectations(),
queue: workqueue.New(),
}
@@ -185,7 +175,7 @@ func NewReplicationManager(kubeClient client.Interface, burstReplicas int) *Repl
func (rm *ReplicationManager) SetEventRecorder(recorder record.EventRecorder) {
// TODO: Hack. We can't cleanly shutdown the event recorder, so benchmarks
// need to pass in a fake.
rm.podControl = RealPodControl{rm.kubeClient, recorder}
rm.podControl = controller.RealPodControl{rm.kubeClient, recorder}
}
// Run begins watching and syncing.
@@ -222,7 +212,12 @@ func (rm *ReplicationManager) getPodControllers(pod *api.Pod) *api.ReplicationCo
func (rm *ReplicationManager) addPod(obj interface{}) {
pod := obj.(*api.Pod)
if rc := rm.getPodControllers(pod); rc != nil {
rm.expectations.CreationObserved(rc)
rcKey, err := controller.KeyFunc(rc)
if err != nil {
glog.Errorf("Couldn't get key for replication controller %#v: %v", rc, err)
return
}
rm.expectations.CreationObserved(rcKey)
rm.enqueueController(rc)
}
}
@@ -263,24 +258,29 @@ func (rm *ReplicationManager) deletePod(obj interface{}) {
if !ok {
tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
if !ok {
glog.Errorf("Couldn't get object from tombstone %+v, could take up to %v before a controller recreates a replica", obj, ExpectationsTimeout)
glog.Errorf("Couldn't get object from tombstone %+v, could take up to %v before a controller recreates a replica", obj, controller.ExpectationsTimeout)
return
}
pod, ok = tombstone.Obj.(*api.Pod)
if !ok {
glog.Errorf("Tombstone contained object that is not a pod %+v, could take up to %v before controller recreates a replica", obj, ExpectationsTimeout)
glog.Errorf("Tombstone contained object that is not a pod %+v, could take up to %v before controller recreates a replica", obj, controller.ExpectationsTimeout)
return
}
}
if rc := rm.getPodControllers(pod); rc != nil {
rm.expectations.DeletionObserved(rc)
rcKey, err := controller.KeyFunc(rc)
if err != nil {
glog.Errorf("Couldn't get key for replication controller %#v: %v", rc, err)
return
}
rm.expectations.DeletionObserved(rcKey)
rm.enqueueController(rc)
}
}
// obj could be an *api.ReplicationController, or a DeletedFinalStateUnknown marker item.
func (rm *ReplicationManager) enqueueController(obj interface{}) {
key, err := rcKeyFunc(obj)
key, err := controller.KeyFunc(obj)
if err != nil {
glog.Errorf("Couldn't get key for object %+v: %v", obj, err)
return
@@ -314,24 +314,29 @@ func (rm *ReplicationManager) worker() {
}
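
The worker body itself is outside this hunk; roughly, it drains keys from the workqueue and hands them to syncReplicationController, along these lines (a sketch assuming the workqueue's Get/Done contract):

    func (rm *ReplicationManager) worker() {
        for {
            key, quit := rm.queue.Get()
            if quit {
                return
            }
            if err := rm.syncReplicationController(key.(string)); err != nil {
                glog.Errorf("Error syncing replication controller: %v", err)
            }
            rm.queue.Done(key) // allow this key to be re-queued
        }
    }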
// manageReplicas checks and updates replicas for the given replication controller.
func (rm *ReplicationManager) manageReplicas(filteredPods []*api.Pod, controller *api.ReplicationController) {
diff := len(filteredPods) - controller.Spec.Replicas
func (rm *ReplicationManager) manageReplicas(filteredPods []*api.Pod, rc *api.ReplicationController) {
diff := len(filteredPods) - rc.Spec.Replicas
rcKey, err := controller.KeyFunc(rc)
if err != nil {
glog.Errorf("Couldn't get key for replication controller %#v: %v", rc, err)
return
}
if diff < 0 {
diff *= -1
if diff > rm.burstReplicas {
diff = rm.burstReplicas
}
rm.expectations.ExpectCreations(controller, diff)
rm.expectations.ExpectCreations(rcKey, diff)
wait := sync.WaitGroup{}
wait.Add(diff)
glog.V(2).Infof("Too few %q/%q replicas, need %d, creating %d", controller.Namespace, controller.Name, controller.Spec.Replicas, diff)
glog.V(2).Infof("Too few %q/%q replicas, need %d, creating %d", rc.Namespace, rc.Name, rc.Spec.Replicas, diff)
for i := 0; i < diff; i++ {
go func() {
defer wait.Done()
if err := rm.podControl.createReplica(controller.Namespace, controller); err != nil {
if err := rm.podControl.CreateReplica(rc.Namespace, rc); err != nil {
// Decrement the expected number of creates because the informer won't observe this pod
glog.V(2).Infof("Failed creation, decrementing expectations for controller %q/%q", controller.Namespace, controller.Name)
rm.expectations.CreationObserved(controller)
glog.V(2).Infof("Failed creation, decrementing expectations for controller %q/%q", rc.Namespace, rc.Name)
rm.expectations.CreationObserved(rcKey)
util.HandleError(err)
}
}()
@@ -341,14 +346,14 @@ func (rm *ReplicationManager) manageReplicas(filteredPods []*api.Pod, controller
if diff > rm.burstReplicas {
diff = rm.burstReplicas
}
rm.expectations.ExpectDeletions(controller, diff)
glog.V(2).Infof("Too many %q/%q replicas, need %d, deleting %d", controller.Namespace, controller.Name, controller.Spec.Replicas, diff)
rm.expectations.ExpectDeletions(rcKey, diff)
glog.V(2).Infof("Too many %q/%q replicas, need %d, deleting %d", rc.Namespace, rc.Name, rc.Spec.Replicas, diff)
// No need to sort pods if we are about to delete all of them
if controller.Spec.Replicas != 0 {
if rc.Spec.Replicas != 0 {
// Sort the pods in the order such that not-ready < ready, unscheduled
// < scheduled, and pending < running. This ensures that we delete pods
// in the earlier stages whenever possible.
sort.Sort(activePods(filteredPods))
sort.Sort(controller.ActivePods(filteredPods))
}
wait := sync.WaitGroup{}
@@ -356,10 +361,10 @@ func (rm *ReplicationManager) manageReplicas(filteredPods []*api.Pod, controller
for i := 0; i < diff; i++ {
go func(ix int) {
defer wait.Done()
if err := rm.podControl.deletePod(controller.Namespace, filteredPods[ix].Name); err != nil {
if err := rm.podControl.DeletePod(rc.Namespace, filteredPods[ix].Name); err != nil {
// Decrement the expected number of deletes because the informer won't observe this deletion
glog.V(2).Infof("Failed deletion, decrementing expectations for controller %q/%q", controller.Namespace, controller.Name)
rm.expectations.DeletionObserved(controller)
glog.V(2).Infof("Failed deletion, decrementing expectations for controller %q/%q", rc.Namespace, rc.Name)
rm.expectations.DeletionObserved(rcKey)
}
}(i)
}
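
Both branches above preserve one invariant worth stating once: expectations are registered before the API calls are fired, and any call that fails is immediately "observed" by hand, because the informer will never deliver a watch event for a pod that was never created (or never deleted). Without that manual decrement the rc would stay dormant until the ExpectationsTimeout TTL fires.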
@@ -387,20 +392,25 @@ func (rm *ReplicationManager) syncReplicationController(key string) error {
rm.queue.Add(key)
return err
}
controller := *obj.(*api.ReplicationController)
rc := *obj.(*api.ReplicationController)
if !rm.podStoreSynced() {
// Sleep so we give the pod reflector goroutine a chance to run.
time.Sleep(PodStoreSyncedPollPeriod)
glog.Infof("Waiting for pods controller to sync, requeuing rc %v", controller.Name)
rm.enqueueController(&controller)
glog.Infof("Waiting for pods controller to sync, requeuing rc %v", rc.Name)
rm.enqueueController(&rc)
return nil
}
// Check the expectations of the rc before counting active pods, otherwise a new pod can sneak in
// and update the expectations after we've retrieved active pods from the store. If a new pod enters
// the store after we've checked the expectation, the rc sync is just deferred till the next relist.
rcNeedsSync := rm.expectations.SatisfiedExpectations(&controller)
podList, err := rm.podStore.Pods(controller.Namespace).List(labels.Set(controller.Spec.Selector).AsSelector())
rcKey, err := controller.KeyFunc(&rc)
if err != nil {
glog.Errorf("Couldn't get key for replication controller %#v: %v", rc, err)
return err
}
rcNeedsSync := rm.expectations.SatisfiedExpectations(rcKey)
podList, err := rm.podStore.Pods(rc.Namespace).List(labels.Set(rc.Spec.Selector).AsSelector())
if err != nil {
glog.Errorf("Error getting pods for rc %q: %v", key, err)
rm.queue.Add(key)
@@ -408,17 +418,17 @@ func (rm *ReplicationManager) syncReplicationController(key string) error {
}
// TODO: Do this in a single pass, or use an index.
filteredPods := filterActivePods(podList.Items)
filteredPods := controller.FilterActivePods(podList.Items)
if rcNeedsSync {
rm.manageReplicas(filteredPods, &controller)
rm.manageReplicas(filteredPods, &rc)
}
// Always updates status as pods come up or die.
if err := updateReplicaCount(rm.kubeClient.ReplicationControllers(controller.Namespace), controller, len(filteredPods)); err != nil {
if err := updateReplicaCount(rm.kubeClient.ReplicationControllers(rc.Namespace), rc, len(filteredPods)); err != nil {
// Multiple things could lead to this update failing. Requeuing the controller ensures
// we retry with some fairness.
glog.V(2).Infof("Failed to update replica count for controller %v, requeuing", controller.Name)
rm.enqueueController(&controller)
glog.V(2).Infof("Failed to update replica count for controller %v, requeuing", rc.Name)
rm.enqueueController(&rc)
}
return nil
}

View File

@@ -14,15 +14,13 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
package controller
package replication
import (
"fmt"
"math/rand"
"net/http"
"net/http/httptest"
"reflect"
"sort"
"sync"
"testing"
"time"
@@ -31,8 +29,8 @@ import (
"github.com/GoogleCloudPlatform/kubernetes/pkg/api/testapi"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/record"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/testclient"
"github.com/GoogleCloudPlatform/kubernetes/pkg/controller"
"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
"github.com/GoogleCloudPlatform/kubernetes/pkg/runtime"
"github.com/GoogleCloudPlatform/kubernetes/pkg/securitycontext"
@@ -60,7 +58,7 @@ func init() {
api.ForTesting_ReferencesAllowBlankSelfLinks = true
}
func (f *FakePodControl) createReplica(namespace string, spec *api.ReplicationController) error {
func (f *FakePodControl) CreateReplica(namespace string, spec *api.ReplicationController) error {
f.lock.Lock()
defer f.lock.Unlock()
if f.err != nil {
@@ -70,7 +68,7 @@ func (f *FakePodControl) createReplica(namespace string, spec *api.ReplicationCo
return nil
}
func (f *FakePodControl) deletePod(namespace string, podName string) error {
func (f *FakePodControl) DeletePod(namespace string, podName string) error {
f.lock.Lock()
defer f.lock.Unlock()
if f.err != nil {
@@ -79,7 +77,6 @@ func (f *FakePodControl) deletePod(namespace string, podName string) error {
f.deletePodName = append(f.deletePodName, podName)
return nil
}
func (f *FakePodControl) clear() {
f.lock.Lock()
defer f.lock.Unlock()
@@ -88,7 +85,7 @@ func (f *FakePodControl) clear() {
}
func getKey(rc *api.ReplicationController, t *testing.T) string {
if key, err := rcKeyFunc(rc); err != nil {
if key, err := controller.KeyFunc(rc); err != nil {
t.Errorf("Unexpected error getting key for rc %v: %v", rc.Name, err)
return ""
} else {
@@ -301,45 +298,6 @@ func TestSyncReplicationControllerCreates(t *testing.T) {
validateSyncReplication(t, &fakePodControl, 2, 0)
}
func TestCreateReplica(t *testing.T) {
ns := api.NamespaceDefault
body := runtime.EncodeOrDie(testapi.Codec(), &api.Pod{ObjectMeta: api.ObjectMeta{Name: "empty_pod"}})
fakeHandler := util.FakeHandler{
StatusCode: 200,
ResponseBody: string(body),
}
testServer := httptest.NewServer(&fakeHandler)
defer testServer.Close()
client := client.NewOrDie(&client.Config{Host: testServer.URL, Version: testapi.Version()})
podControl := RealPodControl{
kubeClient: client,
recorder: &record.FakeRecorder{},
}
controllerSpec := newReplicationController(1)
// Make sure createReplica sends a POST to the apiserver with a pod from the controller's pod template
podControl.createReplica(ns, controllerSpec)
expectedPod := api.Pod{
ObjectMeta: api.ObjectMeta{
Labels: controllerSpec.Spec.Template.Labels,
GenerateName: fmt.Sprintf("%s-", controllerSpec.Name),
},
Spec: controllerSpec.Spec.Template.Spec,
}
fakeHandler.ValidateRequest(t, testapi.ResourcePath("pods", api.NamespaceDefault, ""), "POST", nil)
actualPod, err := client.Codec.Decode([]byte(fakeHandler.RequestBody))
if err != nil {
t.Errorf("Unexpected error: %#v", err)
}
if !api.Semantic.DeepDerivative(&expectedPod, actualPod) {
t.Logf("Body: %s", fakeHandler.RequestBody)
t.Errorf("Unexpected mismatch. Expected\n %#v,\n Got:\n %#v", &expectedPod, actualPod)
}
}
func TestStatusUpdatesWithoutReplicasChange(t *testing.T) {
// Setup a fake server to listen for requests, and run the rc manager in steady state
fakeHandler := util.FakeHandler{
@@ -420,156 +378,6 @@ func TestControllerUpdateReplicas(t *testing.T) {
validateSyncReplication(t, &fakePodControl, 1, 0)
}
func TestActivePodFiltering(t *testing.T) {
// This rc is not needed by the test, only the newPodList to give the pods labels/a namespace.
rc := newReplicationController(0)
podList := newPodList(nil, 5, api.PodRunning, rc)
podList.Items[0].Status.Phase = api.PodSucceeded
podList.Items[1].Status.Phase = api.PodFailed
expectedNames := util.NewStringSet()
for _, pod := range podList.Items[2:] {
expectedNames.Insert(pod.Name)
}
got := filterActivePods(podList.Items)
gotNames := util.NewStringSet()
for _, pod := range got {
gotNames.Insert(pod.Name)
}
if expectedNames.Difference(gotNames).Len() != 0 || gotNames.Difference(expectedNames).Len() != 0 {
t.Errorf("expected %v, got %v", expectedNames.List(), gotNames.List())
}
}
func TestSortingActivePods(t *testing.T) {
numPods := 5
// This rc is not needed by the test, only the newPodList to give the pods labels/a namespace.
rc := newReplicationController(0)
podList := newPodList(nil, numPods, api.PodRunning, rc)
pods := make([]*api.Pod, len(podList.Items))
for i := range podList.Items {
pods[i] = &podList.Items[i]
}
// pods[0] is not scheduled yet.
pods[0].Spec.NodeName = ""
pods[0].Status.Phase = api.PodPending
// pods[1] is scheduled but pending.
pods[1].Spec.NodeName = "bar"
pods[1].Status.Phase = api.PodPending
// pods[2] is unknown.
pods[2].Spec.NodeName = "foo"
pods[2].Status.Phase = api.PodUnknown
// pods[3] is running but not ready.
pods[3].Spec.NodeName = "foo"
pods[3].Status.Phase = api.PodRunning
// pods[4] is running and ready.
pods[4].Spec.NodeName = "foo"
pods[4].Status.Phase = api.PodRunning
pods[4].Status.Conditions = []api.PodCondition{{Type: api.PodReady, Status: api.ConditionTrue}}
getOrder := func(pods []*api.Pod) []string {
names := make([]string, len(pods))
for i := range pods {
names[i] = pods[i].Name
}
return names
}
expected := getOrder(pods)
for i := 0; i < 20; i++ {
idx := rand.Perm(numPods)
randomizedPods := make([]*api.Pod, numPods)
for j := 0; j < numPods; j++ {
randomizedPods[j] = pods[idx[j]]
}
sort.Sort(activePods(randomizedPods))
actual := getOrder(randomizedPods)
if !reflect.DeepEqual(actual, expected) {
t.Errorf("expected %v, got %v", expected, actual)
}
}
}
// NewFakeRCExpectationsLookup creates a fake store for PodExpectations.
func NewFakeRCExpectationsLookup(ttl time.Duration) (*RCExpectations, *util.FakeClock) {
fakeTime := time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC)
fakeClock := &util.FakeClock{fakeTime}
ttlPolicy := &cache.TTLPolicy{ttl, fakeClock}
ttlStore := cache.NewFakeExpirationStore(
expKeyFunc, nil, ttlPolicy, fakeClock)
return &RCExpectations{ttlStore}, fakeClock
}
func TestRCExpectations(t *testing.T) {
ttl := 30 * time.Second
e, fakeClock := NewFakeRCExpectationsLookup(ttl)
// In practice we can't really have add and delete expectations since we only either create or
// delete replicas in one rc pass, and the rc goes to sleep soon after until the expectations are
// either fulfilled or time out.
adds, dels := 10, 30
rc := newReplicationController(1)
// RC fires off adds and deletes at apiserver, then sets expectations
e.setExpectations(rc, adds, dels)
var wg sync.WaitGroup
for i := 0; i < adds+1; i++ {
wg.Add(1)
go func() {
// In prod this can happen either because of a failed create by the rc
// or after having observed a create via informer
e.CreationObserved(rc)
wg.Done()
}()
}
wg.Wait()
// There are still delete expectations
if e.SatisfiedExpectations(rc) {
t.Errorf("Rc will sync before expectations are met")
}
for i := 0; i < dels+1; i++ {
wg.Add(1)
go func() {
e.DeletionObserved(rc)
wg.Done()
}()
}
wg.Wait()
// Expectations have been surpassed
if podExp, exists, err := e.GetExpectations(rc); err == nil && exists {
add, del := podExp.getExpectations()
if add != -1 || del != -1 {
t.Errorf("Unexpected pod expectations %#v", podExp)
}
} else {
t.Errorf("Could not get expectations for rc, exists %v and err %v", exists, err)
}
if !e.SatisfiedExpectations(rc) {
t.Errorf("Expectations are met but the rc will not sync")
}
// Next round of rc sync, old expectations are cleared
e.setExpectations(rc, 1, 2)
if podExp, exists, err := e.GetExpectations(rc); err == nil && exists {
add, del := podExp.getExpectations()
if add != 1 || del != 2 {
t.Errorf("Unexpected pod expectations %#v", podExp)
}
} else {
t.Errorf("Could not get expectations for rc, exists %v and err %v", exists, err)
}
// Expectations have expired because of ttl
fakeClock.Time = fakeClock.Time.Add(ttl + 1)
if !e.SatisfiedExpectations(rc) {
t.Errorf("Expectations should have expired but didn't")
}
}
func TestSyncReplicationControllerDormancy(t *testing.T) {
// Setup a test server so we can lie about the current state of pods
fakeHandler := util.FakeHandler{
@@ -600,9 +408,15 @@ func TestSyncReplicationControllerDormancy(t *testing.T) {
manager.syncReplicationController(getKey(controllerSpec, t))
validateSyncReplication(t, &fakePodControl, 0, 0)
// Get the key for the controller
rcKey, err := controller.KeyFunc(controllerSpec)
if err != nil {
t.Errorf("Couldn't get key for object %+v: %v", controllerSpec, err)
}
// Lowering expectations should lead to a sync that creates a replica, however the
// fakePodControl error will prevent this, leaving expectations at 0, 0
manager.expectations.CreationObserved(controllerSpec)
manager.expectations.CreationObserved(rcKey)
controllerSpec.Status.Replicas = 1
fakePodControl.clear()
fakePodControl.err = fmt.Errorf("Fake Error")
@@ -920,6 +734,11 @@ func doTestControllerBurstReplicas(t *testing.T, burstReplicas, numReplicas int)
expectedPods := 0
pods := newPodList(nil, numReplicas, api.PodPending, controllerSpec)
rcKey, err := controller.KeyFunc(controllerSpec)
if err != nil {
t.Errorf("Couldn't get key for object %+v: %v", controllerSpec, err)
}
// Size up the controller, then size it down, and confirm the expected create/delete pattern
for _, replicas := range []int{numReplicas, 0} {
@@ -949,11 +768,11 @@ func doTestControllerBurstReplicas(t *testing.T, burstReplicas, numReplicas int)
manager.addPod(&pods.Items[i])
}
podExp, exists, err := manager.expectations.GetExpectations(controllerSpec)
podExp, exists, err := manager.expectations.GetExpectations(rcKey)
if !exists || err != nil {
t.Fatalf("Did not find expectations for rc.")
}
if add, _ := podExp.getExpectations(); add != 1 {
if add, _ := podExp.GetExpectations(); add != 1 {
t.Fatalf("Expectations are wrong %v", podExp)
}
} else {
@@ -966,11 +785,11 @@ func doTestControllerBurstReplicas(t *testing.T, burstReplicas, numReplicas int)
manager.podStore.Store.Delete(&pods.Items[i])
manager.deletePod(&pods.Items[i])
}
podExp, exists, err := manager.expectations.GetExpectations(controllerSpec)
podExp, exists, err := manager.expectations.GetExpectations(rcKey)
if !exists || err != nil {
t.Fatalf("Did not find expectations for rc.")
}
if _, del := podExp.getExpectations(); del != 1 {
if _, del := podExp.GetExpectations(); del != 1 {
t.Fatalf("Expectations are wrong %v", podExp)
}
}
@@ -1010,12 +829,12 @@ func TestControllerBurstReplicas(t *testing.T) {
}
type FakeRCExpectations struct {
*RCExpectations
*controller.ControllerExpectations
satisfied bool
expSatisfied func()
}
func (fe FakeRCExpectations) SatisfiedExpectations(rc *api.ReplicationController) bool {
func (fe FakeRCExpectations) SatisfiedExpectations(controllerKey string) bool {
fe.expSatisfied()
return fe.satisfied
}
@@ -1036,7 +855,7 @@ func TestRCSyncExpectations(t *testing.T) {
postExpectationsPod := pods.Items[1]
manager.expectations = FakeRCExpectations{
NewRCExpectations(), true, func() {
controller.NewControllerExpectations(), true, func() {
// If we check active pods before checking expectations, the rc
// will create a new replica because it doesn't see this pod, but
// has fulfilled its expectations.
@@ -1063,16 +882,22 @@ func TestDeleteControllerAndExpectations(t *testing.T) {
validateSyncReplication(t, &fakePodControl, 1, 0)
fakePodControl.clear()
// Get the RC key
rcKey, err := controller.KeyFunc(rc)
if err != nil {
t.Errorf("Couldn't get key for object %+v: %v", rc, err)
}
// This is to simulate a concurrent addPod, that has a handle on the expectations
// as the controller deletes it.
podExp, exists, err := manager.expectations.GetExpectations(rc)
podExp, exists, err := manager.expectations.GetExpectations(rcKey)
if !exists || err != nil {
t.Errorf("No expectations found for rc")
}
manager.rcStore.Delete(rc)
manager.syncReplicationController(getKey(rc, t))
if _, exists, err = manager.expectations.GetExpectations(rc); exists {
if _, exists, err = manager.expectations.GetExpectations(rcKey); exists {
t.Errorf("Found expectations, expected none since the rc has been deleted.")
}

View File

@@ -0,0 +1,72 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package replication
import (
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/golang/glog"
)
// updateReplicaCount attempts to update the Status.Replicas of the given controller, with a single GET/PUT retry.
func updateReplicaCount(rcClient client.ReplicationControllerInterface, controller api.ReplicationController, numReplicas int) (updateErr error) {
// This is the steady state. It happens when the rc doesn't have any expectations, since
// we do a periodic relist every 30s. If the generations differ but the replicas are
// the same, a caller might've resized to the same replica count.
if controller.Status.Replicas == numReplicas &&
controller.Generation == controller.Status.ObservedGeneration {
return nil
}
// Save the generation number we acted on, otherwise we might wrongfully indicate
// that we've seen a spec update when we retry.
// TODO: This can clobber an update if we allow multiple agents to write to the
// same status.
generation := controller.Generation
var getErr error
for i, rc := 0, &controller; ; i++ {
glog.V(4).Infof("Updating replica count for rc: %v, %d->%d (need %d), sequence No: %v->%v",
controller.Name, controller.Status.Replicas, numReplicas, controller.Spec.Replicas, controller.Status.ObservedGeneration, generation)
rc.Status = api.ReplicationControllerStatus{Replicas: numReplicas, ObservedGeneration: generation}
_, updateErr = rcClient.Update(rc)
if updateErr == nil || i >= statusUpdateRetries {
return updateErr
}
// Update the controller with the latest resource version for the next poll
if rc, getErr = rcClient.Get(controller.Name); getErr != nil {
// If the GET fails we can't trust status.Replicas anymore. This error
// is bound to be more interesting than the update failure.
return getErr
}
}
// Failed 2 updates, one of which was with the latest controller; return the update error
return
}
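
With statusUpdateRetries = 1, the loop issues at most two PUTs: one against the cached controller, and, after the refresh GET picks up the latest resourceVersion, one more; hence the "Failed 2 updates" note above.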
// overlappingControllers sorts a list of controllers by creation timestamp, using their names as a tie breaker.
type overlappingControllers []api.ReplicationController
func (o overlappingControllers) Len() int { return len(o) }
func (o overlappingControllers) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
func (o overlappingControllers) Less(i, j int) bool {
if o[i].CreationTimestamp.Equal(o[j].CreationTimestamp) {
return o[i].Name < o[j].Name
}
return o[i].CreationTimestamp.Before(o[j].CreationTimestamp)
}
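
A usage sketch of the sort (the helper name is hypothetical; presumably this is how a caller picks a deterministic winner when several controllers' selectors match the same pod):

    // oldestController returns the winner by creation time, name breaking ties.
    func oldestController(rcs []api.ReplicationController) api.ReplicationController {
        sort.Sort(overlappingControllers(rcs))
        return rcs[0]
    }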

View File

@@ -30,7 +30,7 @@ import (
"github.com/GoogleCloudPlatform/kubernetes/pkg/apiserver"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/record"
"github.com/GoogleCloudPlatform/kubernetes/pkg/controller"
"github.com/GoogleCloudPlatform/kubernetes/pkg/controller/replication"
"github.com/GoogleCloudPlatform/kubernetes/pkg/fields"
"github.com/GoogleCloudPlatform/kubernetes/pkg/kubectl"
"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
@@ -66,7 +66,7 @@ type MasterComponents struct {
// Restclient used to talk to the kubernetes master
RestClient *client.Client
// Replication controller manager
ControllerManager *controller.ReplicationManager
ControllerManager *replication.ReplicationManager
// Channel for stop signals to rc manager
rcStopCh chan struct{}
// Used to stop master components individually, and via MasterComponents.Stop
@@ -99,7 +99,7 @@ func NewMasterComponents(c *Config) *MasterComponents {
}
restClient := client.NewOrDie(&client.Config{Host: s.URL, Version: testapi.Version(), QPS: c.QPS, Burst: c.Burst})
rcStopCh := make(chan struct{})
controllerManager := controller.NewReplicationManager(restClient, c.Burst)
controllerManager := replication.NewReplicationManager(restClient, c.Burst)
// TODO: Support events once we can cleanly shutdown an event recorder.
controllerManager.SetEventRecorder(&record.FakeRecorder{})
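
For downstream callers the whole migration in this commit is mechanical: swap the import and the package qualifier. After this commit the wiring looks roughly like the sketch below, condensed from the cmd hunks above (startRCManager is a hypothetical wrapper name):

    import (
        "github.com/GoogleCloudPlatform/kubernetes/pkg/client"
        "github.com/GoogleCloudPlatform/kubernetes/pkg/controller/replication"
        "github.com/GoogleCloudPlatform/kubernetes/pkg/util"
    )

    func startRCManager(kubeClient client.Interface) {
        // The manager moved from pkg/controller to pkg/controller/replication;
        // BurstReplicas moved with it.
        mgr := replication.NewReplicationManager(kubeClient, replication.BurstReplicas)
        go mgr.Run(5, util.NeverStop) // 5 concurrent syncs, as in standalone.go above
    }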