Adds support for PodCIDR allocation from the GCE cloud provider

If CIDRAllocatorType is set to `CloudCIDRAllocator`, then allocation of CIDR allocation instead is done by the external cloud provider and the node controller is only responsible for reflecting the allocation into the node spec. - Splits off the rangeAllocator from the cidr_allocator.go file. - Adds cloudCIDRAllocator, which is used when the cloud provider allocates the CIDR ranges externally. (GCE support only) - Updates RBAC permission for node controller to include PATCH
2017-02-27 00:33:55 -08:00 · 2017-02-27 00:33:55 -08:00 · f61590c221
parent 345c65847f
commit f61590c221
12 changed files with 614 additions and 439 deletions
--- a/cmd/kube-controller-manager/app/controllermanager.go
+++ b/cmd/kube-controller-manager/app/controllermanager.go
@ -477,6 +477,7 @@ func StartControllers(controllers map[string]InitFunc, s *options.CMServer, root
 			serviceCIDR,
 			int(s.NodeCIDRMaskSize),
 			s.AllocateNodeCIDRs,
+			nodecontroller.CIDRAllocatorType(s.CIDRAllocatorType),
 			s.EnableTaintManager,
 			utilfeature.DefaultFeatureGate.Enabled(features.TaintBasedEvictions),
 		)
--- a/cmd/kube-controller-manager/app/options/options.go
+++ b/cmd/kube-controller-manager/app/options/options.go
@ -188,7 +188,10 @@ func (s *CMServer) AddFlags(fs *pflag.FlagSet, allControllers []string, disabled
 	fs.StringVar(&s.ClusterCIDR, "cluster-cidr", s.ClusterCIDR, "CIDR Range for Pods in cluster.")
 	fs.StringVar(&s.ServiceCIDR, "service-cluster-ip-range", s.ServiceCIDR, "CIDR Range for Services in cluster.")
 	fs.Int32Var(&s.NodeCIDRMaskSize, "node-cidr-mask-size", s.NodeCIDRMaskSize, "Mask size for node cidr in cluster.")
-	fs.BoolVar(&s.AllocateNodeCIDRs, "allocate-node-cidrs", false, "Should CIDRs for Pods be allocated and set on the cloud provider.")
+	fs.BoolVar(&s.AllocateNodeCIDRs, "allocate-node-cidrs", false,
+		"Should CIDRs for Pods be allocated and set on the cloud provider.")
+	fs.StringVar(&s.CIDRAllocatorType, "cidr-allocator-type", "RangeAllocator",
+		"Type of CIDR allocator to use")
 	fs.BoolVar(&s.ConfigureCloudRoutes, "configure-cloud-routes", true, "Should CIDRs allocated by allocate-node-cidrs be configured on the cloud provider.")
 	fs.StringVar(&s.Master, "master", s.Master, "The address of the Kubernetes API server (overrides any value in kubeconfig)")
 	fs.StringVar(&s.Kubeconfig, "kubeconfig", s.Kubeconfig, "Path to kubeconfig file with authorization and master location information.")
--- a/pkg/apis/componentconfig/types.go
+++ b/pkg/apis/componentconfig/types.go
@ -794,9 +794,11 @@ type KubeControllerManagerConfiguration struct {
 	ServiceCIDR string
 	// NodeCIDRMaskSize is the mask size for node cidr in cluster.
 	NodeCIDRMaskSize int32
-	// allocateNodeCIDRs enables CIDRs for Pods to be allocated and, if
+	// AllocateNodeCIDRs enables CIDRs for Pods to be allocated and, if
 	// ConfigureCloudRoutes is true, to be set on the cloud provider.
 	AllocateNodeCIDRs bool
+	// CIDRAllocatorType determines what kind of pod CIDR allocator will be used.
+	CIDRAllocatorType string
 	// configureCloudRoutes enables CIDRs allocated with allocateNodeCIDRs
 	// to be configured on the cloud provider.
 	ConfigureCloudRoutes bool
--- a/pkg/cloudprovider/providers/gce/gce_instances.go
+++ b/pkg/cloudprovider/providers/gce/gce_instances.go
@ -217,10 +217,10 @@ func (gce *GCECloud) CurrentNodeName(hostname string) (types.NodeName, error) {
 	return types.NodeName(hostname), nil
 }

-// PodCIDRs returns a list of CIDR ranges that are assigned to the
+// AliasRanges returns a list of CIDR ranges that are assigned to the
 // `node` for allocation to pods. Returns a list of the form
 // "<ip>/<netmask>".
-func (gce *GCECloud) PodCIDRs(nodeName types.NodeName) (cidrs []string, err error) {
+func (gce *GCECloud) AliasRanges(nodeName types.NodeName) (cidrs []string, err error) {
 	var instance *gceInstance
 	instance, err = gce.getInstanceByName(mapNodeNameToInstanceName(nodeName))
 	if err != nil {
--- a/pkg/controller/node/BUILD
+++ b/pkg/controller/node/BUILD
@ -8,56 +8,6 @@ load(
    "go_test",
 )

-go_library(
-    name = "go_default_library",
-    srcs = [
-        "cidr_allocator.go",
-        "cidr_set.go",
-        "controller_utils.go",
-        "doc.go",
-        "metrics.go",
-        "nodecontroller.go",
-        "rate_limited_queue.go",
-        "taint_controller.go",
-        "timed_workers.go",
-    ],
-    tags = ["automanaged"],
-    deps = [
-        "//pkg/api:go_default_library",
-        "//pkg/api/v1:go_default_library",
-        "//pkg/client/clientset_generated/clientset:go_default_library",
-        "//pkg/client/informers/informers_generated/externalversions/core/v1:go_default_library",
-        "//pkg/client/informers/informers_generated/externalversions/extensions/v1beta1:go_default_library",
-        "//pkg/client/listers/core/v1:go_default_library",
-        "//pkg/client/listers/extensions/v1beta1:go_default_library",
-        "//pkg/cloudprovider:go_default_library",
-        "//pkg/controller:go_default_library",
-        "//pkg/kubelet/util/format:go_default_library",
-        "//pkg/util/metrics:go_default_library",
-        "//pkg/util/node:go_default_library",
-        "//pkg/util/system:go_default_library",
-        "//pkg/util/version:go_default_library",
-        "//vendor:github.com/golang/glog",
-        "//vendor:github.com/prometheus/client_golang/prometheus",
-        "//vendor:k8s.io/apimachinery/pkg/api/equality",
-        "//vendor:k8s.io/apimachinery/pkg/api/errors",
-        "//vendor:k8s.io/apimachinery/pkg/apis/meta/v1",
-        "//vendor:k8s.io/apimachinery/pkg/fields",
-        "//vendor:k8s.io/apimachinery/pkg/labels",
-        "//vendor:k8s.io/apimachinery/pkg/types",
-        "//vendor:k8s.io/apimachinery/pkg/util/errors",
-        "//vendor:k8s.io/apimachinery/pkg/util/runtime",
-        "//vendor:k8s.io/apimachinery/pkg/util/sets",
-        "//vendor:k8s.io/apimachinery/pkg/util/wait",
-        "//vendor:k8s.io/client-go/kubernetes/typed/core/v1",
-        "//vendor:k8s.io/client-go/pkg/api/v1",
-        "//vendor:k8s.io/client-go/tools/cache",
-        "//vendor:k8s.io/client-go/tools/record",
-        "//vendor:k8s.io/client-go/util/flowcontrol",
-        "//vendor:k8s.io/client-go/util/workqueue",
-    ],
-)
-
 go_test(
    name = "go_default_test",
    srcs = [
@ -96,6 +46,59 @@ go_test(
    ],
 )

+go_library(
+    name = "go_default_library",
+    srcs = [
+        "cidr_allocator.go",
+        "cidr_set.go",
+        "cloud_cidr_allocator.go",
+        "controller_utils.go",
+        "doc.go",
+        "metrics.go",
+        "nodecontroller.go",
+        "range_allocator.go",
+        "rate_limited_queue.go",
+        "taint_controller.go",
+        "timed_workers.go",
+    ],
+    tags = ["automanaged"],
+    deps = [
+        "//pkg/api:go_default_library",
+        "//pkg/api/v1:go_default_library",
+        "//pkg/client/clientset_generated/clientset:go_default_library",
+        "//pkg/client/informers/informers_generated/externalversions/core/v1:go_default_library",
+        "//pkg/client/informers/informers_generated/externalversions/extensions/v1beta1:go_default_library",
+        "//pkg/client/listers/core/v1:go_default_library",
+        "//pkg/client/listers/extensions/v1beta1:go_default_library",
+        "//pkg/cloudprovider:go_default_library",
+        "//pkg/cloudprovider/providers/gce:go_default_library",
+        "//pkg/controller:go_default_library",
+        "//pkg/kubelet/util/format:go_default_library",
+        "//pkg/util/metrics:go_default_library",
+        "//pkg/util/node:go_default_library",
+        "//pkg/util/system:go_default_library",
+        "//pkg/util/version:go_default_library",
+        "//vendor:github.com/golang/glog",
+        "//vendor:github.com/prometheus/client_golang/prometheus",
+        "//vendor:k8s.io/apimachinery/pkg/api/equality",
+        "//vendor:k8s.io/apimachinery/pkg/api/errors",
+        "//vendor:k8s.io/apimachinery/pkg/apis/meta/v1",
+        "//vendor:k8s.io/apimachinery/pkg/fields",
+        "//vendor:k8s.io/apimachinery/pkg/labels",
+        "//vendor:k8s.io/apimachinery/pkg/types",
+        "//vendor:k8s.io/apimachinery/pkg/util/errors",
+        "//vendor:k8s.io/apimachinery/pkg/util/runtime",
+        "//vendor:k8s.io/apimachinery/pkg/util/sets",
+        "//vendor:k8s.io/apimachinery/pkg/util/wait",
+        "//vendor:k8s.io/client-go/kubernetes/typed/core/v1",
+        "//vendor:k8s.io/client-go/pkg/api/v1",
+        "//vendor:k8s.io/client-go/tools/cache",
+        "//vendor:k8s.io/client-go/tools/record",
+        "//vendor:k8s.io/client-go/util/flowcontrol",
+        "//vendor:k8s.io/client-go/util/workqueue",
+    ],
+)
+
 filegroup(
    name = "package-srcs",
    srcs = glob(["**"]),
--- a/pkg/controller/node/cidr_allocator.go
+++ b/pkg/controller/node/cidr_allocator.go
@ -18,259 +18,34 @@ package node

 import (
 	"errors"
-	"fmt"
 	"net"
-	"sync"

-	apierrors "k8s.io/apimachinery/pkg/api/errors"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/apimachinery/pkg/util/sets"
-	"k8s.io/apimachinery/pkg/util/wait"
-	v1core "k8s.io/client-go/kubernetes/typed/core/v1"
-	clientv1 "k8s.io/client-go/pkg/api/v1"
-	"k8s.io/client-go/tools/record"
-	"k8s.io/kubernetes/pkg/api"
-	"k8s.io/kubernetes/pkg/api/v1"
-	"k8s.io/kubernetes/pkg/client/clientset_generated/clientset"
-
-	"github.com/golang/glog"
+	v1 "k8s.io/kubernetes/pkg/api/v1"
 )

-// TODO: figure out the good setting for those constants.
-const (
-	// controls how many NodeSpec updates NC can process concurrently.
-	cidrUpdateWorkers   = 10
-	cidrUpdateQueueSize = 5000
-	// podCIDRUpdateRetry controls the number of retries of writing Node.Spec.PodCIDR update.
-	podCIDRUpdateRetry = 5
-)
-
-var errCIDRRangeNoCIDRsRemaining = errors.New("CIDR allocation failed; there are no remaining CIDRs left to allocate in the accepted range")
+var errCIDRRangeNoCIDRsRemaining = errors.New(
+	"CIDR allocation failed; there are no remaining CIDRs left to allocate in the accepted range")

 type nodeAndCIDR struct {
 	cidr     *net.IPNet
 	nodeName string
 }

-// CIDRAllocator is an interface implemented by things that know how to allocate/occupy/recycle CIDR for nodes.
+// CIDRAllocatorType is the type of the allocator to use.
+type CIDRAllocatorType string
+
+const (
+	RangeAllocatorType CIDRAllocatorType = "RangeAllocator"
+	CloudAllocatorType CIDRAllocatorType = "CloudAllocator"
+)
+
+// CIDRAllocator is an interface implemented by things that know how to
+// allocate/occupy/recycle CIDR for nodes.
 type CIDRAllocator interface {
+	// AllocateOrOccupyCIDR looks at the given node, assigns it a valid
+	// CIDR if it doesn't currently have one or mark the CIDR as used if
+	// the node already have one.
 	AllocateOrOccupyCIDR(node *v1.Node) error
+	// ReleaseCIDR releases the CIDR of the removed node
 	ReleaseCIDR(node *v1.Node) error
 }
-
-type rangeAllocator struct {
-	client      clientset.Interface
-	cidrs       *cidrSet
-	clusterCIDR *net.IPNet
-	maxCIDRs    int
-	// Channel that is used to pass updating Nodes with assigned CIDRs to the background
-	// This increases a throughput of CIDR assignment by not blocking on long operations.
-	nodeCIDRUpdateChannel chan nodeAndCIDR
-	recorder              record.EventRecorder
-	// Keep a set of nodes that are currectly being processed to avoid races in CIDR allocation
-	sync.Mutex
-	nodesInProcessing sets.String
-}
-
-// NewCIDRRangeAllocator returns a CIDRAllocator to allocate CIDR for node
-// Caller must ensure subNetMaskSize is not less than cluster CIDR mask size.
-// Caller must always pass in a list of existing nodes so the new allocator
-// can initialize its CIDR map. NodeList is only nil in testing.
-func NewCIDRRangeAllocator(client clientset.Interface, clusterCIDR *net.IPNet, serviceCIDR *net.IPNet, subNetMaskSize int, nodeList *v1.NodeList) (CIDRAllocator, error) {
-	eventBroadcaster := record.NewBroadcaster()
-	recorder := eventBroadcaster.NewRecorder(api.Scheme, clientv1.EventSource{Component: "cidrAllocator"})
-	eventBroadcaster.StartLogging(glog.Infof)
-	if client != nil {
-		glog.V(0).Infof("Sending events to api server.")
-		eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: v1core.New(client.Core().RESTClient()).Events("")})
-	} else {
-		glog.Fatalf("kubeClient is nil when starting NodeController")
-	}
-
-	ra := &rangeAllocator{
-		client:                client,
-		cidrs:                 newCIDRSet(clusterCIDR, subNetMaskSize),
-		clusterCIDR:           clusterCIDR,
-		nodeCIDRUpdateChannel: make(chan nodeAndCIDR, cidrUpdateQueueSize),
-		recorder:              recorder,
-		nodesInProcessing:     sets.NewString(),
-	}
-
-	if serviceCIDR != nil {
-		ra.filterOutServiceRange(serviceCIDR)
-	} else {
-		glog.V(0).Info("No Service CIDR provided. Skipping filtering out service addresses.")
-	}
-
-	if nodeList != nil {
-		for _, node := range nodeList.Items {
-			if node.Spec.PodCIDR == "" {
-				glog.Infof("Node %v has no CIDR, ignoring", node.Name)
-				continue
-			} else {
-				glog.Infof("Node %v has CIDR %s, occupying it in CIDR map", node.Name, node.Spec.PodCIDR)
-			}
-			if err := ra.occupyCIDR(&node); err != nil {
-				// This will happen if:
-				// 1. We find garbage in the podCIDR field. Retrying is useless.
-				// 2. CIDR out of range: This means a node CIDR has changed.
-				// This error will keep crashing controller-manager.
-				return nil, err
-			}
-		}
-	}
-	for i := 0; i < cidrUpdateWorkers; i++ {
-		go func(stopChan <-chan struct{}) {
-			for {
-				select {
-				case workItem, ok := <-ra.nodeCIDRUpdateChannel:
-					if !ok {
-						glog.Warning("NodeCIDRUpdateChannel read returned false.")
-						return
-					}
-					ra.updateCIDRAllocation(workItem)
-				case <-stopChan:
-					return
-				}
-			}
-		}(wait.NeverStop)
-	}
-
-	return ra, nil
-}
-
-func (r *rangeAllocator) insertNodeToProcessing(nodeName string) bool {
-	r.Lock()
-	defer r.Unlock()
-	if r.nodesInProcessing.Has(nodeName) {
-		return false
-	}
-	r.nodesInProcessing.Insert(nodeName)
-	return true
-}
-
-func (r *rangeAllocator) removeNodeFromProcessing(nodeName string) {
-	r.Lock()
-	defer r.Unlock()
-	r.nodesInProcessing.Delete(nodeName)
-}
-
-func (r *rangeAllocator) occupyCIDR(node *v1.Node) error {
-	defer r.removeNodeFromProcessing(node.Name)
-	if node.Spec.PodCIDR == "" {
-		return nil
-	}
-	_, podCIDR, err := net.ParseCIDR(node.Spec.PodCIDR)
-	if err != nil {
-		return fmt.Errorf("failed to parse node %s, CIDR %s", node.Name, node.Spec.PodCIDR)
-	}
-	if err := r.cidrs.occupy(podCIDR); err != nil {
-		return fmt.Errorf("failed to mark cidr as occupied: %v", err)
-	}
-	return nil
-}
-
-// AllocateOrOccupyCIDR looks at the given node, assigns it a valid CIDR
-// if it doesn't currently have one or mark the CIDR as used if the node already have one.
-// WARNING: If you're adding any return calls or defer any more work from this function
-// you have to handle correctly nodesInProcessing.
-func (r *rangeAllocator) AllocateOrOccupyCIDR(node *v1.Node) error {
-	if node == nil {
-		return nil
-	}
-	if !r.insertNodeToProcessing(node.Name) {
-		glog.V(2).Infof("Node %v is already in a process of CIDR assignment.", node.Name)
-		return nil
-	}
-	if node.Spec.PodCIDR != "" {
-		return r.occupyCIDR(node)
-	}
-	podCIDR, err := r.cidrs.allocateNext()
-	if err != nil {
-		r.removeNodeFromProcessing(node.Name)
-		recordNodeStatusChange(r.recorder, node, "CIDRNotAvailable")
-		return fmt.Errorf("failed to allocate cidr: %v", err)
-	}
-
-	glog.V(10).Infof("Putting node %s with CIDR %s into the work queue", node.Name, podCIDR)
-	r.nodeCIDRUpdateChannel <- nodeAndCIDR{
-		nodeName: node.Name,
-		cidr:     podCIDR,
-	}
-	return nil
-}
-
-// ReleaseCIDR releases the CIDR of the removed node
-func (r *rangeAllocator) ReleaseCIDR(node *v1.Node) error {
-	if node == nil || node.Spec.PodCIDR == "" {
-		return nil
-	}
-	_, podCIDR, err := net.ParseCIDR(node.Spec.PodCIDR)
-	if err != nil {
-		return fmt.Errorf("Failed to parse CIDR %s on Node %v: %v", node.Spec.PodCIDR, node.Name, err)
-	}
-
-	glog.V(4).Infof("release CIDR %s", node.Spec.PodCIDR)
-	if err = r.cidrs.release(podCIDR); err != nil {
-		return fmt.Errorf("Error when releasing CIDR %v: %v", node.Spec.PodCIDR, err)
-	}
-	return err
-}
-
-// Marks all CIDRs with subNetMaskSize that belongs to serviceCIDR as used,
-// so that they won't be assignable.
-func (r *rangeAllocator) filterOutServiceRange(serviceCIDR *net.IPNet) {
-	// Checks if service CIDR has a nonempty intersection with cluster CIDR. It is the case if either
-	// clusterCIDR contains serviceCIDR with clusterCIDR's Mask applied (this means that clusterCIDR contains serviceCIDR)
-	// or vice versa (which means that serviceCIDR contains clusterCIDR).
-	if !r.clusterCIDR.Contains(serviceCIDR.IP.Mask(r.clusterCIDR.Mask)) && !serviceCIDR.Contains(r.clusterCIDR.IP.Mask(serviceCIDR.Mask)) {
-		return
-	}
-
-	if err := r.cidrs.occupy(serviceCIDR); err != nil {
-		glog.Errorf("Error filtering out service cidr %v: %v", serviceCIDR, err)
-	}
-}
-
-// Assigns CIDR to Node and sends an update to the API server.
-func (r *rangeAllocator) updateCIDRAllocation(data nodeAndCIDR) error {
-	var err error
-	var node *v1.Node
-	defer r.removeNodeFromProcessing(data.nodeName)
-	for rep := 0; rep < podCIDRUpdateRetry; rep++ {
-		// TODO: change it to using PATCH instead of full Node updates.
-		node, err = r.client.Core().Nodes().Get(data.nodeName, metav1.GetOptions{})
-		if err != nil {
-			glog.Errorf("Failed while getting node %v to retry updating Node.Spec.PodCIDR: %v", data.nodeName, err)
-			continue
-		}
-		if node.Spec.PodCIDR != "" {
-			glog.Errorf("Node %v already has allocated CIDR %v. Releasing assigned one if different.", node.Name, node.Spec.PodCIDR)
-			if node.Spec.PodCIDR != data.cidr.String() {
-				if err := r.cidrs.release(data.cidr); err != nil {
-					glog.Errorf("Error when releasing CIDR %v", data.cidr.String())
-				}
-			}
-			return nil
-		}
-		node.Spec.PodCIDR = data.cidr.String()
-		if _, err := r.client.Core().Nodes().Update(node); err != nil {
-			glog.Errorf("Failed while updating Node.Spec.PodCIDR (%d retries left): %v", podCIDRUpdateRetry-rep-1, err)
-		} else {
-			break
-		}
-	}
-	if err != nil {
-		recordNodeStatusChange(r.recorder, node, "CIDRAssignmentFailed")
-		// We accept the fact that we may leek CIDRs here. This is safer than releasing
-		// them in case when we don't know if request went through.
-		// NodeController restart will return all falsely allocated CIDRs to the pool.
-		if !apierrors.IsServerTimeout(err) {
-			glog.Errorf("CIDR assignment for node %v failed: %v. Releasing allocated CIDR", data.nodeName, err)
-			if releaseErr := r.cidrs.release(data.cidr); releaseErr != nil {
-				glog.Errorf("Error releasing allocated CIDR for node %v: %v", data.nodeName, releaseErr)
-			}
-		}
-	}
-	return err
-}
--- a/pkg/controller/node/cloud_cidr_allocator.go
+++ b/pkg/controller/node/cloud_cidr_allocator.go
@ -0,0 +1,143 @@
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package node
+
+import (
+	"fmt"
+	"sync"
+
+	"github.com/golang/glog"
+
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
+
+	clientv1 "k8s.io/client-go/pkg/api/v1"
+	"k8s.io/client-go/tools/record"
+
+	"k8s.io/kubernetes/pkg/api"
+	"k8s.io/kubernetes/pkg/api/v1"
+	"k8s.io/kubernetes/pkg/client/clientset_generated/clientset"
+	"k8s.io/kubernetes/pkg/cloudprovider"
+	"k8s.io/kubernetes/pkg/cloudprovider/providers/gce"
+	nodeutil "k8s.io/kubernetes/pkg/util/node"
+)
+
+// cloudCIDRAllocator allocates node CIDRs according to IP address aliases
+// assigned by the cloud provider. In this case, the allocation and
+// deallocation is delegated to the external provider, and the controller
+// merely takes the assignment and updates the node spec.
+type cloudCIDRAllocator struct {
+	lock sync.Mutex
+
+	client clientset.Interface
+	cloud  *gce.GCECloud
+
+	recorder record.EventRecorder
+}
+
+var _ CIDRAllocator = (*cloudCIDRAllocator)(nil)
+
+func NewCloudCIDRAllocator(
+	client clientset.Interface,
+	cloud cloudprovider.Interface) (ca CIDRAllocator, err error) {
+
+	gceCloud, ok := cloud.(*gce.GCECloud)
+	if !ok {
+		err = fmt.Errorf("cloudCIDRAllocator does not support %v provider", cloud.ProviderName())
+		return
+	}
+
+	ca = &cloudCIDRAllocator{
+		client: client,
+		cloud:  gceCloud,
+		recorder: record.NewBroadcaster().NewRecorder(
+			api.Scheme,
+			clientv1.EventSource{Component: "cidrAllocator"}),
+	}
+
+	glog.V(0).Infof("Using cloud CIDR allocator (provider: %v)", cloud.ProviderName())
+
+	return
+}
+
+func (ca *cloudCIDRAllocator) AllocateOrOccupyCIDR(node *v1.Node) error {
+	glog.V(2).Infof("Updating PodCIDR for node %v", node.Name)
+
+	cidrs, err := ca.cloud.AliasRanges(types.NodeName(node.Name))
+
+	if err != nil {
+		recordNodeStatusChange(ca.recorder, node, "CIDRNotAvailable")
+		return fmt.Errorf("failed to allocate cidr: %v", err)
+	}
+
+	if len(cidrs) == 0 {
+		recordNodeStatusChange(ca.recorder, node, "CIDRNotAvailable")
+		glog.V(2).Infof("Node %v has no CIDRs", node.Name)
+		return fmt.Errorf("failed to allocate cidr (none exist)")
+	}
+
+	node, err = ca.client.Core().Nodes().Get(node.Name, metav1.GetOptions{})
+	if err != nil {
+		glog.Errorf("Could not get Node object from Kubernetes: %v", err)
+		return err
+	}
+
+	podCIDR := cidrs[0]
+
+	if node.Spec.PodCIDR != "" {
+		if node.Spec.PodCIDR == podCIDR {
+			glog.V(3).Infof("Node %v has PodCIDR %v", node.Name, podCIDR)
+			return nil
+		}
+		glog.Errorf("PodCIDR cannot be reassigned, node %v spec has %v, but cloud provider has assigned %v",
+			node.Name, node.Spec.PodCIDR, podCIDR)
+		// We fall through and set the CIDR despite this error. This
+		// implements the same logic as implemented in the
+		// rangeAllocator.
+		//
+		// See https://github.com/kubernetes/kubernetes/pull/42147#discussion_r103357248
+	}
+
+	node.Spec.PodCIDR = cidrs[0]
+	if _, err := ca.client.Core().Nodes().Update(node); err == nil {
+		glog.V(2).Infof("Node %v PodCIDR set to %v", node.Name, podCIDR)
+	} else {
+		glog.Errorf("Could not update node %v PodCIDR to %v: %v",
+			node.Name, podCIDR, err)
+		return err
+	}
+
+	err = nodeutil.SetNodeCondition(ca.client, types.NodeName(node.Name), v1.NodeCondition{
+		Type:               v1.NodeNetworkUnavailable,
+		Status:             v1.ConditionFalse,
+		Reason:             "RouteCreated",
+		Message:            "NodeController create implicit route",
+		LastTransitionTime: metav1.Now(),
+	})
+	if err != nil {
+		glog.Errorf("Error setting route status for node %v: %v",
+			node.Name, err)
+	}
+
+	return err
+}
+
+func (ca *cloudCIDRAllocator) ReleaseCIDR(node *v1.Node) error {
+	glog.V(2).Infof("Node %v PodCIDR (%v) will be released by external cloud provider (not managed by controller)",
+		node.Name, node.Spec.PodCIDR)
+	return nil
+}
--- a/pkg/controller/node/nodecontroller.go
+++ b/pkg/controller/node/nodecontroller.go
@ -109,11 +109,13 @@ type nodeStatusData struct {

 type NodeController struct {
 	allocateNodeCIDRs bool
-	cloud             cloudprovider.Interface
-	clusterCIDR       *net.IPNet
-	serviceCIDR       *net.IPNet
-	knownNodeSet      map[string]*v1.Node
-	kubeClient        clientset.Interface
+	allocatorType     CIDRAllocatorType
+
+	cloud        cloudprovider.Interface
+	clusterCIDR  *net.IPNet
+	serviceCIDR  *net.IPNet
+	knownNodeSet map[string]*v1.Node
+	kubeClient   clientset.Interface
 	// Method for easy mocking in unittest.
 	lookupIP func(host string) ([]net.IP, error)
 	// Value used if sync_nodes_status=False. NodeController will not proactively
@ -162,9 +164,8 @@ type NodeController struct {

 	podInformerSynced cache.InformerSynced

-	// allocate/recycle CIDRs for node if allocateNodeCIDRs == true
 	cidrAllocator CIDRAllocator
-	// manages taints
+
 	taintManager *NoExecuteTaintManager

 	forcefullyDeletePod        func(*v1.Pod) error
@ -210,6 +211,7 @@ func NewNodeController(
 	serviceCIDR *net.IPNet,
 	nodeCIDRMaskSize int,
 	allocateNodeCIDRs bool,
+	allocatorType CIDRAllocatorType,
 	runTaintManager bool,
 	useTaintBasedEvictions bool) (*NodeController, error) {
 	eventBroadcaster := record.NewBroadcaster()
@ -254,6 +256,7 @@ func NewNodeController(
 		clusterCIDR:                     clusterCIDR,
 		serviceCIDR:                     serviceCIDR,
 		allocateNodeCIDRs:               allocateNodeCIDRs,
+		allocatorType:                   allocatorType,
 		forcefullyDeletePod:             func(p *v1.Pod) error { return forcefullyDeletePod(kubeClient, p) },
 		nodeExistsInCloudProvider:       func(nodeName types.NodeName) (bool, error) { return nodeExistsInCloudProvider(cloud, nodeName) },
 		evictionLimiterQPS:              evictionLimiterQPS,
@ -309,7 +312,6 @@ func NewNodeController(
 	})
 	nc.podInformerSynced = podInformer.Informer().HasSynced

-	nodeEventHandlerFuncs := cache.ResourceEventHandlerFuncs{}
 	if nc.allocateNodeCIDRs {
 		var nodeList *v1.NodeList
 		var err error
@ -328,147 +330,32 @@ func NewNodeController(
 		}); pollErr != nil {
 			return nil, fmt.Errorf("Failed to list all nodes in %v, cannot proceed without updating CIDR map", apiserverStartupGracePeriod)
 		}
-		nc.cidrAllocator, err = NewCIDRRangeAllocator(kubeClient, clusterCIDR, serviceCIDR, nodeCIDRMaskSize, nodeList)
+
+		switch nc.allocatorType {
+		case RangeAllocatorType:
+			nc.cidrAllocator, err = NewCIDRRangeAllocator(
+				kubeClient, clusterCIDR, serviceCIDR, nodeCIDRMaskSize, nodeList)
+		case CloudAllocatorType:
+			nc.cidrAllocator, err = NewCloudCIDRAllocator(kubeClient, cloud)
+		default:
+			return nil, fmt.Errorf("Invalid CIDR allocator type: %v", nc.allocatorType)
+		}
+
 		if err != nil {
 			return nil, err
 		}

-		nodeEventHandlerFuncs = cache.ResourceEventHandlerFuncs{
-			AddFunc: func(originalObj interface{}) {
-				obj, err := api.Scheme.DeepCopy(originalObj)
-				if err != nil {
-					utilruntime.HandleError(err)
-					return
-				}
-				node := obj.(*v1.Node)
-
-				if err := nc.cidrAllocator.AllocateOrOccupyCIDR(node); err != nil {
-					utilruntime.HandleError(fmt.Errorf("Error allocating CIDR: %v", err))
-				}
-				if nc.taintManager != nil {
-					nc.taintManager.NodeUpdated(nil, node)
-				}
-			},
-			UpdateFunc: func(oldNode, newNode interface{}) {
-				node := newNode.(*v1.Node)
-				prevNode := oldNode.(*v1.Node)
-				// If the PodCIDR is not empty we either:
-				// - already processed a Node that already had a CIDR after NC restarted
-				//   (cidr is marked as used),
-				// - already processed a Node successfully and allocated a CIDR for it
-				//   (cidr is marked as used),
-				// - already processed a Node but we did saw a "timeout" response and
-				//   request eventually got through in this case we haven't released
-				//   the allocated CIDR (cidr is still marked as used).
-				// There's a possible error here:
-				// - NC sees a new Node and assigns a CIDR X to it,
-				// - Update Node call fails with a timeout,
-				// - Node is updated by some other component, NC sees an update and
-				//   assigns CIDR Y to the Node,
-				// - Both CIDR X and CIDR Y are marked as used in the local cache,
-				//   even though Node sees only CIDR Y
-				// The problem here is that in in-memory cache we see CIDR X as marked,
-				// which prevents it from being assigned to any new node. The cluster
-				// state is correct.
-				// Restart of NC fixes the issue.
-				if node.Spec.PodCIDR == "" {
-					nodeCopy, err := api.Scheme.Copy(node)
-					if err != nil {
-						utilruntime.HandleError(err)
-						return
-					}
-
-					if err := nc.cidrAllocator.AllocateOrOccupyCIDR(nodeCopy.(*v1.Node)); err != nil {
-						utilruntime.HandleError(fmt.Errorf("Error allocating CIDR: %v", err))
-					}
-				}
-				if nc.taintManager != nil {
-					nc.taintManager.NodeUpdated(prevNode, node)
-				}
-			},
-			DeleteFunc: func(originalObj interface{}) {
-				obj, err := api.Scheme.DeepCopy(originalObj)
-				if err != nil {
-					utilruntime.HandleError(err)
-					return
-				}
-
-				node, isNode := obj.(*v1.Node)
-				// We can get DeletedFinalStateUnknown instead of *v1.Node here and we need to handle that correctly. #34692
-				if !isNode {
-					deletedState, ok := obj.(cache.DeletedFinalStateUnknown)
-					if !ok {
-						glog.Errorf("Received unexpected object: %v", obj)
-						return
-					}
-					node, ok = deletedState.Obj.(*v1.Node)
-					if !ok {
-						glog.Errorf("DeletedFinalStateUnknown contained non-Node object: %v", deletedState.Obj)
-						return
-					}
-				}
-				if nc.taintManager != nil {
-					nc.taintManager.NodeUpdated(node, nil)
-				}
-				if err := nc.cidrAllocator.ReleaseCIDR(node); err != nil {
-					glog.Errorf("Error releasing CIDR: %v", err)
-				}
-			},
-		}
-	} else {
-		nodeEventHandlerFuncs = cache.ResourceEventHandlerFuncs{
-			AddFunc: func(originalObj interface{}) {
-				obj, err := api.Scheme.DeepCopy(originalObj)
-				if err != nil {
-					utilruntime.HandleError(err)
-					return
-				}
-				node := obj.(*v1.Node)
-				if nc.taintManager != nil {
-					nc.taintManager.NodeUpdated(nil, node)
-				}
-			},
-			UpdateFunc: func(oldNode, newNode interface{}) {
-				node := newNode.(*v1.Node)
-				prevNode := oldNode.(*v1.Node)
-				if nc.taintManager != nil {
-					nc.taintManager.NodeUpdated(prevNode, node)
-
-				}
-			},
-			DeleteFunc: func(originalObj interface{}) {
-				obj, err := api.Scheme.DeepCopy(originalObj)
-				if err != nil {
-					utilruntime.HandleError(err)
-					return
-				}
-
-				node, isNode := obj.(*v1.Node)
-				// We can get DeletedFinalStateUnknown instead of *v1.Node here and we need to handle that correctly. #34692
-				if !isNode {
-					deletedState, ok := obj.(cache.DeletedFinalStateUnknown)
-					if !ok {
-						glog.Errorf("Received unexpected object: %v", obj)
-						return
-					}
-					node, ok = deletedState.Obj.(*v1.Node)
-					if !ok {
-						glog.Errorf("DeletedFinalStateUnknown contained non-Node object: %v", deletedState.Obj)
-						return
-					}
-				}
-				if nc.taintManager != nil {
-					nc.taintManager.NodeUpdated(node, nil)
-				}
-			},
-		}
+		nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
+			AddFunc:    nc.onNodeAdd,
+			UpdateFunc: nc.onNodeUpdate,
+			DeleteFunc: nc.onNodeDelete,
+		})
 	}

 	if nc.runTaintManager {
 		nc.taintManager = NewNoExecuteTaintManager(kubeClient)
 	}

-	nodeInformer.Informer().AddEventHandler(nodeEventHandlerFuncs)
 	nc.nodeLister = nodeInformer.Lister()
 	nc.nodeInformerSynced = nodeInformer.Informer().HasSynced

@ -546,6 +433,90 @@ func (nc *NodeController) doTaintingPass() {
 	}
 }

+func (nc *NodeController) onNodeAdd(originalObj interface{}) {
+	obj, err := api.Scheme.DeepCopy(originalObj)
+	if err != nil {
+		utilruntime.HandleError(err)
+		return
+	}
+	node := obj.(*v1.Node)
+
+	if err := nc.cidrAllocator.AllocateOrOccupyCIDR(node); err != nil {
+		utilruntime.HandleError(fmt.Errorf("Error allocating CIDR: %v", err))
+	}
+	if nc.taintManager != nil {
+		nc.taintManager.NodeUpdated(nil, node)
+	}
+}
+
+func (nc *NodeController) onNodeUpdate(oldNode, newNode interface{}) {
+	node := newNode.(*v1.Node)
+	prevNode := oldNode.(*v1.Node)
+	// If the PodCIDR is not empty we either:
+	// - already processed a Node that already had a CIDR after NC restarted
+	//   (cidr is marked as used),
+	// - already processed a Node successfully and allocated a CIDR for it
+	//   (cidr is marked as used),
+	// - already processed a Node but we did saw a "timeout" response and
+	//   request eventually got through in this case we haven't released
+	//   the allocated CIDR (cidr is still marked as used).
+	// There's a possible error here:
+	// - NC sees a new Node and assigns a CIDR X to it,
+	// - Update Node call fails with a timeout,
+	// - Node is updated by some other component, NC sees an update and
+	//   assigns CIDR Y to the Node,
+	// - Both CIDR X and CIDR Y are marked as used in the local cache,
+	//   even though Node sees only CIDR Y
+	// The problem here is that in in-memory cache we see CIDR X as marked,
+	// which prevents it from being assigned to any new node. The cluster
+	// state is correct.
+	// Restart of NC fixes the issue.
+	if node.Spec.PodCIDR == "" {
+		nodeCopy, err := api.Scheme.Copy(node)
+		if err != nil {
+			utilruntime.HandleError(err)
+			return
+		}
+
+		if err := nc.cidrAllocator.AllocateOrOccupyCIDR(nodeCopy.(*v1.Node)); err != nil {
+			utilruntime.HandleError(fmt.Errorf("Error allocating CIDR: %v", err))
+		}
+	}
+	if nc.taintManager != nil {
+		nc.taintManager.NodeUpdated(prevNode, node)
+	}
+}
+
+func (nc *NodeController) onNodeDelete(originalObj interface{}) {
+	obj, err := api.Scheme.DeepCopy(originalObj)
+	if err != nil {
+		utilruntime.HandleError(err)
+		return
+	}
+
+	node, isNode := obj.(*v1.Node)
+	// We can get DeletedFinalStateUnknown instead of *v1.Node here and
+	// we need to handle that correctly. #34692
+	if !isNode {
+		deletedState, ok := obj.(cache.DeletedFinalStateUnknown)
+		if !ok {
+			glog.Errorf("Received unexpected object: %v", obj)
+			return
+		}
+		node, ok = deletedState.Obj.(*v1.Node)
+		if !ok {
+			glog.Errorf("DeletedFinalStateUnknown contained non-Node object: %v", deletedState.Obj)
+			return
+		}
+	}
+	if nc.taintManager != nil {
+		nc.taintManager.NodeUpdated(node, nil)
+	}
+	if err := nc.cidrAllocator.ReleaseCIDR(node); err != nil {
+		glog.Errorf("Error releasing CIDR: %v", err)
+	}
+}
+
 // Run starts an asynchronous loop that monitors the status of cluster nodes.
 func (nc *NodeController) Run() {
 	go func() {
--- a/pkg/controller/node/nodecontroller_test.go
+++ b/pkg/controller/node/nodecontroller_test.go
@ -101,6 +101,7 @@ func NewNodeControllerFromClient(
 		serviceCIDR,
 		nodeCIDRMaskSize,
 		allocateNodeCIDRs,
+		RangeAllocatorType,
 		useTaints,
 		useTaints,
 	)
@ -549,9 +550,22 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
 	}

 	for _, item := range table {
-		nodeController, _ := NewNodeControllerFromClient(nil, item.fakeNodeHandler,
-			evictionTimeout, testRateLimiterQPS, testRateLimiterQPS, testLargeClusterThreshold, testUnhealtyThreshold, testNodeMonitorGracePeriod,
-			testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, nil, 0, false, false)
+		nodeController, _ := NewNodeControllerFromClient(
+			nil,
+			item.fakeNodeHandler,
+			evictionTimeout,
+			testRateLimiterQPS,
+			testRateLimiterQPS,
+			testLargeClusterThreshold,
+			testUnhealtyThreshold,
+			testNodeMonitorGracePeriod,
+			testNodeStartupGracePeriod,
+			testNodeMonitorPeriod,
+			nil,
+			nil,
+			0,
+			false,
+			false)
 		nodeController.now = func() metav1.Time { return fakeNow }
 		nodeController.recorder = testutil.NewFakeRecorder()
 		for _, ds := range item.daemonSets {
--- a/pkg/controller/node/range_allocator.go
+++ b/pkg/controller/node/range_allocator.go
@ -0,0 +1,262 @@
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package node
+
+import (
+	"fmt"
+	"net"
+	"sync"
+
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/sets"
+	"k8s.io/apimachinery/pkg/util/wait"
+	v1core "k8s.io/client-go/kubernetes/typed/core/v1"
+	clientv1 "k8s.io/client-go/pkg/api/v1"
+	"k8s.io/client-go/tools/record"
+	"k8s.io/kubernetes/pkg/api"
+	"k8s.io/kubernetes/pkg/api/v1"
+	"k8s.io/kubernetes/pkg/client/clientset_generated/clientset"
+
+	"github.com/golang/glog"
+)
+
+// TODO: figure out the good setting for those constants.
+const (
+	// controls how many NodeSpec updates NC can process concurrently.
+	cidrUpdateWorkers   = 10
+	cidrUpdateQueueSize = 5000
+	// podCIDRUpdateRetry controls the number of retries of writing Node.Spec.PodCIDR update.
+	podCIDRUpdateRetry = 5
+)
+
+type rangeAllocator struct {
+	client      clientset.Interface
+	cidrs       *cidrSet
+	clusterCIDR *net.IPNet
+	maxCIDRs    int
+	// Channel that is used to pass updating Nodes with assigned CIDRs to the background
+	// This increases a throughput of CIDR assignment by not blocking on long operations.
+	nodeCIDRUpdateChannel chan nodeAndCIDR
+	recorder              record.EventRecorder
+	// Keep a set of nodes that are currectly being processed to avoid races in CIDR allocation
+	sync.Mutex
+	nodesInProcessing sets.String
+}
+
+// NewCIDRRangeAllocator returns a CIDRAllocator to allocate CIDR for node
+// Caller must ensure subNetMaskSize is not less than cluster CIDR mask size.
+// Caller must always pass in a list of existing nodes so the new allocator
+// can initialize its CIDR map. NodeList is only nil in testing.
+func NewCIDRRangeAllocator(client clientset.Interface, clusterCIDR *net.IPNet, serviceCIDR *net.IPNet, subNetMaskSize int, nodeList *v1.NodeList) (CIDRAllocator, error) {
+	eventBroadcaster := record.NewBroadcaster()
+	recorder := eventBroadcaster.NewRecorder(api.Scheme, clientv1.EventSource{Component: "cidrAllocator"})
+	eventBroadcaster.StartLogging(glog.Infof)
+	if client != nil {
+		glog.V(0).Infof("Sending events to api server.")
+		eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: v1core.New(client.Core().RESTClient()).Events("")})
+	} else {
+		glog.Fatalf("kubeClient is nil when starting NodeController")
+	}
+
+	ra := &rangeAllocator{
+		client:                client,
+		cidrs:                 newCIDRSet(clusterCIDR, subNetMaskSize),
+		clusterCIDR:           clusterCIDR,
+		nodeCIDRUpdateChannel: make(chan nodeAndCIDR, cidrUpdateQueueSize),
+		recorder:              recorder,
+		nodesInProcessing:     sets.NewString(),
+	}
+
+	if serviceCIDR != nil {
+		ra.filterOutServiceRange(serviceCIDR)
+	} else {
+		glog.V(0).Info("No Service CIDR provided. Skipping filtering out service addresses.")
+	}
+
+	if nodeList != nil {
+		for _, node := range nodeList.Items {
+			if node.Spec.PodCIDR == "" {
+				glog.Infof("Node %v has no CIDR, ignoring", node.Name)
+				continue
+			} else {
+				glog.Infof("Node %v has CIDR %s, occupying it in CIDR map",
+					node.Name, node.Spec.PodCIDR)
+			}
+			if err := ra.occupyCIDR(&node); err != nil {
+				// This will happen if:
+				// 1. We find garbage in the podCIDR field. Retrying is useless.
+				// 2. CIDR out of range: This means a node CIDR has changed.
+				// This error will keep crashing controller-manager.
+				return nil, err
+			}
+		}
+	}
+	for i := 0; i < cidrUpdateWorkers; i++ {
+		go func(stopChan <-chan struct{}) {
+			for {
+				select {
+				case workItem, ok := <-ra.nodeCIDRUpdateChannel:
+					if !ok {
+						glog.Warning("NodeCIDRUpdateChannel read returned false.")
+						return
+					}
+					ra.updateCIDRAllocation(workItem)
+				case <-stopChan:
+					return
+				}
+			}
+		}(wait.NeverStop)
+	}
+
+	return ra, nil
+}
+
+func (r *rangeAllocator) insertNodeToProcessing(nodeName string) bool {
+	r.Lock()
+	defer r.Unlock()
+	if r.nodesInProcessing.Has(nodeName) {
+		return false
+	}
+	r.nodesInProcessing.Insert(nodeName)
+	return true
+}
+
+func (r *rangeAllocator) removeNodeFromProcessing(nodeName string) {
+	r.Lock()
+	defer r.Unlock()
+	r.nodesInProcessing.Delete(nodeName)
+}
+
+func (r *rangeAllocator) occupyCIDR(node *v1.Node) error {
+	defer r.removeNodeFromProcessing(node.Name)
+	if node.Spec.PodCIDR == "" {
+		return nil
+	}
+	_, podCIDR, err := net.ParseCIDR(node.Spec.PodCIDR)
+	if err != nil {
+		return fmt.Errorf("failed to parse node %s, CIDR %s", node.Name, node.Spec.PodCIDR)
+	}
+	if err := r.cidrs.occupy(podCIDR); err != nil {
+		return fmt.Errorf("failed to mark cidr as occupied: %v", err)
+	}
+	return nil
+}
+
+// WARNING: If you're adding any return calls or defer any more work from this
+// function you have to handle correctly nodesInProcessing.
+func (r *rangeAllocator) AllocateOrOccupyCIDR(node *v1.Node) error {
+	if node == nil {
+		return nil
+	}
+	if !r.insertNodeToProcessing(node.Name) {
+		glog.V(2).Infof("Node %v is already in a process of CIDR assignment.", node.Name)
+		return nil
+	}
+	if node.Spec.PodCIDR != "" {
+		return r.occupyCIDR(node)
+	}
+	podCIDR, err := r.cidrs.allocateNext()
+	if err != nil {
+		r.removeNodeFromProcessing(node.Name)
+		recordNodeStatusChange(r.recorder, node, "CIDRNotAvailable")
+		return fmt.Errorf("failed to allocate cidr: %v", err)
+	}
+
+	glog.V(10).Infof("Putting node %s with CIDR %s into the work queue", node.Name, podCIDR)
+	r.nodeCIDRUpdateChannel <- nodeAndCIDR{
+		nodeName: node.Name,
+		cidr:     podCIDR,
+	}
+	return nil
+}
+
+func (r *rangeAllocator) ReleaseCIDR(node *v1.Node) error {
+	if node == nil || node.Spec.PodCIDR == "" {
+		return nil
+	}
+	_, podCIDR, err := net.ParseCIDR(node.Spec.PodCIDR)
+	if err != nil {
+		return fmt.Errorf("Failed to parse CIDR %s on Node %v: %v", node.Spec.PodCIDR, node.Name, err)
+	}
+
+	glog.V(4).Infof("release CIDR %s", node.Spec.PodCIDR)
+	if err = r.cidrs.release(podCIDR); err != nil {
+		return fmt.Errorf("Error when releasing CIDR %v: %v", node.Spec.PodCIDR, err)
+	}
+	return err
+}
+
+// Marks all CIDRs with subNetMaskSize that belongs to serviceCIDR as used,
+// so that they won't be assignable.
+func (r *rangeAllocator) filterOutServiceRange(serviceCIDR *net.IPNet) {
+	// Checks if service CIDR has a nonempty intersection with cluster
+	// CIDR. It is the case if either clusterCIDR contains serviceCIDR with
+	// clusterCIDR's Mask applied (this means that clusterCIDR contains
+	// serviceCIDR) or vice versa (which means that serviceCIDR contains
+	// clusterCIDR).
+	if !r.clusterCIDR.Contains(serviceCIDR.IP.Mask(r.clusterCIDR.Mask)) && !serviceCIDR.Contains(r.clusterCIDR.IP.Mask(serviceCIDR.Mask)) {
+		return
+	}
+
+	if err := r.cidrs.occupy(serviceCIDR); err != nil {
+		glog.Errorf("Error filtering out service cidr %v: %v", serviceCIDR, err)
+	}
+}
+
+// Assigns CIDR to Node and sends an update to the API server.
+func (r *rangeAllocator) updateCIDRAllocation(data nodeAndCIDR) error {
+	var err error
+	var node *v1.Node
+	defer r.removeNodeFromProcessing(data.nodeName)
+	for rep := 0; rep < podCIDRUpdateRetry; rep++ {
+		// TODO: change it to using PATCH instead of full Node updates.
+		node, err = r.client.Core().Nodes().Get(data.nodeName, metav1.GetOptions{})
+		if err != nil {
+			glog.Errorf("Failed while getting node %v to retry updating Node.Spec.PodCIDR: %v", data.nodeName, err)
+			continue
+		}
+		if node.Spec.PodCIDR != "" {
+			glog.Errorf("Node %v already has allocated CIDR %v. Releasing assigned one if different.", node.Name, node.Spec.PodCIDR)
+			if node.Spec.PodCIDR != data.cidr.String() {
+				if err := r.cidrs.release(data.cidr); err != nil {
+					glog.Errorf("Error when releasing CIDR %v", data.cidr.String())
+				}
+			}
+			return nil
+		}
+		node.Spec.PodCIDR = data.cidr.String()
+		if _, err := r.client.Core().Nodes().Update(node); err != nil {
+			glog.Errorf("Failed while updating Node.Spec.PodCIDR (%d retries left): %v", podCIDRUpdateRetry-rep-1, err)
+		} else {
+			break
+		}
+	}
+	if err != nil {
+		recordNodeStatusChange(r.recorder, node, "CIDRAssignmentFailed")
+		// We accept the fact that we may leek CIDRs here. This is safer than releasing
+		// them in case when we don't know if request went through.
+		// NodeController restart will return all falsely allocated CIDRs to the pool.
+		if !apierrors.IsServerTimeout(err) {
+			glog.Errorf("CIDR assignment for node %v failed: %v. Releasing allocated CIDR", data.nodeName, err)
+			if releaseErr := r.cidrs.release(data.cidr); releaseErr != nil {
+				glog.Errorf("Error releasing allocated CIDR for node %v: %v", data.nodeName, releaseErr)
+			}
+		}
+	}
+	return err
+}
--- a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/controller_policy.go
+++ b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/controller_policy.go
@ -168,7 +168,7 @@ func init() {
 		ObjectMeta: metav1.ObjectMeta{Name: saRolePrefix + "node-controller"},
 		Rules: []rbac.PolicyRule{
 			rbac.NewRule("get", "list", "update", "delete", "patch").Groups(legacyGroup).Resources("nodes").RuleOrDie(),
-			rbac.NewRule("update").Groups(legacyGroup).Resources("nodes/status").RuleOrDie(),
+			rbac.NewRule("patch", "update").Groups(legacyGroup).Resources("nodes/status").RuleOrDie(),
 			// used for pod eviction
 			rbac.NewRule("update").Groups(legacyGroup).Resources("pods/status").RuleOrDie(),
 			rbac.NewRule("list", "delete").Groups(legacyGroup).Resources("pods").RuleOrDie(),
--- a/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/testdata/controller-roles.yaml
+++ b/plugin/pkg/auth/authorizer/rbac/bootstrappolicy/testdata/controller-roles.yaml
@ -561,6 +561,7 @@ items:
    resources:
    - nodes/status
    verbs:
+    - patch
    - update
  - apiGroups:
    - ""