Fix initializing IP/Port allocators when etcd is not reachable.

pull/6/head
Filip Grzadkowski 2015-06-16 16:34:12 +02:00
parent 7940c96b16
commit 4c0c7dd879
5 changed files with 30 additions and 8 deletions

View File

@ -76,10 +76,12 @@ func (c *Controller) Start() {
// run all of the controllers once prior to returning from Start.
if err := repairClusterIPs.RunOnce(); err != nil {
glog.Errorf("Unable to perform initial IP allocation check: %v", err)
// If we fail to repair cluster IPs apiserver is useless. We should restart and retry.
glog.Fatalf("Unable to perform initial IP allocation check: %v", err)
}
if err := repairNodePorts.RunOnce(); err != nil {
glog.Errorf("Unable to perform initial service nodePort check: %v", err)
// If we fail to repair node ports apiserver is useless. We should restart and retry.
glog.Fatalf("Unable to perform initial service nodePort check: %v", err)
}
if err := c.UpdateKubernetesService(); err != nil {
glog.Errorf("Unable to perform initial Kubernetes service initialization: %v", err)

View File

@ -17,11 +17,11 @@ limitations under the License.
package etcd
import (
"strings"
"testing"
"github.com/coreos/go-etcd/etcd"
"fmt"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api/testapi"
"github.com/GoogleCloudPlatform/kubernetes/pkg/registry/service/allocator"
@ -54,7 +54,7 @@ func key() string {
func TestEmpty(t *testing.T) {
storage, _, ecli := newStorage(t)
ecli.ExpectNotFoundGet(key())
if _, err := storage.Allocate(1); fmt.Sprintf("%v", err) != "cannot allocate resources of type serviceipallocation at this time" {
if _, err := storage.Allocate(1); !strings.Contains(err.Error(), "cannot allocate resources of type serviceipallocation at this time") {
t.Fatal(err)
}
}

View File

@ -78,7 +78,17 @@ func (c *Repair) RunOnce() error {
// and the release code must not release services that have had IPs allocated but not yet been created
// See #8295
latest, err := c.alloc.Get()
// If etcd server is not running we should wait for some time and fail only then. This is particularly
// important when we start apiserver and etcd at the same time.
var latest *api.RangeAllocation
var err error
for i := 0; i < 10; i++ {
if latest, err = c.alloc.Get(); err != nil {
time.Sleep(time.Second)
} else {
break
}
}
if err != nil {
return fmt.Errorf("unable to refresh the service IP block: %v", err)
}

View File

@ -18,11 +18,11 @@ package etcd
import (
"net"
"strings"
"testing"
"github.com/coreos/go-etcd/etcd"
"fmt"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
"github.com/GoogleCloudPlatform/kubernetes/pkg/api/testapi"
"github.com/GoogleCloudPlatform/kubernetes/pkg/registry/service/allocator"
@ -66,7 +66,7 @@ func key() string {
func TestEmpty(t *testing.T) {
storage, _, ecli := newStorage(t)
ecli.ExpectNotFoundGet(key())
if err := storage.Allocate(net.ParseIP("192.168.1.2")); fmt.Sprintf("%v", err) != "cannot allocate resources of type serviceipallocation at this time" {
if err := storage.Allocate(net.ParseIP("192.168.1.2")); !strings.Contains(err.Error(), "cannot allocate resources of type serviceipallocation at this time") {
t.Fatal(err)
}
}

View File

@ -63,7 +63,17 @@ func (c *Repair) RunOnce() error {
// and the release code must not release services that have had ports allocated but not yet been created
// See #8295
latest, err := c.alloc.Get()
// If etcd server is not running we should wait for some time and fail only then. This is particularly
// important when we start apiserver and etcd at the same time.
var latest *api.RangeAllocation
var err error
for i := 0; i < 10; i++ {
if latest, err = c.alloc.Get(); err != nil {
time.Sleep(time.Second)
} else {
break
}
}
if err != nil {
return fmt.Errorf("unable to refresh the port block: %v", err)
}