From f8eb179c2dea76cb00d8c616389be1fd1efac8f2 Mon Sep 17 00:00:00 2001
From: Justin Santa Barbara
Date: Thu, 22 Sep 2016 23:04:37 -0400
Subject: [PATCH 1/3] Create hostNetwork pods even if network plugin not ready

We now admit pods (unlike the first attempt), but we stop
non-hostNetwork pods from starting if the network is not ready.

Issue #35409
---
 pkg/kubelet/kubelet.go             |  7 ++++++-
 pkg/kubelet/kubelet_node_status.go |  3 ++-
 pkg/kubelet/runonce_test.go        |  1 +
 pkg/kubelet/runtime.go             | 15 +++++++++++----
 4 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go
index 7772489b8a..de7eec5030 100644
--- a/pkg/kubelet/kubelet.go
+++ b/pkg/kubelet/kubelet.go
@@ -1393,6 +1393,11 @@ func (kl *Kubelet) syncPod(o syncPodOptions) error {
 		return errOuter
 	}
 
+	// If the network plugin is not ready, only start the pod if it uses the host network
+	if rs := kl.runtimeState.networkErrors(); len(rs) != 0 && !podUsesHostNetwork(pod) {
+		return fmt.Errorf("network is not ready: %v", rs)
+	}
+
 	// Create Cgroups for the pod and apply resource parameters
 	// to them if cgroup-per-qos flag is enabled.
 	pcm := kl.containerManager.NewPodContainerManager()
@@ -1644,7 +1649,7 @@ func (kl *Kubelet) syncLoop(updates <-chan kubetypes.PodUpdate, handler SyncHand
 	defer housekeepingTicker.Stop()
 	plegCh := kl.pleg.Watch()
 	for {
-		if rs := kl.runtimeState.errors(); len(rs) != 0 {
+		if rs := kl.runtimeState.runtimeErrors(); len(rs) != 0 {
 			glog.Infof("skipping pod synchronization - %v", rs)
 			time.Sleep(5 * time.Second)
 			continue
diff --git a/pkg/kubelet/kubelet_node_status.go b/pkg/kubelet/kubelet_node_status.go
index 8d3b8cb47d..981762ae05 100644
--- a/pkg/kubelet/kubelet_node_status.go
+++ b/pkg/kubelet/kubelet_node_status.go
@@ -577,7 +577,8 @@ func (kl *Kubelet) setNodeReadyCondition(node *api.Node) {
 	// ref: https://github.com/kubernetes/kubernetes/issues/16961
 	currentTime := unversioned.NewTime(kl.clock.Now())
 	var newNodeReadyCondition api.NodeCondition
-	if rs := kl.runtimeState.errors(); len(rs) == 0 {
+	rs := append(kl.runtimeState.runtimeErrors(), kl.runtimeState.networkErrors()...)
+	if len(rs) == 0 {
 		newNodeReadyCondition = api.NodeCondition{
 			Type:   api.NodeReady,
 			Status: api.ConditionTrue,
diff --git a/pkg/kubelet/runonce_test.go b/pkg/kubelet/runonce_test.go
index 6e595ad9c7..6d7d266449 100644
--- a/pkg/kubelet/runonce_test.go
+++ b/pkg/kubelet/runonce_test.go
@@ -83,6 +83,7 @@ func TestRunOnce(t *testing.T) {
 		kubeClient:   &fake.Clientset{},
 		hostname:     testKubeletHostname,
 		nodeName:     testKubeletHostname,
+		runtimeState: newRuntimeState(time.Second),
 	}
 
 	kb.containerManager = cm.NewStubContainerManager()
diff --git a/pkg/kubelet/runtime.go b/pkg/kubelet/runtime.go
index 90a83898a3..6cb74fe364 100644
--- a/pkg/kubelet/runtime.go
+++ b/pkg/kubelet/runtime.go
@@ -68,16 +68,13 @@ func (s *runtimeState) setInitError(err error) {
 	s.initError = err
 }
 
-func (s *runtimeState) errors() []string {
+func (s *runtimeState) runtimeErrors() []string {
 	s.RLock()
 	defer s.RUnlock()
 	var ret []string
 	if s.initError != nil {
 		ret = append(ret, s.initError.Error())
 	}
-	if s.networkError != nil {
-		ret = append(ret, s.networkError.Error())
-	}
 	if !s.lastBaseRuntimeSync.Add(s.baseRuntimeSyncThreshold).After(time.Now()) {
 		ret = append(ret, "container runtime is down")
 	}
@@ -87,6 +84,16 @@
 	return ret
 }
 
+func (s *runtimeState) networkErrors() []string {
+	s.RLock()
+	defer s.RUnlock()
+	var ret []string
+	if s.networkError != nil {
+		ret = append(ret, s.networkError.Error())
+	}
+	return ret
+}
+
 func newRuntimeState(
 	runtimeSyncThreshold time.Duration,
 ) *runtimeState {

From 68c0b4268bdcffd9f2e570eda8ab6045c71e66d9 Mon Sep 17 00:00:00 2001
From: Justin Santa Barbara
Date: Thu, 22 Sep 2016 23:05:28 -0400
Subject: [PATCH 2/3] Update bringup: don't pass in dummy pod-cidr

We no longer pass in a "dummy" pod-cidr (10.123.45.0/29), and rely on
reconcile-cidr=true instead (which is the default).
---
 cluster/aws/templates/configure-vm-aws.sh | 2 --
 cluster/gce/configure-vm.sh               | 2 --
 cluster/gce/gci/configure-helper.sh       | 5 +----
 cluster/gce/trusty/configure-helper.sh    | 2 +-
 4 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/cluster/aws/templates/configure-vm-aws.sh b/cluster/aws/templates/configure-vm-aws.sh
index 6abca6ed43..0be5203add 100755
--- a/cluster/aws/templates/configure-vm-aws.sh
+++ b/cluster/aws/templates/configure-vm-aws.sh
@@ -91,7 +91,6 @@ EOF
   if [[ ! -z "${KUBELET_APISERVER:-}" ]] && [[ ! -z "${KUBELET_CERT:-}" ]] && [[ ! -z "${KUBELET_KEY:-}" ]]; then
     cat <<EOF >>/etc/salt/minion.d/grains.conf
   kubelet_api_servers: '${KUBELET_APISERVER}'
-  cbr-cidr: 10.123.45.0/29
 EOF
   else
     # If the kubelet is running disconnected from a master, give it a fixed
@@ -110,7 +109,6 @@ salt-node-role() {
 grains:
   roles:
     - kubernetes-pool
-  cbr-cidr: 10.123.45.0/29
   cloud: aws
   api_servers: '${API_SERVERS}'
 EOF
diff --git a/cluster/gce/configure-vm.sh b/cluster/gce/configure-vm.sh
index cb827c2ed5..6035859e53 100755
--- a/cluster/gce/configure-vm.sh
+++ b/cluster/gce/configure-vm.sh
@@ -954,7 +954,6 @@ EOF
   if [[ ! -z "${KUBELET_APISERVER:-}" ]] && [[ ! -z "${KUBELET_CERT:-}" ]] && [[ ! -z "${KUBELET_KEY:-}" ]]; then
     cat <<EOF >>/etc/salt/minion.d/grains.conf
   kubelet_api_servers: '${KUBELET_APISERVER}'
-  cbr-cidr: 10.123.45.0/29
 EOF
   else
     # If the kubelet is running disconnected from a master, give it a fixed
@@ -973,7 +972,6 @@ function salt-node-role() {
 grains:
   roles:
     - kubernetes-pool
-  cbr-cidr: 10.123.45.0/29
   cloud: gce
   api_servers: '${KUBERNETES_MASTER_NAME}'
 EOF
diff --git a/cluster/gce/gci/configure-helper.sh b/cluster/gce/gci/configure-helper.sh
index 18acc09b94..e96658f628 100644
--- a/cluster/gce/gci/configure-helper.sh
+++ b/cluster/gce/gci/configure-helper.sh
@@ -484,11 +484,8 @@ function start-kubelet {
     if [[ ! -z "${KUBELET_APISERVER:-}" && ! -z "${KUBELET_CERT:-}" && ! -z "${KUBELET_KEY:-}" ]]; then
       flags+=" --api-servers=https://${KUBELET_APISERVER}"
       flags+=" --register-schedulable=false"
-      # need at least a /29 pod cidr for now due to #32844
-      # TODO: determine if we still allow non-hostnetwork pods to run on master, clean up master pod setup
-      # WARNING: potential ip range collision with 10.123.45.0/29
-      flags+=" --pod-cidr=10.123.45.0/29"
     else
+      # Standalone mode (not widely used?)
       flags+=" --pod-cidr=${MASTER_IP_RANGE}"
     fi
   else # For nodes
diff --git a/cluster/gce/trusty/configure-helper.sh b/cluster/gce/trusty/configure-helper.sh
index d8647f18fe..f7b2715c92 100644
--- a/cluster/gce/trusty/configure-helper.sh
+++ b/cluster/gce/trusty/configure-helper.sh
@@ -155,7 +155,7 @@ assemble_kubelet_flags() {
   if [ ! -z "${KUBELET_APISERVER:-}" ] && \
      [ ! -z "${KUBELET_CERT:-}" ] && \
      [ ! -z "${KUBELET_KEY:-}" ]; then
-    KUBELET_CMD_FLAGS="${KUBELET_CMD_FLAGS} --api-servers=https://${KUBELET_APISERVER} --register-schedulable=false --pod-cidr=10.123.45.0/29"
+    KUBELET_CMD_FLAGS="${KUBELET_CMD_FLAGS} --api-servers=https://${KUBELET_APISERVER} --register-schedulable=false"
   else
     KUBELET_CMD_FLAGS="${KUBELET_CMD_FLAGS} --pod-cidr=${MASTER_IP_RANGE}"
   fi

From 88628119ab7acc8f44006157729db1ffdb70f1dd Mon Sep 17 00:00:00 2001
From: Justin Santa Barbara
Date: Fri, 4 Nov 2016 00:46:33 -0400
Subject: [PATCH 3/3] kubelet: testing for hostNetwork with network errors

---
 pkg/kubelet/kubelet_test.go | 42 +++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/pkg/kubelet/kubelet_test.go b/pkg/kubelet/kubelet_test.go
index c9787a885b..4ec06b148b 100644
--- a/pkg/kubelet/kubelet_test.go
+++ b/pkg/kubelet/kubelet_test.go
@@ -1054,6 +1054,48 @@ func TestPrivilegedContainerDisallowed(t *testing.T) {
 	assert.Error(t, err, "expected pod infra creation to fail")
 }
 
+func TestNetworkErrorsWithoutHostNetwork(t *testing.T) {
+	testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
+	testKubelet.fakeCadvisor.On("VersionInfo").Return(&cadvisorapi.VersionInfo{}, nil)
+	testKubelet.fakeCadvisor.On("MachineInfo").Return(&cadvisorapi.MachineInfo{}, nil)
+	testKubelet.fakeCadvisor.On("ImagesFsInfo").Return(cadvisorapiv2.FsInfo{}, nil)
+	testKubelet.fakeCadvisor.On("RootFsInfo").Return(cadvisorapiv2.FsInfo{}, nil)
+	kubelet := testKubelet.kubelet
+
+	kubelet.runtimeState.setNetworkState(fmt.Errorf("simulated network error"))
+	capabilities.SetForTests(capabilities.Capabilities{
+		PrivilegedSources: capabilities.PrivilegedSources{
+			HostNetworkSources: []string{kubetypes.ApiserverSource, kubetypes.FileSource},
+		},
+	})
+
+	pod := podWithUidNameNsSpec("12345678", "hostnetwork", "new", api.PodSpec{
+		SecurityContext: &api.PodSecurityContext{
+			HostNetwork: false,
+		},
+		Containers: []api.Container{
+			{Name: "foo"},
+		},
+	})
+
+	kubelet.podManager.SetPods([]*api.Pod{pod})
+	err := kubelet.syncPod(syncPodOptions{
+		pod:        pod,
+		podStatus:  &kubecontainer.PodStatus{},
+		updateType: kubetypes.SyncPodUpdate,
+	})
+	assert.Error(t, err, "expected pod with hostNetwork=false to fail when network in error")
+
+	pod.Annotations[kubetypes.ConfigSourceAnnotationKey] = kubetypes.FileSource
+	pod.Spec.SecurityContext.HostNetwork = true
+	err = kubelet.syncPod(syncPodOptions{
+		pod:        pod,
+		podStatus:  &kubecontainer.PodStatus{},
+		updateType: kubetypes.SyncPodUpdate,
+	})
+	assert.NoError(t, err, "expected pod with hostNetwork=true to succeed when network in error")
+}
+
 func TestFilterOutTerminatedPods(t *testing.T) {
 	testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
 	kubelet := testKubelet.kubelet
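
The standalone Go sketch below is not part of the patch series; it only illustrates the admission gate that PATCH 1 adds to syncPod. The types and helpers here (pod, networkErrors, canStartPod) are simplified stand-ins invented for illustration; only the hostNetwork exemption and the "network is not ready" error text come from the patch.

// Minimal sketch of the PATCH 1 gate, assuming simplified stand-in types.
package main

import (
	"errors"
	"fmt"
)

// pod models just the field the gate cares about (stand-in for api.Pod).
type pod struct {
	Name        string
	HostNetwork bool
}

// networkErrors mimics runtimeState.networkErrors(): it reports the network
// plugin error as a slice of strings, or nil when the network is ready.
func networkErrors(networkErr error) []string {
	if networkErr != nil {
		return []string{networkErr.Error()}
	}
	return nil
}

// canStartPod mirrors the check added to syncPod: while the network plugin
// reports errors, only pods that use the host network may start.
func canStartPod(p pod, networkErr error) error {
	if rs := networkErrors(networkErr); len(rs) != 0 && !p.HostNetwork {
		return fmt.Errorf("network is not ready: %v", rs)
	}
	return nil
}

func main() {
	netErr := errors.New("simulated network error")
	for _, p := range []pod{
		{Name: "proxy", HostNetwork: true}, // allowed: uses the host network
		{Name: "web", HostNetwork: false},  // blocked until the network is ready
	} {
		if err := canStartPod(p, netErr); err != nil {
			fmt.Printf("%s: blocked: %v\n", p.Name, err)
		} else {
			fmt.Printf("%s: allowed\n", p.Name)
		}
	}
}

Splitting networkErrors() out of runtimeErrors() is what makes this gate possible: syncLoop and node readiness still see network problems, while only non-hostNetwork pods are held back in syncPod.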