mirror of https://github.com/k3s-io/k3s
dockershim: don't spam logs with pod IP errors before networking is ready
GenericPLEG's 1s relist() loop races against pod network setup. It may be called after the infra container has started but before network setup is done, since PLEG and the runtime's SyncPod() run in different goroutines. Track network setup status and don't bother trying to read the pod's IP address if networking is not yet ready. See also: https://bugzilla.redhat.com/show_bug.cgi?id=1434950 Mar 22 12:18:17 ip-172-31-43-89 atomic-openshift-node: E0322 12:18:17.651013 25624 docker_manager.go:378] NetworkPlugin cni failed on the status hook for pod 'pausepods22' - Unexpected command output Device "eth0" does not exist.pull/6/head
parent
9e1385b450
commit
02200ba752
|
@ -48,6 +48,26 @@ const (
|
||||||
runtimeName = "docker"
|
runtimeName = "docker"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Returns whether the sandbox network is ready, and whether the sandbox is known
|
||||||
|
func (ds *dockerService) getNetworkReady(podSandboxID string) (bool, bool) {
|
||||||
|
ds.networkReadyLock.Lock()
|
||||||
|
defer ds.networkReadyLock.Unlock()
|
||||||
|
ready, ok := ds.networkReady[podSandboxID]
|
||||||
|
return ready, ok
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ds *dockerService) setNetworkReady(podSandboxID string, ready bool) {
|
||||||
|
ds.networkReadyLock.Lock()
|
||||||
|
defer ds.networkReadyLock.Unlock()
|
||||||
|
ds.networkReady[podSandboxID] = ready
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ds *dockerService) clearNetworkReady(podSandboxID string) {
|
||||||
|
ds.networkReadyLock.Lock()
|
||||||
|
defer ds.networkReadyLock.Unlock()
|
||||||
|
delete(ds.networkReady, podSandboxID)
|
||||||
|
}
|
||||||
|
|
||||||
// RunPodSandbox creates and starts a pod-level sandbox. Runtimes should ensure
|
// RunPodSandbox creates and starts a pod-level sandbox. Runtimes should ensure
|
||||||
// the sandbox is in ready state.
|
// the sandbox is in ready state.
|
||||||
// For docker, PodSandbox is implemented by a container holding the network
|
// For docker, PodSandbox is implemented by a container holding the network
|
||||||
|
@ -82,6 +102,8 @@ func (ds *dockerService) RunPodSandbox(config *runtimeapi.PodSandboxConfig) (str
|
||||||
return "", fmt.Errorf("failed to create a sandbox for pod %q: %v", config.Metadata.Name, err)
|
return "", fmt.Errorf("failed to create a sandbox for pod %q: %v", config.Metadata.Name, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ds.setNetworkReady(createResp.ID, false)
|
||||||
|
|
||||||
// Step 3: Create Sandbox Checkpoint.
|
// Step 3: Create Sandbox Checkpoint.
|
||||||
if err = ds.checkpointHandler.CreateCheckpoint(createResp.ID, constructPodSandboxCheckpoint(config)); err != nil {
|
if err = ds.checkpointHandler.CreateCheckpoint(createResp.ID, constructPodSandboxCheckpoint(config)); err != nil {
|
||||||
return createResp.ID, err
|
return createResp.ID, err
|
||||||
|
@ -114,6 +136,7 @@ func (ds *dockerService) RunPodSandbox(config *runtimeapi.PodSandboxConfig) (str
|
||||||
|
|
||||||
// Do not invoke network plugins if in hostNetwork mode.
|
// Do not invoke network plugins if in hostNetwork mode.
|
||||||
if nsOptions := config.GetLinux().GetSecurityContext().GetNamespaceOptions(); nsOptions != nil && nsOptions.HostNetwork {
|
if nsOptions := config.GetLinux().GetSecurityContext().GetNamespaceOptions(); nsOptions != nil && nsOptions.HostNetwork {
|
||||||
|
ds.setNetworkReady(createResp.ID, true)
|
||||||
return createResp.ID, nil
|
return createResp.ID, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -126,12 +149,15 @@ func (ds *dockerService) RunPodSandbox(config *runtimeapi.PodSandboxConfig) (str
|
||||||
// recognized by the CNI standard yet.
|
// recognized by the CNI standard yet.
|
||||||
cID := kubecontainer.BuildContainerID(runtimeName, createResp.ID)
|
cID := kubecontainer.BuildContainerID(runtimeName, createResp.ID)
|
||||||
err = ds.network.SetUpPod(config.GetMetadata().Namespace, config.GetMetadata().Name, cID, config.Annotations)
|
err = ds.network.SetUpPod(config.GetMetadata().Namespace, config.GetMetadata().Name, cID, config.Annotations)
|
||||||
if err != nil {
|
if err == nil {
|
||||||
|
ds.setNetworkReady(createResp.ID, true)
|
||||||
|
} else {
|
||||||
// TODO(random-liu): Do we need to teardown network here?
|
// TODO(random-liu): Do we need to teardown network here?
|
||||||
if err := ds.client.StopContainer(createResp.ID, defaultSandboxGracePeriod); err != nil {
|
if err := ds.client.StopContainer(createResp.ID, defaultSandboxGracePeriod); err != nil {
|
||||||
glog.Warningf("Failed to stop sandbox container %q for pod %q: %v", createResp.ID, config.Metadata.Name, err)
|
glog.Warningf("Failed to stop sandbox container %q for pod %q: %v", createResp.ID, config.Metadata.Name, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return createResp.ID, err
|
return createResp.ID, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -199,7 +225,10 @@ func (ds *dockerService) StopPodSandbox(podSandboxID string) error {
|
||||||
errList := []error{}
|
errList := []error{}
|
||||||
if needNetworkTearDown {
|
if needNetworkTearDown {
|
||||||
cID := kubecontainer.BuildContainerID(runtimeName, podSandboxID)
|
cID := kubecontainer.BuildContainerID(runtimeName, podSandboxID)
|
||||||
if err := ds.network.TearDownPod(namespace, name, cID); err != nil {
|
err := ds.network.TearDownPod(namespace, name, cID)
|
||||||
|
if err == nil {
|
||||||
|
ds.setNetworkReady(podSandboxID, false)
|
||||||
|
} else {
|
||||||
errList = append(errList, err)
|
errList = append(errList, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -241,6 +270,8 @@ func (ds *dockerService) RemovePodSandbox(podSandboxID string) error {
|
||||||
errs = append(errs, err)
|
errs = append(errs, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ds.clearNetworkReady(podSandboxID)
|
||||||
|
|
||||||
// Remove the checkpoint of the sandbox.
|
// Remove the checkpoint of the sandbox.
|
||||||
if err := ds.checkpointHandler.RemoveCheckpoint(podSandboxID); err != nil {
|
if err := ds.checkpointHandler.RemoveCheckpoint(podSandboxID); err != nil {
|
||||||
errs = append(errs, err)
|
errs = append(errs, err)
|
||||||
|
@ -258,9 +289,6 @@ func (ds *dockerService) getIPFromPlugin(sandbox *dockertypes.ContainerJSON) (st
|
||||||
cID := kubecontainer.BuildContainerID(runtimeName, sandbox.ID)
|
cID := kubecontainer.BuildContainerID(runtimeName, sandbox.ID)
|
||||||
networkStatus, err := ds.network.GetPodNetworkStatus(metadata.Namespace, metadata.Name, cID)
|
networkStatus, err := ds.network.GetPodNetworkStatus(metadata.Namespace, metadata.Name, cID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// This might be a sandbox that somehow ended up without a default
|
|
||||||
// interface (eth0). We can't distinguish this from a more serious
|
|
||||||
// error, so callers should probably treat it as non-fatal.
|
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
if networkStatus == nil {
|
if networkStatus == nil {
|
||||||
|
@ -272,7 +300,7 @@ func (ds *dockerService) getIPFromPlugin(sandbox *dockertypes.ContainerJSON) (st
|
||||||
// getIP returns the ip given the output of `docker inspect` on a pod sandbox,
|
// getIP returns the ip given the output of `docker inspect` on a pod sandbox,
|
||||||
// first interrogating any registered plugins, then simply trusting the ip
|
// first interrogating any registered plugins, then simply trusting the ip
|
||||||
// in the sandbox itself. We look for an ipv4 address before ipv6.
|
// in the sandbox itself. We look for an ipv4 address before ipv6.
|
||||||
func (ds *dockerService) getIP(sandbox *dockertypes.ContainerJSON) (string, error) {
|
func (ds *dockerService) getIP(podSandboxID string, sandbox *dockertypes.ContainerJSON) (string, error) {
|
||||||
if sandbox.NetworkSettings == nil {
|
if sandbox.NetworkSettings == nil {
|
||||||
return "", nil
|
return "", nil
|
||||||
}
|
}
|
||||||
|
@ -281,11 +309,18 @@ func (ds *dockerService) getIP(sandbox *dockertypes.ContainerJSON) (string, erro
|
||||||
// reporting the IP.
|
// reporting the IP.
|
||||||
return "", nil
|
return "", nil
|
||||||
}
|
}
|
||||||
if IP, err := ds.getIPFromPlugin(sandbox); err != nil {
|
|
||||||
glog.Warningf("%v", err)
|
// Don't bother getting IP if the pod is known and networking isn't ready
|
||||||
} else if IP != "" {
|
ready, ok := ds.getNetworkReady(podSandboxID)
|
||||||
return IP, nil
|
if ok && !ready {
|
||||||
|
return "", nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ip, err := ds.getIPFromPlugin(sandbox)
|
||||||
|
if err == nil {
|
||||||
|
return ip, nil
|
||||||
|
}
|
||||||
|
|
||||||
// TODO: trusting the docker ip is not a great idea. However docker uses
|
// TODO: trusting the docker ip is not a great idea. However docker uses
|
||||||
// eth0 by default and so does CNI, so if we find a docker IP here, we
|
// eth0 by default and so does CNI, so if we find a docker IP here, we
|
||||||
// conclude that the plugin must have failed setup, or forgotten its ip.
|
// conclude that the plugin must have failed setup, or forgotten its ip.
|
||||||
|
@ -294,9 +329,13 @@ func (ds *dockerService) getIP(sandbox *dockertypes.ContainerJSON) (string, erro
|
||||||
if sandbox.NetworkSettings.IPAddress != "" {
|
if sandbox.NetworkSettings.IPAddress != "" {
|
||||||
return sandbox.NetworkSettings.IPAddress, nil
|
return sandbox.NetworkSettings.IPAddress, nil
|
||||||
}
|
}
|
||||||
|
if sandbox.NetworkSettings.GlobalIPv6Address != "" {
|
||||||
return sandbox.NetworkSettings.GlobalIPv6Address, nil
|
return sandbox.NetworkSettings.GlobalIPv6Address, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return "", fmt.Errorf("failed to read pod IP from plugin/docker: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
// PodSandboxStatus returns the status of the PodSandbox.
|
// PodSandboxStatus returns the status of the PodSandbox.
|
||||||
func (ds *dockerService) PodSandboxStatus(podSandboxID string) (*runtimeapi.PodSandboxStatus, error) {
|
func (ds *dockerService) PodSandboxStatus(podSandboxID string) (*runtimeapi.PodSandboxStatus, error) {
|
||||||
// Inspect the container.
|
// Inspect the container.
|
||||||
|
@ -317,7 +356,7 @@ func (ds *dockerService) PodSandboxStatus(podSandboxID string) (*runtimeapi.PodS
|
||||||
if r.State.Running {
|
if r.State.Running {
|
||||||
state = runtimeapi.PodSandboxState_SANDBOX_READY
|
state = runtimeapi.PodSandboxState_SANDBOX_READY
|
||||||
}
|
}
|
||||||
IP, err := ds.getIP(r)
|
IP, err := ds.getIP(podSandboxID, r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
|
@ -133,6 +133,8 @@ func TestSandboxStatus(t *testing.T) {
|
||||||
expected.State = runtimeapi.PodSandboxState_SANDBOX_NOTREADY
|
expected.State = runtimeapi.PodSandboxState_SANDBOX_NOTREADY
|
||||||
err = ds.StopPodSandbox(id)
|
err = ds.StopPodSandbox(id)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
|
// IP not valid after sandbox stop
|
||||||
|
expected.Network.Ip = ""
|
||||||
status, err = ds.PodSandboxStatus(id)
|
status, err = ds.PodSandboxStatus(id)
|
||||||
assert.Equal(t, expected, status)
|
assert.Equal(t, expected, status)
|
||||||
|
|
||||||
|
@ -143,6 +145,49 @@ func TestSandboxStatus(t *testing.T) {
|
||||||
assert.Error(t, err, fmt.Sprintf("status of sandbox: %+v", status))
|
assert.Error(t, err, fmt.Sprintf("status of sandbox: %+v", status))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestSandboxStatusAfterRestart tests that retrieving sandbox status returns
|
||||||
|
// an IP address even if RunPodSandbox() was not yet called for this pod, as
|
||||||
|
// would happen on kubelet restart
|
||||||
|
func TestSandboxStatusAfterRestart(t *testing.T) {
|
||||||
|
ds, _, fClock := newTestDockerService()
|
||||||
|
config := makeSandboxConfig("foo", "bar", "1", 0)
|
||||||
|
|
||||||
|
// TODO: The following variables depend on the internal
|
||||||
|
// implementation of FakeDockerClient, and should be fixed.
|
||||||
|
fakeIP := "2.3.4.5"
|
||||||
|
|
||||||
|
state := runtimeapi.PodSandboxState_SANDBOX_READY
|
||||||
|
ct := int64(0)
|
||||||
|
hostNetwork := false
|
||||||
|
expected := &runtimeapi.PodSandboxStatus{
|
||||||
|
State: state,
|
||||||
|
CreatedAt: ct,
|
||||||
|
Metadata: config.Metadata,
|
||||||
|
Network: &runtimeapi.PodSandboxNetworkStatus{Ip: fakeIP},
|
||||||
|
Linux: &runtimeapi.LinuxPodSandboxStatus{Namespaces: &runtimeapi.Namespace{Options: &runtimeapi.NamespaceOption{HostNetwork: hostNetwork}}},
|
||||||
|
Labels: map[string]string{},
|
||||||
|
Annotations: map[string]string{},
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create the sandbox.
|
||||||
|
fClock.SetTime(time.Now())
|
||||||
|
expected.CreatedAt = fClock.Now().UnixNano()
|
||||||
|
|
||||||
|
createConfig, err := ds.makeSandboxDockerConfig(config, defaultSandboxImage)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
createResp, err := ds.client.CreateContainer(*createConfig)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
err = ds.client.StartContainer(createResp.ID)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// Check status without RunPodSandbox() having set up networking
|
||||||
|
expected.Id = createResp.ID // ID is only known after the creation.
|
||||||
|
status, err := ds.PodSandboxStatus(createResp.ID)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, expected, status)
|
||||||
|
}
|
||||||
|
|
||||||
// TestNetworkPluginInvocation checks that the right SetUpPod and TearDownPod
|
// TestNetworkPluginInvocation checks that the right SetUpPod and TearDownPod
|
||||||
// calls are made when we run/stop a sandbox.
|
// calls are made when we run/stop a sandbox.
|
||||||
func TestNetworkPluginInvocation(t *testing.T) {
|
func TestNetworkPluginInvocation(t *testing.T) {
|
||||||
|
|
|
@ -21,6 +21,7 @@ import (
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/blang/semver"
|
"github.com/blang/semver"
|
||||||
|
@ -175,6 +176,7 @@ func NewDockerService(client libdocker.Interface, seccompProfileRoot string, pod
|
||||||
containerManager: cm.NewContainerManager(cgroupsName, client),
|
containerManager: cm.NewContainerManager(cgroupsName, client),
|
||||||
checkpointHandler: checkpointHandler,
|
checkpointHandler: checkpointHandler,
|
||||||
disableSharedPID: disableSharedPID,
|
disableSharedPID: disableSharedPID,
|
||||||
|
networkReady: make(map[string]bool),
|
||||||
}
|
}
|
||||||
|
|
||||||
// check docker version compatibility.
|
// check docker version compatibility.
|
||||||
|
@ -248,7 +250,12 @@ type dockerService struct {
|
||||||
podSandboxImage string
|
podSandboxImage string
|
||||||
streamingRuntime *streamingRuntime
|
streamingRuntime *streamingRuntime
|
||||||
streamingServer streaming.Server
|
streamingServer streaming.Server
|
||||||
|
|
||||||
network *network.PluginManager
|
network *network.PluginManager
|
||||||
|
// Map of podSandboxID :: network-is-ready
|
||||||
|
networkReady map[string]bool
|
||||||
|
networkReadyLock sync.Mutex
|
||||||
|
|
||||||
containerManager cm.ContainerManager
|
containerManager cm.ContainerManager
|
||||||
// cgroup driver used by Docker runtime.
|
// cgroup driver used by Docker runtime.
|
||||||
cgroupDriver string
|
cgroupDriver string
|
||||||
|
|
|
@ -46,8 +46,14 @@ func newTestDockerService() (*dockerService, *libdocker.FakeDockerClient, *clock
|
||||||
fakeClock := clock.NewFakeClock(time.Time{})
|
fakeClock := clock.NewFakeClock(time.Time{})
|
||||||
c := libdocker.NewFakeDockerClient().WithClock(fakeClock).WithVersion("1.11.2", "1.23")
|
c := libdocker.NewFakeDockerClient().WithClock(fakeClock).WithVersion("1.11.2", "1.23")
|
||||||
pm := network.NewPluginManager(&network.NoopNetworkPlugin{})
|
pm := network.NewPluginManager(&network.NoopNetworkPlugin{})
|
||||||
return &dockerService{client: c, os: &containertest.FakeOS{}, network: pm,
|
return &dockerService{
|
||||||
legacyCleanup: legacyCleanupFlag{done: 1}, checkpointHandler: NewTestPersistentCheckpointHandler()}, c, fakeClock
|
client: c,
|
||||||
|
os: &containertest.FakeOS{},
|
||||||
|
network: pm,
|
||||||
|
legacyCleanup: legacyCleanupFlag{done: 1},
|
||||||
|
checkpointHandler: NewTestPersistentCheckpointHandler(),
|
||||||
|
networkReady: make(map[string]bool),
|
||||||
|
}, c, fakeClock
|
||||||
}
|
}
|
||||||
|
|
||||||
func newTestDockerServiceWithVersionCache() (*dockerService, *libdocker.FakeDockerClient, *clock.FakeClock) {
|
func newTestDockerServiceWithVersionCache() (*dockerService, *libdocker.FakeDockerClient, *clock.FakeClock) {
|
||||||
|
|
Loading…
Reference in New Issue