mirror of https://github.com/k3s-io/k3s
Merge pull request #46823 from dcbw/fix-up-runtime-GetNetNS2
Automatic merge from submit-queue (batch tested with PRs 46441, 43987, 46921, 46823, 47276)

kubelet/network: report but tolerate errors returned from GetNetNS() v2

Runtimes should never return "" and nil errors, since network plugin drivers need to treat the netns differently in different cases. So return errors when we can't get the netns, and fix up the plugins to do the right thing. Namely, we don't need a netns on pod network teardown, but we do need one for pod status checks and for network setup.

V2: don't return errors from getIP(), since they would block pod status; just log them. Even so, this still fixes the original problem by ensuring we don't log errors when the network isn't ready.

@freehan @yujuhong

Fixes: https://github.com/kubernetes/kubernetes/issues/42735
Fixes: https://github.com/kubernetes/kubernetes/issues/44307
commit 22dc980aa4
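The heart of the change, visible in the dockershim hunks below, is a small piece of per-sandbox bookkeeping: a mutex-guarded map from sandbox ID to a network-ready flag, consulted before asking the network plugin for a pod IP, so that pod status neither fails nor logs spurious errors while networking is still being set up. The following standalone Go sketch illustrates that pattern under stated assumptions; names such as sandboxNetwork, podIP, and lookupIPFromPlugin are illustrative stand-ins, not the actual dockershim API.

package main

import (
	"fmt"
	"sync"
)

// sandboxNetwork is an illustrative stand-in for the dockershim's networkReady
// bookkeeping: it records, per sandbox ID, whether pod networking has been set
// up successfully.
type sandboxNetwork struct {
	mu    sync.Mutex
	ready map[string]bool
}

func newSandboxNetwork() *sandboxNetwork {
	return &sandboxNetwork{ready: make(map[string]bool)}
}

// setReady marks the sandbox as having (or not having) working networking.
func (s *sandboxNetwork) setReady(id string, ready bool) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.ready[id] = ready
}

// getReady reports the readiness flag and whether the sandbox is known at all.
func (s *sandboxNetwork) getReady(id string) (ready, known bool) {
	s.mu.Lock()
	defer s.mu.Unlock()
	ready, known = s.ready[id]
	return ready, known
}

// clear forgets the sandbox entirely, e.g. after the sandbox is removed.
func (s *sandboxNetwork) clear(id string) {
	s.mu.Lock()
	defer s.mu.Unlock()
	delete(s.ready, id)
}

// podIP mirrors the getIP logic described in the commit message: if the sandbox
// is known and its networking is not ready yet, skip the plugin lookup instead
// of logging an error; otherwise try the plugin and only warn on failure.
func podIP(s *sandboxNetwork, id string, lookupIPFromPlugin func(string) (string, error)) string {
	if ready, known := s.getReady(id); known && !ready {
		return ""
	}
	ip, err := lookupIPFromPlugin(id)
	if err == nil {
		return ip
	}
	// Pod status should not fail outright on a plugin error; warn and move on.
	fmt.Printf("warning: failed to read pod IP from plugin: %v\n", err)
	return ""
}

func main() {
	s := newSandboxNetwork()
	s.setReady("sandbox-1", false) // networking not set up yet
	fmt.Printf("ip=%q\n", podIP(s, "sandbox-1", func(string) (string, error) {
		return "", fmt.Errorf("network plugin not ready")
	}))
	s.setReady("sandbox-1", true)
	fmt.Printf("ip=%q\n", podIP(s, "sandbox-1", func(string) (string, error) {
		return "10.1.2.3", nil
	}))
	s.clear("sandbox-1")
}

With the sandbox marked not-ready, the first lookup is skipped entirely; once it is marked ready, the plugin result is used, and a plugin failure only produces a warning rather than an error in the status path.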
@@ -48,12 +48,32 @@ const (
 	runtimeName = "docker"
 )
 
+// Returns whether the sandbox network is ready, and whether the sandbox is known
+func (ds *dockerService) getNetworkReady(podSandboxID string) (bool, bool) {
+	ds.networkReadyLock.Lock()
+	defer ds.networkReadyLock.Unlock()
+	ready, ok := ds.networkReady[podSandboxID]
+	return ready, ok
+}
+
+func (ds *dockerService) setNetworkReady(podSandboxID string, ready bool) {
+	ds.networkReadyLock.Lock()
+	defer ds.networkReadyLock.Unlock()
+	ds.networkReady[podSandboxID] = ready
+}
+
+func (ds *dockerService) clearNetworkReady(podSandboxID string) {
+	ds.networkReadyLock.Lock()
+	defer ds.networkReadyLock.Unlock()
+	delete(ds.networkReady, podSandboxID)
+}
+
 // RunPodSandbox creates and starts a pod-level sandbox. Runtimes should ensure
 // the sandbox is in ready state.
 // For docker, PodSandbox is implemented by a container holding the network
 // namespace for the pod.
 // Note: docker doesn't use LogDirectory (yet).
-func (ds *dockerService) RunPodSandbox(config *runtimeapi.PodSandboxConfig) (string, error) {
+func (ds *dockerService) RunPodSandbox(config *runtimeapi.PodSandboxConfig) (id string, err error) {
 	// Step 1: Pull the image for the sandbox.
 	image := defaultSandboxImage
 	podSandboxImage := ds.podSandboxImage

@@ -82,6 +102,15 @@ func (ds *dockerService) RunPodSandbox(config *runtimeapi.PodSandboxConfig) (str
 		return "", fmt.Errorf("failed to create a sandbox for pod %q: %v", config.Metadata.Name, err)
 	}
 
+	ds.setNetworkReady(createResp.ID, false)
+	defer func(e *error) {
+		// Set networking ready depending on the error return of
+		// the parent function
+		if *e == nil {
+			ds.setNetworkReady(createResp.ID, true)
+		}
+	}(&err)
+
 	// Step 3: Create Sandbox Checkpoint.
 	if err = ds.checkpointHandler.CreateCheckpoint(createResp.ID, constructPodSandboxCheckpoint(config)); err != nil {
 		return createResp.ID, err

@@ -199,7 +228,10 @@ func (ds *dockerService) StopPodSandbox(podSandboxID string) error {
 	errList := []error{}
 	if needNetworkTearDown {
 		cID := kubecontainer.BuildContainerID(runtimeName, podSandboxID)
-		if err := ds.network.TearDownPod(namespace, name, cID); err != nil {
+		err := ds.network.TearDownPod(namespace, name, cID)
+		if err == nil {
+			ds.setNetworkReady(podSandboxID, false)
+		} else {
 			errList = append(errList, err)
 		}
 	}

@@ -241,6 +273,8 @@ func (ds *dockerService) RemovePodSandbox(podSandboxID string) error {
 		errs = append(errs, err)
 	}
 
+	ds.clearNetworkReady(podSandboxID)
+
 	// Remove the checkpoint of the sandbox.
 	if err := ds.checkpointHandler.RemoveCheckpoint(podSandboxID); err != nil {
 		errs = append(errs, err)

@@ -258,9 +292,6 @@ func (ds *dockerService) getIPFromPlugin(sandbox *dockertypes.ContainerJSON) (st
 	cID := kubecontainer.BuildContainerID(runtimeName, sandbox.ID)
 	networkStatus, err := ds.network.GetPodNetworkStatus(metadata.Namespace, metadata.Name, cID)
 	if err != nil {
-		// This might be a sandbox that somehow ended up without a default
-		// interface (eth0). We can't distinguish this from a more serious
-		// error, so callers should probably treat it as non-fatal.
 		return "", err
 	}
 	if networkStatus == nil {

@@ -272,29 +303,44 @@ func (ds *dockerService) getIPFromPlugin(sandbox *dockertypes.ContainerJSON) (st
 // getIP returns the ip given the output of `docker inspect` on a pod sandbox,
 // first interrogating any registered plugins, then simply trusting the ip
 // in the sandbox itself. We look for an ipv4 address before ipv6.
-func (ds *dockerService) getIP(sandbox *dockertypes.ContainerJSON) (string, error) {
+func (ds *dockerService) getIP(podSandboxID string, sandbox *dockertypes.ContainerJSON) string {
 	if sandbox.NetworkSettings == nil {
-		return "", nil
+		return ""
 	}
 	if sharesHostNetwork(sandbox) {
 		// For sandboxes using host network, the shim is not responsible for
 		// reporting the IP.
-		return "", nil
+		return ""
 	}
-	if IP, err := ds.getIPFromPlugin(sandbox); err != nil {
-		glog.Warningf("%v", err)
-	} else if IP != "" {
-		return IP, nil
+
+	// Don't bother getting IP if the pod is known and networking isn't ready
+	ready, ok := ds.getNetworkReady(podSandboxID)
+	if ok && !ready {
+		return ""
+	}
+
+	ip, err := ds.getIPFromPlugin(sandbox)
+	if err == nil {
+		return ip
 	}
+
 	// TODO: trusting the docker ip is not a great idea. However docker uses
 	// eth0 by default and so does CNI, so if we find a docker IP here, we
 	// conclude that the plugin must have failed setup, or forgotten its ip.
 	// This is not a sensible assumption for plugins across the board, but if
 	// a plugin doesn't want this behavior, it can throw an error.
 	if sandbox.NetworkSettings.IPAddress != "" {
-		return sandbox.NetworkSettings.IPAddress, nil
+		return sandbox.NetworkSettings.IPAddress
 	}
-	return sandbox.NetworkSettings.GlobalIPv6Address, nil
+	if sandbox.NetworkSettings.GlobalIPv6Address != "" {
+		return sandbox.NetworkSettings.GlobalIPv6Address
+	}
+
+	// If all else fails, warn but don't return an error, as pod status
+	// should generally not return anything except fatal errors
+	// FIXME: handle network errors by restarting the pod somehow?
+	glog.Warningf("failed to read pod IP from plugin/docker: %v", err)
+	return ""
 }
 
 // PodSandboxStatus returns the status of the PodSandbox.

@@ -322,12 +368,8 @@ func (ds *dockerService) PodSandboxStatus(podSandboxID string) (*runtimeapi.PodS
 	// TODO: Remove this when sandbox is available on windows
 	// This is a workaround for windows, where sandbox is not in use, and pod IP is determined through containers belonging to the Pod.
 	if IP = ds.determinePodIPBySandboxID(podSandboxID); IP == "" {
-		IP, err = ds.getIP(r)
-		if err != nil {
-			return nil, err
-		}
+		IP = ds.getIP(podSandboxID, r)
 	}
-	network := &runtimeapi.PodSandboxNetworkStatus{Ip: IP}
 	hostNetwork := sharesHostNetwork(r)
 
 	// If the sandbox has no containerTypeLabelKey label, treat it as a legacy sandbox.

@@ -353,7 +395,9 @@ func (ds *dockerService) PodSandboxStatus(podSandboxID string) (*runtimeapi.PodS
 		Metadata: metadata,
 		Labels: labels,
 		Annotations: annotations,
-		Network: network,
+		Network: &runtimeapi.PodSandboxNetworkStatus{
+			Ip: IP,
+		},
 		Linux: &runtimeapi.LinuxPodSandboxStatus{
 			Namespaces: &runtimeapi.Namespace{
 				Options: &runtimeapi.NamespaceOption{

@@ -133,6 +133,8 @@ func TestSandboxStatus(t *testing.T) {
 	expected.State = runtimeapi.PodSandboxState_SANDBOX_NOTREADY
 	err = ds.StopPodSandbox(id)
 	assert.NoError(t, err)
+	// IP not valid after sandbox stop
+	expected.Network.Ip = ""
 	status, err = ds.PodSandboxStatus(id)
 	assert.Equal(t, expected, status)
 

@@ -143,6 +145,49 @@ func TestSandboxStatus(t *testing.T) {
 	assert.Error(t, err, fmt.Sprintf("status of sandbox: %+v", status))
 }
 
+// TestSandboxStatusAfterRestart tests that retrieving sandbox status returns
+// an IP address even if RunPodSandbox() was not yet called for this pod, as
+// would happen on kubelet restart
+func TestSandboxStatusAfterRestart(t *testing.T) {
+	ds, _, fClock := newTestDockerService()
+	config := makeSandboxConfig("foo", "bar", "1", 0)
+
+	// TODO: The following variables depend on the internal
+	// implementation of FakeDockerClient, and should be fixed.
+	fakeIP := "2.3.4.5"
+
+	state := runtimeapi.PodSandboxState_SANDBOX_READY
+	ct := int64(0)
+	hostNetwork := false
+	expected := &runtimeapi.PodSandboxStatus{
+		State: state,
+		CreatedAt: ct,
+		Metadata: config.Metadata,
+		Network: &runtimeapi.PodSandboxNetworkStatus{Ip: fakeIP},
+		Linux: &runtimeapi.LinuxPodSandboxStatus{Namespaces: &runtimeapi.Namespace{Options: &runtimeapi.NamespaceOption{HostNetwork: hostNetwork}}},
+		Labels: map[string]string{},
+		Annotations: map[string]string{},
+	}
+
+	// Create the sandbox.
+	fClock.SetTime(time.Now())
+	expected.CreatedAt = fClock.Now().UnixNano()
+
+	createConfig, err := ds.makeSandboxDockerConfig(config, defaultSandboxImage)
+	assert.NoError(t, err)
+
+	createResp, err := ds.client.CreateContainer(*createConfig)
+	assert.NoError(t, err)
+	err = ds.client.StartContainer(createResp.ID)
+	assert.NoError(t, err)
+
+	// Check status without RunPodSandbox() having set up networking
+	expected.Id = createResp.ID // ID is only known after the creation.
+	status, err := ds.PodSandboxStatus(createResp.ID)
+	assert.NoError(t, err)
+	assert.Equal(t, expected, status)
+}
+
 // TestNetworkPluginInvocation checks that the right SetUpPod and TearDownPod
 // calls are made when we run/stop a sandbox.
 func TestNetworkPluginInvocation(t *testing.T) {

@@ -21,6 +21,7 @@ import (
 	"io"
 	"net/http"
 	"strconv"
+	"sync"
 	"time"
 
 	"github.com/blang/semver"

@@ -175,6 +176,7 @@ func NewDockerService(client libdocker.Interface, seccompProfileRoot string, pod
 		containerManager: cm.NewContainerManager(cgroupsName, client),
 		checkpointHandler: checkpointHandler,
 		disableSharedPID: disableSharedPID,
+		networkReady: make(map[string]bool),
 	}
 
 	// check docker version compatibility.

@@ -248,7 +250,12 @@ type dockerService struct {
 	podSandboxImage string
 	streamingRuntime *streamingRuntime
 	streamingServer streaming.Server
+
 	network *network.PluginManager
+	// Map of podSandboxID :: network-is-ready
+	networkReady map[string]bool
+	networkReadyLock sync.Mutex
+
 	containerManager cm.ContainerManager
 	// cgroup driver used by Docker runtime.
 	cgroupDriver string

@@ -315,7 +322,7 @@ func (ds *dockerService) GetNetNS(podSandboxID string) (string, error) {
 	if err != nil {
 		return "", err
 	}
-	return getNetworkNamespace(r), nil
+	return getNetworkNamespace(r)
 }
 
 // GetPodPortMappings returns the port mappings of the given podSandbox ID.

@@ -46,8 +46,14 @@ func newTestDockerService() (*dockerService, *libdocker.FakeDockerClient, *clock
 	fakeClock := clock.NewFakeClock(time.Time{})
 	c := libdocker.NewFakeDockerClient().WithClock(fakeClock).WithVersion("1.11.2", "1.23")
 	pm := network.NewPluginManager(&network.NoopNetworkPlugin{})
-	return &dockerService{client: c, os: &containertest.FakeOS{}, network: pm,
-		legacyCleanup: legacyCleanupFlag{done: 1}, checkpointHandler: NewTestPersistentCheckpointHandler()}, c, fakeClock
+	return &dockerService{
+		client: c,
+		os: &containertest.FakeOS{},
+		network: pm,
+		legacyCleanup: legacyCleanupFlag{done: 1},
+		checkpointHandler: NewTestPersistentCheckpointHandler(),
+		networkReady: make(map[string]bool),
+	}, c, fakeClock
 }
 
 func newTestDockerServiceWithVersionCache() (*dockerService, *libdocker.FakeDockerClient, *clock.FakeClock) {

@@ -266,14 +266,12 @@ func getApparmorSecurityOpts(sc *runtimeapi.LinuxContainerSecurityContext, separ
 	return fmtOpts, nil
 }
 
-func getNetworkNamespace(c *dockertypes.ContainerJSON) string {
+func getNetworkNamespace(c *dockertypes.ContainerJSON) (string, error) {
 	if c.State.Pid == 0 {
-		// Docker reports pid 0 for an exited container. We can't use it to
-		// check the network namespace, so return an empty string instead.
-		glog.V(4).Infof("Cannot find network namespace for the terminated container %q", c.ID)
-		return ""
+		// Docker reports pid 0 for an exited container.
+		return "", fmt.Errorf("Cannot find network namespace for the terminated container %q", c.ID)
 	}
-	return fmt.Sprintf(dockerNetNSFmt, c.State.Pid)
+	return fmt.Sprintf(dockerNetNSFmt, c.State.Pid), nil
 }
 
 // dockerFilter wraps around dockerfilters.Args and provides methods to modify

@@ -251,9 +251,11 @@ func (plugin *cniNetworkPlugin) TearDownPod(namespace string, name string, id ku
 	if err := plugin.checkInitialized(); err != nil {
 		return err
 	}
+
+	// Lack of namespace should not be fatal on teardown
 	netnsPath, err := plugin.host.GetNetNS(id.ID)
 	if err != nil {
-		return fmt.Errorf("CNI failed to retrieve network namespace path: %v", err)
+		glog.Warningf("CNI failed to retrieve network namespace path: %v", err)
 	}
 
 	return plugin.deleteFromNetwork(plugin.getDefaultNetwork(), name, namespace, id, netnsPath)

@@ -741,9 +741,9 @@ func podIsExited(p *kubecontainer.Pod) bool {
 	return true
 }
 
-func (plugin *kubenetNetworkPlugin) buildCNIRuntimeConf(ifName string, id kubecontainer.ContainerID) (*libcni.RuntimeConf, error) {
+func (plugin *kubenetNetworkPlugin) buildCNIRuntimeConf(ifName string, id kubecontainer.ContainerID, needNetNs bool) (*libcni.RuntimeConf, error) {
 	netnsPath, err := plugin.host.GetNetNS(id.ID)
-	if err != nil {
+	if needNetNs && err != nil {
 		glog.Errorf("Kubenet failed to retrieve network namespace path: %v", err)
 	}
 

@@ -755,7 +755,7 @@ func (plugin *kubenetNetworkPlugin) buildCNIRuntimeConf(ifName string, id kubeco
 }
 
 func (plugin *kubenetNetworkPlugin) addContainerToNetwork(config *libcni.NetworkConfig, ifName, namespace, name string, id kubecontainer.ContainerID) (cnitypes.Result, error) {
-	rt, err := plugin.buildCNIRuntimeConf(ifName, id)
+	rt, err := plugin.buildCNIRuntimeConf(ifName, id, true)
 	if err != nil {
 		return nil, fmt.Errorf("Error building CNI config: %v", err)
 	}

@@ -769,7 +769,7 @@ func (plugin *kubenetNetworkPlugin) addContainerToNetwork(config *libcni.Network
 }
 
 func (plugin *kubenetNetworkPlugin) delContainerFromNetwork(config *libcni.NetworkConfig, ifName, namespace, name string, id kubecontainer.ContainerID) error {
-	rt, err := plugin.buildCNIRuntimeConf(ifName, id)
+	rt, err := plugin.buildCNIRuntimeConf(ifName, id, false)
 	if err != nil {
 		return fmt.Errorf("Error building CNI config: %v", err)
 	}

@@ -144,6 +144,8 @@ type Host interface {
 // CNI plugin wrappers like kubenet.
 type NamespaceGetter interface {
 	// GetNetNS returns network namespace information for the given containerID.
+	// Runtimes should *never* return an empty namespace and nil error for
+	// a container; if error is nil then the namespace string must be valid.
 	GetNetNS(containerID string) (string, error)
 }
 
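The interface comment above is the contract the rest of the change leans on: GetNetNS either returns a usable namespace path or an error, never an empty string with a nil error. Below is a hedged sketch of how a caller might honor that contract, requiring the namespace on setup but only warning on teardown, in the spirit of the CNI and kubenet hunks above; netnsLookup, setUpPod, and tearDownPod are illustrative names, not the real plugin API.

package main

import (
	"errors"
	"fmt"
	"log"
)

// netnsLookup stands in for the GetNetNS contract described above: it must
// never return ("", nil); if err is nil the returned path must be valid.
type netnsLookup func(containerID string) (string, error)

// setUpPod needs a network namespace, so a lookup failure is fatal here.
func setUpPod(getNetNS netnsLookup, containerID string) error {
	netnsPath, err := getNetNS(containerID)
	if err != nil {
		return fmt.Errorf("failed to retrieve network namespace path: %v", err)
	}
	log.Printf("setting up pod network in %s", netnsPath)
	return nil
}

// tearDownPod tolerates a missing namespace: the container may already be
// gone, and teardown should still proceed with whatever state remains.
func tearDownPod(getNetNS netnsLookup, containerID string) error {
	netnsPath, err := getNetNS(containerID)
	if err != nil {
		log.Printf("warning: failed to retrieve network namespace path: %v", err)
	}
	log.Printf("tearing down pod network (netns=%q)", netnsPath)
	return nil
}

func main() {
	// Simulate a runtime whose container has already exited, so the netns
	// lookup returns an error rather than an empty path with a nil error.
	exited := netnsLookup(func(id string) (string, error) {
		return "", errors.New("cannot find network namespace for terminated container " + id)
	})
	if err := setUpPod(exited, "abc123"); err != nil {
		log.Printf("setup failed as expected: %v", err)
	}
	_ = tearDownPod(exited, "abc123") // proceeds despite the missing netns
}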