mirror of https://github.com/k3s-io/k3s
Merge pull request #40448 from Random-Liu/work-around-container-create-conflict
Automatic merge from submit-queue (batch tested with PRs 40239, 40397, 40449, 40448, 40360) CRI: Work around container create conflict. Fixes https://github.com/kubernetes/kubernetes/issues/40443. This PR added a random suffix in the container name when we: * Failed to create the container because of "Conflict". * And failed to remove the container because of "No such container". @yujuhong @feiskyer /cc @kubernetes/sig-node-bugspull/6/head
commit
5f01179aff
|
@ -176,7 +176,9 @@ func (ds *dockerService) CreateContainer(podSandboxID string, config *runtimeapi
|
|||
|
||||
createConfig.HostConfig = hc
|
||||
createResp, err := ds.client.CreateContainer(createConfig)
|
||||
recoverFromConflictIfNeeded(ds.client, err)
|
||||
if err != nil {
|
||||
createResp, err = recoverFromCreationConflictIfNeeded(ds.client, createConfig, err)
|
||||
}
|
||||
|
||||
if createResp != nil {
|
||||
return createResp.ID, err
|
||||
|
|
|
@ -19,6 +19,7 @@ package dockershim
|
|||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
|
@ -215,3 +216,69 @@ func TestContainerLogPath(t *testing.T) {
|
|||
assert.NoError(t, err)
|
||||
assert.Equal(t, fakeOS.Removes, []string{kubeletContainerLogPath})
|
||||
}
|
||||
|
||||
// TestContainerCreationConflict tests the logic to work around docker container
|
||||
// creation naming conflict bug.
|
||||
func TestContainerCreationConflict(t *testing.T) {
|
||||
sConfig := makeSandboxConfig("foo", "bar", "1", 0)
|
||||
config := makeContainerConfig(sConfig, "pause", "iamimage", 0, map[string]string{}, map[string]string{})
|
||||
containerName := makeContainerName(sConfig, config)
|
||||
const sandboxId = "sandboxid"
|
||||
const containerId = "containerid"
|
||||
conflictError := fmt.Errorf("Error response from daemon: Conflict. The name \"/%s\" is already in use by container %s. You have to remove (or rename) that container to be able to reuse that name.",
|
||||
containerName, containerId)
|
||||
noContainerError := fmt.Errorf("Error response from daemon: No such container: %s", containerId)
|
||||
randomError := fmt.Errorf("random error")
|
||||
|
||||
for desc, test := range map[string]struct {
|
||||
createError error
|
||||
removeError error
|
||||
expectError error
|
||||
expectCalls []string
|
||||
expectFields int
|
||||
}{
|
||||
"no create error": {
|
||||
expectCalls: []string{"create"},
|
||||
expectFields: 6,
|
||||
},
|
||||
"random create error": {
|
||||
createError: randomError,
|
||||
expectError: randomError,
|
||||
expectCalls: []string{"create"},
|
||||
expectFields: 1,
|
||||
},
|
||||
"conflict create error with successful remove": {
|
||||
createError: conflictError,
|
||||
expectError: conflictError,
|
||||
expectCalls: []string{"create", "remove"},
|
||||
expectFields: 1,
|
||||
},
|
||||
"conflict create error with random remove error": {
|
||||
createError: conflictError,
|
||||
removeError: randomError,
|
||||
expectError: conflictError,
|
||||
expectCalls: []string{"create", "remove"},
|
||||
expectFields: 1,
|
||||
},
|
||||
"conflict create error with no such container remove error": {
|
||||
createError: conflictError,
|
||||
removeError: noContainerError,
|
||||
expectCalls: []string{"create", "remove", "create"},
|
||||
expectFields: 7,
|
||||
},
|
||||
} {
|
||||
t.Logf("TestCase: %s", desc)
|
||||
ds, fDocker, _ := newTestDockerService()
|
||||
|
||||
if test.createError != nil {
|
||||
fDocker.InjectError("create", test.createError)
|
||||
}
|
||||
if test.removeError != nil {
|
||||
fDocker.InjectError("remove", test.removeError)
|
||||
}
|
||||
name, err := ds.CreateContainer(sandboxId, config, sConfig)
|
||||
assert.Equal(t, test.expectError, err)
|
||||
assert.NoError(t, fDocker.AssertCalls(test.expectCalls))
|
||||
assert.Len(t, strings.Split(name, nameDelimiter), test.expectFields)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -69,7 +69,9 @@ func (ds *dockerService) RunPodSandbox(config *runtimeapi.PodSandboxConfig) (str
|
|||
return "", fmt.Errorf("failed to make sandbox docker config for pod %q: %v", config.Metadata.Name, err)
|
||||
}
|
||||
createResp, err := ds.client.CreateContainer(*createConfig)
|
||||
recoverFromConflictIfNeeded(ds.client, err)
|
||||
if err != nil {
|
||||
createResp, err = recoverFromCreationConflictIfNeeded(ds.client, *createConfig, err)
|
||||
}
|
||||
|
||||
if err != nil || createResp == nil {
|
||||
return "", fmt.Errorf("failed to create a sandbox for pod %q: %v", config.Metadata.Name, err)
|
||||
|
|
|
@ -39,7 +39,8 @@ const (
|
|||
)
|
||||
|
||||
var (
|
||||
conflictRE = regexp.MustCompile(`Conflict. (?:.)+ is already in use by container ([0-9a-z]+)`)
|
||||
conflictRE = regexp.MustCompile(`Conflict. (?:.)+ is already in use by container ([0-9a-z]+)`)
|
||||
noContainerRE = regexp.MustCompile(`No such container: [0-9a-z]+`)
|
||||
)
|
||||
|
||||
// apiVersion implements kubecontainer.Version interface by implementing
|
||||
|
@ -295,22 +296,31 @@ func getUserFromImageUser(imageUser string) (*int64, string) {
|
|||
// create a new container named FOO. To work around this, we parse the error
|
||||
// message to identify failure caused by naming conflict, and try to remove
|
||||
// the old container FOO.
|
||||
// See #40443. Sometimes even removal may fail with "no such container" error.
|
||||
// In that case we have to create the container with a randomized name.
|
||||
// TODO(random-liu): Remove this work around after docker 1.11 is deprecated.
|
||||
// TODO(#33189): Monitor the tests to see if the fix is sufficent.
|
||||
func recoverFromConflictIfNeeded(client dockertools.DockerInterface, err error) {
|
||||
if err == nil {
|
||||
return
|
||||
}
|
||||
|
||||
func recoverFromCreationConflictIfNeeded(client dockertools.DockerInterface, createConfig dockertypes.ContainerCreateConfig, err error) (*dockertypes.ContainerCreateResponse, error) {
|
||||
matches := conflictRE.FindStringSubmatch(err.Error())
|
||||
if len(matches) != 2 {
|
||||
return
|
||||
return nil, err
|
||||
}
|
||||
|
||||
id := matches[1]
|
||||
glog.Warningf("Unable to create pod sandbox due to conflict. Attempting to remove sandbox %q", id)
|
||||
if err := client.RemoveContainer(id, dockertypes.ContainerRemoveOptions{RemoveVolumes: true}); err != nil {
|
||||
glog.Errorf("Failed to remove the conflicting sandbox container: %v", err)
|
||||
if rmErr := client.RemoveContainer(id, dockertypes.ContainerRemoveOptions{RemoveVolumes: true}); rmErr == nil {
|
||||
glog.V(2).Infof("Successfully removed conflicting container %q", id)
|
||||
return nil, err
|
||||
} else {
|
||||
glog.V(2).Infof("Successfully removed conflicting sandbox %q", id)
|
||||
glog.Errorf("Failed to remove the conflicting container %q: %v", id, rmErr)
|
||||
// Return if the error is not "No such container" error.
|
||||
if !noContainerRE.MatchString(rmErr.Error()) {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
// randomize the name to avoid conflict.
|
||||
createConfig.Name = randomizeName(createConfig.Name)
|
||||
glog.V(2).Infof("Create the container with randomized name %s", createConfig.Name)
|
||||
return client.CreateContainer(createConfig)
|
||||
}
|
||||
|
|
|
@ -236,3 +236,12 @@ func TestParsingCreationConflictError(t *testing.T) {
|
|||
require.Len(t, matches, 2)
|
||||
require.Equal(t, matches[1], "24666ab8c814d16f986449e504ea0159468ddf8da01897144a770f66dce0e14e")
|
||||
}
|
||||
|
||||
func TestParsingRemovalNoContainerError(t *testing.T) {
|
||||
// Expected error message from docker.
|
||||
match := "Error response from daemon: No such container: 96e914f31579e44fe49b239266385330a9b2125abeb9254badd9fca74580c95a"
|
||||
notMatch := "Error response from daemon: Other errors"
|
||||
|
||||
assert.True(t, noContainerRE.MatchString(match))
|
||||
assert.False(t, noContainerRE.MatchString(notMatch))
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@ package dockershim
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
|
@ -78,6 +79,15 @@ func makeContainerName(s *runtimeapi.PodSandboxConfig, c *runtimeapi.ContainerCo
|
|||
|
||||
}
|
||||
|
||||
// randomizeName randomizes the container name. This should only be used when we hit the
|
||||
// docker container name conflict bug.
|
||||
func randomizeName(name string) string {
|
||||
return strings.Join([]string{
|
||||
name,
|
||||
fmt.Sprintf("%08x", rand.Uint32()),
|
||||
}, nameDelimiter)
|
||||
}
|
||||
|
||||
func parseUint32(s string) (uint32, error) {
|
||||
n, err := strconv.ParseUint(s, 10, 32)
|
||||
if err != nil {
|
||||
|
@ -92,7 +102,9 @@ func parseSandboxName(name string) (*runtimeapi.PodSandboxMetadata, error) {
|
|||
name = strings.TrimPrefix(name, "/")
|
||||
|
||||
parts := strings.Split(name, nameDelimiter)
|
||||
if len(parts) != 6 {
|
||||
// Tolerate the random suffix.
|
||||
// TODO(random-liu): Remove 7 field case when docker 1.11 is deprecated.
|
||||
if len(parts) != 6 && len(parts) != 7 {
|
||||
return nil, fmt.Errorf("failed to parse the sandbox name: %q", name)
|
||||
}
|
||||
if parts[0] != kubePrefix {
|
||||
|
@ -118,7 +130,9 @@ func parseContainerName(name string) (*runtimeapi.ContainerMetadata, error) {
|
|||
name = strings.TrimPrefix(name, "/")
|
||||
|
||||
parts := strings.Split(name, nameDelimiter)
|
||||
if len(parts) != 6 {
|
||||
// Tolerate the random suffix.
|
||||
// TODO(random-liu): Remove 7 field case when docker 1.11 is deprecated.
|
||||
if len(parts) != 6 && len(parts) != 7 {
|
||||
return nil, fmt.Errorf("failed to parse the container name: %q", name)
|
||||
}
|
||||
if parts[0] != kubePrefix {
|
||||
|
|
|
@ -82,3 +82,25 @@ func TestNonParsableContainerNames(t *testing.T) {
|
|||
_, err = parseContainerName("k8s_frontend_foo_bar_iamuid_notanumber")
|
||||
assert.Error(t, err)
|
||||
}
|
||||
|
||||
func TestParseRandomizedNames(t *testing.T) {
|
||||
// Test randomized sandbox name.
|
||||
sConfig := makeSandboxConfig("foo", "bar", "iamuid", 3)
|
||||
sActualName := randomizeName(makeSandboxName(sConfig))
|
||||
sActualMetadata, err := parseSandboxName(sActualName)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, sConfig.Metadata, sActualMetadata)
|
||||
|
||||
// Test randomized container name.
|
||||
name, attempt := "pause", uint32(5)
|
||||
config := &runtimeapi.ContainerConfig{
|
||||
Metadata: &runtimeapi.ContainerMetadata{
|
||||
Name: name,
|
||||
Attempt: attempt,
|
||||
},
|
||||
}
|
||||
actualName := randomizeName(makeContainerName(sConfig, config))
|
||||
actualMetadata, err := parseContainerName(actualName)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, config.Metadata, actualMetadata)
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue