Merge pull request #40448 from Random-Liu/work-around-container-create-conflict

Automatic merge from submit-queue (batch tested with PRs 40239, 40397, 40449, 40448, 40360)

CRI: Work around container create conflict.

Fixes https://github.com/kubernetes/kubernetes/issues/40443.

This PR added a random suffix in the container name when we:
* Failed to create the container because of "Conflict".
* And failed to remove the container because of "No such container".

@yujuhong @feiskyer 
/cc @kubernetes/sig-node-bugs
pull/6/head
Kubernetes Submit Queue 2017-01-26 14:23:43 -08:00 committed by GitHub
commit 5f01179aff
7 changed files with 140 additions and 14 deletions

View File

@ -176,7 +176,9 @@ func (ds *dockerService) CreateContainer(podSandboxID string, config *runtimeapi
createConfig.HostConfig = hc
createResp, err := ds.client.CreateContainer(createConfig)
recoverFromConflictIfNeeded(ds.client, err)
if err != nil {
createResp, err = recoverFromCreationConflictIfNeeded(ds.client, createConfig, err)
}
if createResp != nil {
return createResp.ID, err

View File

@ -19,6 +19,7 @@ package dockershim
import (
"fmt"
"path/filepath"
"strings"
"testing"
"time"
@ -215,3 +216,69 @@ func TestContainerLogPath(t *testing.T) {
assert.NoError(t, err)
assert.Equal(t, fakeOS.Removes, []string{kubeletContainerLogPath})
}
// TestContainerCreationConflict tests the logic to work around docker container
// creation naming conflict bug.
func TestContainerCreationConflict(t *testing.T) {
sConfig := makeSandboxConfig("foo", "bar", "1", 0)
config := makeContainerConfig(sConfig, "pause", "iamimage", 0, map[string]string{}, map[string]string{})
containerName := makeContainerName(sConfig, config)
const sandboxId = "sandboxid"
const containerId = "containerid"
conflictError := fmt.Errorf("Error response from daemon: Conflict. The name \"/%s\" is already in use by container %s. You have to remove (or rename) that container to be able to reuse that name.",
containerName, containerId)
noContainerError := fmt.Errorf("Error response from daemon: No such container: %s", containerId)
randomError := fmt.Errorf("random error")
for desc, test := range map[string]struct {
createError error
removeError error
expectError error
expectCalls []string
expectFields int
}{
"no create error": {
expectCalls: []string{"create"},
expectFields: 6,
},
"random create error": {
createError: randomError,
expectError: randomError,
expectCalls: []string{"create"},
expectFields: 1,
},
"conflict create error with successful remove": {
createError: conflictError,
expectError: conflictError,
expectCalls: []string{"create", "remove"},
expectFields: 1,
},
"conflict create error with random remove error": {
createError: conflictError,
removeError: randomError,
expectError: conflictError,
expectCalls: []string{"create", "remove"},
expectFields: 1,
},
"conflict create error with no such container remove error": {
createError: conflictError,
removeError: noContainerError,
expectCalls: []string{"create", "remove", "create"},
expectFields: 7,
},
} {
t.Logf("TestCase: %s", desc)
ds, fDocker, _ := newTestDockerService()
if test.createError != nil {
fDocker.InjectError("create", test.createError)
}
if test.removeError != nil {
fDocker.InjectError("remove", test.removeError)
}
name, err := ds.CreateContainer(sandboxId, config, sConfig)
assert.Equal(t, test.expectError, err)
assert.NoError(t, fDocker.AssertCalls(test.expectCalls))
assert.Len(t, strings.Split(name, nameDelimiter), test.expectFields)
}
}

View File

@ -69,7 +69,9 @@ func (ds *dockerService) RunPodSandbox(config *runtimeapi.PodSandboxConfig) (str
return "", fmt.Errorf("failed to make sandbox docker config for pod %q: %v", config.Metadata.Name, err)
}
createResp, err := ds.client.CreateContainer(*createConfig)
recoverFromConflictIfNeeded(ds.client, err)
if err != nil {
createResp, err = recoverFromCreationConflictIfNeeded(ds.client, *createConfig, err)
}
if err != nil || createResp == nil {
return "", fmt.Errorf("failed to create a sandbox for pod %q: %v", config.Metadata.Name, err)

View File

@ -39,7 +39,8 @@ const (
)
var (
conflictRE = regexp.MustCompile(`Conflict. (?:.)+ is already in use by container ([0-9a-z]+)`)
conflictRE = regexp.MustCompile(`Conflict. (?:.)+ is already in use by container ([0-9a-z]+)`)
noContainerRE = regexp.MustCompile(`No such container: [0-9a-z]+`)
)
// apiVersion implements kubecontainer.Version interface by implementing
@ -295,22 +296,31 @@ func getUserFromImageUser(imageUser string) (*int64, string) {
// create a new container named FOO. To work around this, we parse the error
// message to identify failure caused by naming conflict, and try to remove
// the old container FOO.
// See #40443. Sometimes even removal may fail with "no such container" error.
// In that case we have to create the container with a randomized name.
// TODO(random-liu): Remove this work around after docker 1.11 is deprecated.
// TODO(#33189): Monitor the tests to see if the fix is sufficent.
func recoverFromConflictIfNeeded(client dockertools.DockerInterface, err error) {
if err == nil {
return
}
func recoverFromCreationConflictIfNeeded(client dockertools.DockerInterface, createConfig dockertypes.ContainerCreateConfig, err error) (*dockertypes.ContainerCreateResponse, error) {
matches := conflictRE.FindStringSubmatch(err.Error())
if len(matches) != 2 {
return
return nil, err
}
id := matches[1]
glog.Warningf("Unable to create pod sandbox due to conflict. Attempting to remove sandbox %q", id)
if err := client.RemoveContainer(id, dockertypes.ContainerRemoveOptions{RemoveVolumes: true}); err != nil {
glog.Errorf("Failed to remove the conflicting sandbox container: %v", err)
if rmErr := client.RemoveContainer(id, dockertypes.ContainerRemoveOptions{RemoveVolumes: true}); rmErr == nil {
glog.V(2).Infof("Successfully removed conflicting container %q", id)
return nil, err
} else {
glog.V(2).Infof("Successfully removed conflicting sandbox %q", id)
glog.Errorf("Failed to remove the conflicting container %q: %v", id, rmErr)
// Return if the error is not "No such container" error.
if !noContainerRE.MatchString(rmErr.Error()) {
return nil, err
}
}
// randomize the name to avoid conflict.
createConfig.Name = randomizeName(createConfig.Name)
glog.V(2).Infof("Create the container with randomized name %s", createConfig.Name)
return client.CreateContainer(createConfig)
}

View File

@ -236,3 +236,12 @@ func TestParsingCreationConflictError(t *testing.T) {
require.Len(t, matches, 2)
require.Equal(t, matches[1], "24666ab8c814d16f986449e504ea0159468ddf8da01897144a770f66dce0e14e")
}
func TestParsingRemovalNoContainerError(t *testing.T) {
// Expected error message from docker.
match := "Error response from daemon: No such container: 96e914f31579e44fe49b239266385330a9b2125abeb9254badd9fca74580c95a"
notMatch := "Error response from daemon: Other errors"
assert.True(t, noContainerRE.MatchString(match))
assert.False(t, noContainerRE.MatchString(notMatch))
}

View File

@ -18,6 +18,7 @@ package dockershim
import (
"fmt"
"math/rand"
"strconv"
"strings"
@ -78,6 +79,15 @@ func makeContainerName(s *runtimeapi.PodSandboxConfig, c *runtimeapi.ContainerCo
}
// randomizeName randomizes the container name. This should only be used when we hit the
// docker container name conflict bug.
func randomizeName(name string) string {
return strings.Join([]string{
name,
fmt.Sprintf("%08x", rand.Uint32()),
}, nameDelimiter)
}
func parseUint32(s string) (uint32, error) {
n, err := strconv.ParseUint(s, 10, 32)
if err != nil {
@ -92,7 +102,9 @@ func parseSandboxName(name string) (*runtimeapi.PodSandboxMetadata, error) {
name = strings.TrimPrefix(name, "/")
parts := strings.Split(name, nameDelimiter)
if len(parts) != 6 {
// Tolerate the random suffix.
// TODO(random-liu): Remove 7 field case when docker 1.11 is deprecated.
if len(parts) != 6 && len(parts) != 7 {
return nil, fmt.Errorf("failed to parse the sandbox name: %q", name)
}
if parts[0] != kubePrefix {
@ -118,7 +130,9 @@ func parseContainerName(name string) (*runtimeapi.ContainerMetadata, error) {
name = strings.TrimPrefix(name, "/")
parts := strings.Split(name, nameDelimiter)
if len(parts) != 6 {
// Tolerate the random suffix.
// TODO(random-liu): Remove 7 field case when docker 1.11 is deprecated.
if len(parts) != 6 && len(parts) != 7 {
return nil, fmt.Errorf("failed to parse the container name: %q", name)
}
if parts[0] != kubePrefix {

View File

@ -82,3 +82,25 @@ func TestNonParsableContainerNames(t *testing.T) {
_, err = parseContainerName("k8s_frontend_foo_bar_iamuid_notanumber")
assert.Error(t, err)
}
func TestParseRandomizedNames(t *testing.T) {
// Test randomized sandbox name.
sConfig := makeSandboxConfig("foo", "bar", "iamuid", 3)
sActualName := randomizeName(makeSandboxName(sConfig))
sActualMetadata, err := parseSandboxName(sActualName)
assert.NoError(t, err)
assert.Equal(t, sConfig.Metadata, sActualMetadata)
// Test randomized container name.
name, attempt := "pause", uint32(5)
config := &runtimeapi.ContainerConfig{
Metadata: &runtimeapi.ContainerMetadata{
Name: name,
Attempt: attempt,
},
}
actualName := randomizeName(makeContainerName(sConfig, config))
actualMetadata, err := parseContainerName(actualName)
assert.NoError(t, err)
assert.Equal(t, config.Metadata, actualMetadata)
}