mirror of https://github.com/k3s-io/k3s
dockershim: remove corrupt checkpoints immediately upon detection
parent
81fa823a6c
commit
a4bc7707d4
|
@ -174,7 +174,6 @@ pkg/kubelet/container/testing
|
|||
pkg/kubelet/custommetrics
|
||||
pkg/kubelet/dockershim
|
||||
pkg/kubelet/dockershim/cm
|
||||
pkg/kubelet/dockershim/errors
|
||||
pkg/kubelet/dockershim/libdocker
|
||||
pkg/kubelet/dockershim/remote
|
||||
pkg/kubelet/dockershim/testing
|
||||
|
|
|
@ -47,7 +47,6 @@ go_library(
|
|||
"//pkg/kubelet/cm:go_default_library",
|
||||
"//pkg/kubelet/container:go_default_library",
|
||||
"//pkg/kubelet/dockershim/cm:go_default_library",
|
||||
"//pkg/kubelet/dockershim/errors:go_default_library",
|
||||
"//pkg/kubelet/dockershim/libdocker:go_default_library",
|
||||
"//pkg/kubelet/dockershim/metrics:go_default_library",
|
||||
"//pkg/kubelet/leaky:go_default_library",
|
||||
|
@ -142,7 +141,6 @@ filegroup(
|
|||
srcs = [
|
||||
":package-srcs",
|
||||
"//pkg/kubelet/dockershim/cm:all-srcs",
|
||||
"//pkg/kubelet/dockershim/errors:all-srcs",
|
||||
"//pkg/kubelet/dockershim/libdocker:all-srcs",
|
||||
"//pkg/kubelet/dockershim/metrics:all-srcs",
|
||||
"//pkg/kubelet/dockershim/remote:all-srcs",
|
||||
|
|
|
@ -23,7 +23,6 @@ import (
|
|||
"path/filepath"
|
||||
|
||||
"github.com/golang/glog"
|
||||
"k8s.io/kubernetes/pkg/kubelet/dockershim/errors"
|
||||
utilstore "k8s.io/kubernetes/pkg/kubelet/util/store"
|
||||
utilfs "k8s.io/kubernetes/pkg/util/filesystem"
|
||||
hashutil "k8s.io/kubernetes/pkg/util/hash"
|
||||
|
@ -113,12 +112,14 @@ func (handler *PersistentCheckpointHandler) GetCheckpoint(podSandboxID string) (
|
|||
//TODO: unmarhsal into a struct with just Version, check version, unmarshal into versioned type.
|
||||
err = json.Unmarshal(blob, &checkpoint)
|
||||
if err != nil {
|
||||
glog.Errorf("Failed to unmarshal checkpoint %q. Checkpoint content: %q. ErrMsg: %v", podSandboxID, string(blob), err)
|
||||
return &checkpoint, errors.CorruptCheckpointError
|
||||
glog.Errorf("Failed to unmarshal checkpoint %q, removing checkpoint. Checkpoint content: %q. ErrMsg: %v", podSandboxID, string(blob), err)
|
||||
handler.RemoveCheckpoint(podSandboxID)
|
||||
return nil, fmt.Errorf("failed to unmarshal checkpoint")
|
||||
}
|
||||
if checkpoint.CheckSum != calculateChecksum(checkpoint) {
|
||||
glog.Errorf("Checksum of checkpoint %q is not valid", podSandboxID)
|
||||
return &checkpoint, errors.CorruptCheckpointError
|
||||
glog.Errorf("Checksum of checkpoint %q is not valid, removing checkpoint", podSandboxID)
|
||||
handler.RemoveCheckpoint(podSandboxID)
|
||||
return nil, fmt.Errorf("checkpoint is corrupted")
|
||||
}
|
||||
return &checkpoint, nil
|
||||
}
|
||||
|
|
|
@ -30,7 +30,6 @@ import (
|
|||
utilerrors "k8s.io/apimachinery/pkg/util/errors"
|
||||
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
"k8s.io/kubernetes/pkg/kubelet/dockershim/errors"
|
||||
"k8s.io/kubernetes/pkg/kubelet/dockershim/libdocker"
|
||||
"k8s.io/kubernetes/pkg/kubelet/qos"
|
||||
"k8s.io/kubernetes/pkg/kubelet/types"
|
||||
|
@ -193,20 +192,10 @@ func (ds *dockerService) StopPodSandbox(podSandboxID string) error {
|
|||
// actions will only have sandbox ID and not have pod namespace and name information.
|
||||
// Return error if encounter any unexpected error.
|
||||
if checkpointErr != nil {
|
||||
if libdocker.IsContainerNotFoundError(statusErr) && checkpointErr == errors.CheckpointNotFoundError {
|
||||
if libdocker.IsContainerNotFoundError(statusErr) {
|
||||
glog.Warningf("Both sandbox container and checkpoint for id %q could not be found. "+
|
||||
"Proceed without further sandbox information.", podSandboxID)
|
||||
} else {
|
||||
if checkpointErr == errors.CorruptCheckpointError {
|
||||
// Remove the corrupted checkpoint so that the next
|
||||
// StopPodSandbox call can proceed. This may indicate that
|
||||
// some resources won't be reclaimed.
|
||||
// TODO (#43021): Fix this properly.
|
||||
glog.Warningf("Removing corrupted checkpoint %q: %+v", podSandboxID, *checkpoint)
|
||||
if err := ds.checkpointHandler.RemoveCheckpoint(podSandboxID); err != nil {
|
||||
glog.Warningf("Unable to remove corrupted checkpoint %q: %v", podSandboxID, err)
|
||||
}
|
||||
}
|
||||
return utilerrors.NewAggregate([]error{
|
||||
fmt.Errorf("failed to get checkpoint for sandbox %q: %v", podSandboxID, checkpointErr),
|
||||
fmt.Errorf("failed to get sandbox status: %v", statusErr)})
|
||||
|
@ -488,13 +477,6 @@ func (ds *dockerService) ListPodSandbox(filter *runtimeapi.PodSandboxFilter) ([]
|
|||
checkpoint, err := ds.checkpointHandler.GetCheckpoint(id)
|
||||
if err != nil {
|
||||
glog.Errorf("Failed to retrieve checkpoint for sandbox %q: %v", id, err)
|
||||
|
||||
if err == errors.CorruptCheckpointError {
|
||||
glog.Warningf("Removing corrupted checkpoint %q: %+v", id, *checkpoint)
|
||||
if err := ds.checkpointHandler.RemoveCheckpoint(id); err != nil {
|
||||
glog.Warningf("Unable to remove corrupted checkpoint %q: %v", id, err)
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
result = append(result, checkpointToRuntimeAPISandbox(id, checkpoint))
|
||||
|
|
|
@ -38,7 +38,6 @@ import (
|
|||
kubecm "k8s.io/kubernetes/pkg/kubelet/cm"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
"k8s.io/kubernetes/pkg/kubelet/dockershim/cm"
|
||||
"k8s.io/kubernetes/pkg/kubelet/dockershim/errors"
|
||||
"k8s.io/kubernetes/pkg/kubelet/network"
|
||||
"k8s.io/kubernetes/pkg/kubelet/network/cni"
|
||||
"k8s.io/kubernetes/pkg/kubelet/network/hostport"
|
||||
|
@ -366,10 +365,6 @@ func (ds *dockerService) GetPodPortMappings(podSandboxID string) ([]*hostport.Po
|
|||
checkpoint, err := ds.checkpointHandler.GetCheckpoint(podSandboxID)
|
||||
// Return empty portMappings if checkpoint is not found
|
||||
if err != nil {
|
||||
if err == errors.CheckpointNotFoundError {
|
||||
glog.Warningf("Failed to retrieve checkpoint for sandbox %q: %v", podSandboxID, err)
|
||||
return nil, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
|
|
|
@ -1,25 +0,0 @@
|
|||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
load(
|
||||
"@io_bazel_rules_go//go:def.bzl",
|
||||
"go_library",
|
||||
)
|
||||
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = ["errors.go"],
|
||||
importpath = "k8s.io/kubernetes/pkg/kubelet/dockershim/errors",
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "package-srcs",
|
||||
srcs = glob(["**"]),
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:private"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "all-srcs",
|
||||
srcs = [":package-srcs"],
|
||||
tags = ["automanaged"],
|
||||
)
|
|
@ -1,22 +0,0 @@
|
|||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package errors
|
||||
|
||||
import "fmt"
|
||||
|
||||
var CorruptCheckpointError = fmt.Errorf("checkpoint is corrupted.")
|
||||
var CheckpointNotFoundError = fmt.Errorf("checkpoint is not found.")
|
|
@ -9,7 +9,6 @@ go_library(
|
|||
name = "go_default_library",
|
||||
srcs = ["util.go"],
|
||||
importpath = "k8s.io/kubernetes/pkg/kubelet/dockershim/testing",
|
||||
deps = ["//pkg/kubelet/dockershim/errors:go_default_library"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
|
|
|
@ -17,9 +17,8 @@ limitations under the License.
|
|||
package testing
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
"k8s.io/kubernetes/pkg/kubelet/dockershim/errors"
|
||||
)
|
||||
|
||||
// MemStore is an implementation of CheckpointStore interface which stores checkpoint in memory.
|
||||
|
@ -44,7 +43,7 @@ func (mstore *MemStore) Read(key string) ([]byte, error) {
|
|||
defer mstore.Unlock()
|
||||
data, ok := mstore.mem[key]
|
||||
if !ok {
|
||||
return nil, errors.CheckpointNotFoundError
|
||||
return nil, fmt.Errorf("checkpoint is not found")
|
||||
}
|
||||
return data, nil
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue