mirror of https://github.com/k3s-io/k3s
Kubelet: Centralize Capacity discovery of standard resources in Container manager.

Have storage derive node capacity from container manager. Move certain cAdvisor interfaces to the cAdvisor package in the process.

This patch fixes a bug in container manager where it was writing to a map without synchronization.

Signed-off-by: Vishnu kannan <vishnuk@google.com>
parent cb1f5605a6
commit 82f7820066
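A note on the synchronization fix mentioned in the commit message: the hunks below do not show it, so the following is only a generic sketch of the pattern involved, a capacity map shared between the discovery path and readers such as node status, guarded by a mutex. All names here are hypothetical and not part of the patch.

package cm

import (
	"sync"

	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// capacityCache is an illustrative holder for a capacity map that may be
// written by one goroutine (capacity discovery) and read by others
// (node status, allocatable computation). Names are hypothetical.
type capacityCache struct {
	mu       sync.RWMutex
	capacity v1.ResourceList
}

func newCapacityCache() *capacityCache {
	return &capacityCache{capacity: v1.ResourceList{}}
}

// set records one resource quantity under the write lock.
func (c *capacityCache) set(name v1.ResourceName, q resource.Quantity) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.capacity[name] = q
}

// snapshot returns a copy so callers never iterate the map while it is
// being written.
func (c *capacityCache) snapshot() v1.ResourceList {
	c.mu.RLock()
	defer c.mu.RUnlock()
	out := make(v1.ResourceList, len(c.capacity))
	for k, v := range c.capacity {
		out[k] = v
	}
	return out
}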
@@ -225,3 +225,16 @@ func (cc *cadvisorClient) getFsInfo(label string) (cadvisorapiv2.FsInfo, error)
func (cc *cadvisorClient) WatchEvents(request *events.Request) (*events.EventChannel, error) {
	return cc.WatchForEvents(request)
}

// HasDedicatedImageFs returns true if the imagefs has a dedicated device.
func (cc *cadvisorClient) HasDedicatedImageFs() (bool, error) {
	imageFsInfo, err := cc.ImagesFsInfo()
	if err != nil {
		return false, err
	}
	rootFsInfo, err := cc.RootFsInfo()
	if err != nil {
		return false, err
	}
	return imageFsInfo.Device != rootFsInfo.Device, nil
}
@@ -76,3 +76,7 @@ func (cu *cadvisorUnsupported) RootFsInfo() (cadvisorapiv2.FsInfo, error) {
func (cu *cadvisorUnsupported) WatchEvents(request *events.Request) (*events.EventChannel, error) {
	return nil, unsupportedErr
}

func (cu *cadvisorUnsupported) HasDedicatedImageFs() (bool, error) {
	return false, unsupportedErr
}
@@ -73,3 +73,7 @@ func (cu *cadvisorClient) RootFsInfo() (cadvisorapiv2.FsInfo, error) {
func (cu *cadvisorClient) WatchEvents(request *events.Request) (*events.EventChannel, error) {
	return &events.EventChannel{}, nil
}

func (cu *cadvisorClient) HasDedicatedImageFs() (bool, error) {
	return false, nil
}
@@ -73,3 +73,7 @@ func (c *Fake) RootFsInfo() (cadvisorapiv2.FsInfo, error) {
func (c *Fake) WatchEvents(request *events.Request) (*events.EventChannel, error) {
	return new(events.EventChannel), nil
}

func (c *Fake) HasDedicatedImageFs() (bool, error) {
	return false, nil
}
@@ -83,3 +83,8 @@ func (c *Mock) WatchEvents(request *events.Request) (*events.EventChannel, error
	args := c.Called()
	return args.Get(0).(*events.EventChannel), args.Error(1)
}

func (c *Mock) HasDedicatedImageFs() (bool, error) {
	args := c.Called()
	return args.Get(0).(bool), args.Error(1)
}
@@ -41,4 +41,7 @@ type Interface interface {

	// Get events streamed through passedChannel that fit the request.
	WatchEvents(request *events.Request) (*events.EventChannel, error)

	// HasDedicatedImageFs returns true iff a dedicated image filesystem exists for storing images.
	HasDedicatedImageFs() (bool, error)
}
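Every implementation touched above (the real Linux client, the unsupported build, the alternate client, and the Fake and Mock used in tests) now has to provide the two extra methods. This commit does not add it, but a compile-time assertion next to each implementation is a cheap way to catch a type that falls behind the interface; a sketch for the Linux client, not part of the patch:

// Sketch only: placed beside the cadvisorClient definition, this makes a
// missing Interface method a compile error instead of a late surprise at
// the call site.
var _ Interface = (*cadvisorClient)(nil)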
@@ -60,6 +60,9 @@ type ContainerManager interface {
	// GetNodeAllocatable returns the amount of compute resources that have to be reserved from scheduling.
	GetNodeAllocatableReservation() v1.ResourceList

	// GetCapacity returns the amount of compute resources tracked by container manager available on the node.
	GetCapacity() v1.ResourceList

	// UpdateQOSCgroups performs housekeeping updates to ensure that the top
	// level QoS containers have their desired state in a thread-safe way
	UpdateQOSCgroups() error
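For orientation: GetCapacity reports what the node has, GetNodeAllocatableReservation reports what must be withheld from scheduling, and the schedulable amount is the difference. A minimal sketch of a caller that combines the two, using only the methods declared above; the helper itself is hypothetical and not part of this patch:

package cm

import (
	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// allocatableFrom derives a schedulable ResourceList from a
// ContainerManager. Hypothetical helper for illustration only.
func allocatableFrom(manager ContainerManager) v1.ResourceList {
	capacity := manager.GetCapacity()
	reserved := manager.GetNodeAllocatableReservation()

	allocatable := make(v1.ResourceList, len(capacity))
	for name, capQty := range capacity {
		// Start from capacity and subtract any reservation for the same
		// resource; resources without a reservation pass through as-is.
		value := resource.NewQuantity(0, capQty.Format)
		value.Add(capQty)
		if resQty, ok := reserved[name]; ok {
			value.Sub(resQty)
		}
		allocatable[name] = *value
	}
	return allocatable
}

The real kubelet computes allocatable elsewhere; this only illustrates how the two accessors relate.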
@@ -224,6 +224,25 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
	} else {
		return nil, err
	}
	rootfs, err := cadvisorInterface.RootFsInfo()
	if err != nil {
		capacity[v1.ResourceStorageScratch] = resource.MustParse("0Gi")
	} else {
		for rName, rCap := range cadvisor.StorageScratchCapacityFromFsInfo(rootfs) {
			capacity[rName] = rCap
		}
	}

	if hasDedicatedImageFs, _ := cadvisorInterface.HasDedicatedImageFs(); hasDedicatedImageFs {
		imagesfs, err := cadvisorInterface.ImagesFsInfo()
		if err != nil {
			glog.Errorf("Failed to get Image filesystem information: %v", err)
		} else {
			for rName, rCap := range cadvisor.StorageOverlayCapacityFromFsInfo(imagesfs) {
				capacity[rName] = rCap
			}
		}
	}

	cgroupRoot := nodeConfig.CgroupRoot
	cgroupManager := NewCgroupManager(subsystems, nodeConfig.CgroupDriver)
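The hunk above leans on two helpers, StorageScratchCapacityFromFsInfo and StorageOverlayCapacityFromFsInfo, which the commit message says were moved into the cAdvisor package but which are not shown in this excerpt. Assuming they do nothing more than wrap FsInfo.Capacity in a ResourceList, they would look roughly like this; a sketch, not the verbatim source:

package cadvisor

import (
	cadvisorapiv2 "github.com/google/cadvisor/info/v2"

	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// StorageScratchCapacityFromFsInfo converts the root filesystem size
// reported by cAdvisor into the scratch-storage capacity resource.
func StorageScratchCapacityFromFsInfo(info cadvisorapiv2.FsInfo) v1.ResourceList {
	return v1.ResourceList{
		v1.ResourceStorageScratch: *resource.NewQuantity(int64(info.Capacity), resource.BinarySI),
	}
}

// StorageOverlayCapacityFromFsInfo does the same for the image
// filesystem, surfaced as overlay storage capacity.
func StorageOverlayCapacityFromFsInfo(info cadvisorapiv2.FsInfo) v1.ResourceList {
	return v1.ResourceList{
		v1.ResourceStorageOverlay: *resource.NewQuantity(int64(info.Capacity), resource.BinarySI),
	}
}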
@@ -58,6 +58,10 @@ func (cm *containerManagerStub) GetNodeAllocatableReservation() v1.ResourceList
	return nil
}

func (cm *containerManagerStub) GetCapacity() v1.ResourceList {
	return nil
}

func (cm *containerManagerStub) NewPodContainerManager() PodContainerManager {
	return &podContainerManagerStub{}
}
@@ -29,7 +29,6 @@ import (
	clientv1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/kubernetes/pkg/kubelet/cadvisor"
	"k8s.io/kubernetes/pkg/kubelet/events"
	evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
)
@@ -184,17 +183,6 @@ func (cm *containerManagerImpl) getNodeAllocatableAbsolute() v1.ResourceList {
// GetNodeAllocatable returns amount of compute or storage resource that have to be reserved on this node from scheduling.
func (cm *containerManagerImpl) GetNodeAllocatableReservation() v1.ResourceList {
	evictionReservation := hardEvictionReservation(cm.HardEvictionThresholds, cm.capacity)
	if _, ok := cm.capacity[v1.ResourceStorage]; !ok {
		if cm.cadvisorInterface != nil {
			if rootfs, err := cm.cadvisorInterface.RootFsInfo(); err == nil {
				for rName, rCap := range cadvisor.StorageScratchCapacityFromFsInfo(rootfs) {
					cm.capacity[rName] = rCap
				}
			} else {
				glog.Warning("Error getting rootfs info: %v", err)
			}
		}
	}
	result := make(v1.ResourceList)
	for k := range cm.capacity {
		value := resource.NewQuantity(0, resource.DecimalSI)
@@ -1301,7 +1301,7 @@ func (kl *Kubelet) initializeRuntimeDependentModules() {
		glog.Fatalf("Failed to start cAdvisor %v", err)
	}
	// eviction manager must start after cadvisor because it needs to know if the container runtime has a dedicated imagefs
	kl.evictionManager.Start(kl, kl.GetActivePods, kl.podResourcesAreReclaimed, kl, evictionMonitoringPeriod)
	kl.evictionManager.Start(kl.cadvisor, kl.GetActivePods, kl.podResourcesAreReclaimed, kl, evictionMonitoringPeriod)
}

// Run starts the kubelet reacting to config updates
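The functional change in this hunk is the first argument to evictionManager.Start: before, the Kubelet itself was passed (it implemented HasDedicatedImageFs, removed further down); now the cadvisor client is passed directly. That only works if the eviction manager asks for a narrow view of its argument rather than a concrete type, along these lines; the exact name and shape of the interface in the eviction package are an assumption here, not shown in this diff:

// Sketch of the consumer-side contract; treat this shape as assumed.
type diskInfoProvider interface {
	// HasDedicatedImageFs reports whether container images live on a
	// device separate from the root filesystem.
	HasDedicatedImageFs() (bool, error)
}

Both the old argument (*Kubelet, before this commit) and the new one (cadvisor.Interface, after it) satisfy such an interface, which is why only the call site needs to change.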
@@ -45,19 +45,6 @@ func (kl *Kubelet) GetContainerInfo(podFullName string, podUID types.UID, contai
	return &ci, nil
}

// HasDedicatedImageFs returns true if the imagefs has a dedicated device.
func (kl *Kubelet) HasDedicatedImageFs() (bool, error) {
	imageFsInfo, err := kl.ImagesFsInfo()
	if err != nil {
		return false, err
	}
	rootFsInfo, err := kl.RootFsInfo()
	if err != nil {
		return false, err
	}
	return imageFsInfo.Device != rootFsInfo.Device, nil
}

// GetContainerInfoV2 returns stats (from Cadvisor) for containers.
func (kl *Kubelet) GetContainerInfoV2(name string, options cadvisorapiv2.RequestOptions) (map[string]cadvisorapiv2.ContainerInfo, error) {
	return kl.cadvisor.ContainerInfoV2(name, options)
@@ -24,7 +24,6 @@ import (
	"github.com/stretchr/testify/require"

	cadvisorapi "github.com/google/cadvisor/info/v1"
	cadvisorapiv2 "github.com/google/cadvisor/info/v2"
	"k8s.io/apimachinery/pkg/types"
	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
	kubecontainertest "k8s.io/kubernetes/pkg/kubelet/container/testing"
@@ -251,34 +250,3 @@ func TestGetRawContainerInfoSubcontainers(t *testing.T) {
	assert.Len(t, result, 2)
	mockCadvisor.AssertExpectations(t)
}

func TestHasDedicatedImageFs(t *testing.T) {
	testCases := map[string]struct {
		imageFsInfo cadvisorapiv2.FsInfo
		rootFsInfo  cadvisorapiv2.FsInfo
		expected    bool
	}{
		"has-dedicated-image-fs": {
			imageFsInfo: cadvisorapiv2.FsInfo{Device: "123"},
			rootFsInfo:  cadvisorapiv2.FsInfo{Device: "456"},
			expected:    true,
		},
		"has-unified-image-fs": {
			imageFsInfo: cadvisorapiv2.FsInfo{Device: "123"},
			rootFsInfo:  cadvisorapiv2.FsInfo{Device: "123"},
			expected:    false,
		},
	}
	for testName, testCase := range testCases {
		testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
		defer testKubelet.Cleanup()
		kubelet := testKubelet.kubelet
		mockCadvisor := testKubelet.fakeCadvisor
		mockCadvisor.On("Start").Return(nil)
		mockCadvisor.On("ImagesFsInfo").Return(testCase.imageFsInfo, nil)
		mockCadvisor.On("RootFsInfo").Return(testCase.rootFsInfo, nil)
		actual, err := kubelet.HasDedicatedImageFs()
		assert.NoError(t, err, "test [%s]", testName)
		assert.Equal(t, testCase.expected, actual, "test [%s]", testName)
	}
}
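The table-driven test above is deleted because the Kubelet method it exercised is gone. Consumers of cadvisor.Interface can still be tested against the Mock that this commit extends; a sketch of how the new expectation is wired up in such a test. The package location and test name are assumptions, and the import path is taken from where the Fake and Mock appear to live in this tree:

package kubelet_test // hypothetical location: wherever the consumer of cadvisor.Interface is tested

import (
	"testing"

	"github.com/stretchr/testify/assert"

	cadvisortest "k8s.io/kubernetes/pkg/kubelet/cadvisor/testing"
)

func TestDedicatedImageFsViaMock(t *testing.T) {
	mockCadvisor := new(cadvisortest.Mock)
	// The Mock added earlier in this commit forwards HasDedicatedImageFs
	// through testify's Called(), so a single On/Return pair drives it.
	mockCadvisor.On("HasDedicatedImageFs").Return(true, nil)

	dedicated, err := mockCadvisor.HasDedicatedImageFs()
	assert.NoError(t, err)
	assert.True(t, dedicated)
	mockCadvisor.AssertExpectations(t)
}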
@@ -549,6 +549,7 @@ func (kl *Kubelet) setNodeStatusMachineInfo(node *v1.Node) {
			node.Status.Capacity[v1.ResourcePods] = *resource.NewQuantity(
				int64(kl.maxPods), resource.DecimalSI)
		}

		if node.Status.NodeInfo.BootID != "" &&
			node.Status.NodeInfo.BootID != info.BootID {
			// TODO: This requires a transaction, either both node status is updated
@@ -557,25 +558,16 @@ func (kl *Kubelet) setNodeStatusMachineInfo(node *v1.Node) {
				"Node %s has been rebooted, boot id: %s", kl.nodeName, info.BootID)
		}
		node.Status.NodeInfo.BootID = info.BootID
	}

	if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
		rootfs, err := kl.GetCachedRootFsInfo()
		if err != nil {
			node.Status.Capacity[v1.ResourceStorageScratch] = resource.MustParse("0Gi")
		} else {
			for rName, rCap := range cadvisor.StorageScratchCapacityFromFsInfo(rootfs) {
				node.Status.Capacity[rName] = rCap
			}
		}

		if hasDedicatedImageFs, _ := kl.HasDedicatedImageFs(); hasDedicatedImageFs {
			imagesfs, err := kl.ImagesFsInfo()
			if err != nil {
				node.Status.Capacity[v1.ResourceStorageOverlay] = resource.MustParse("0Gi")
			} else {
				for rName, rCap := range cadvisor.StorageOverlayCapacityFromFsInfo(imagesfs) {
					node.Status.Capacity[rName] = rCap
	if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
		// TODO: all the node resources should use GetCapacity instead of deriving the
		// capacity for every node status request
		initialCapacity := kl.containerManager.GetCapacity()
		if initialCapacity != nil {
			node.Status.Capacity[v1.ResourceStorageScratch] = initialCapacity[v1.ResourceStorageScratch]
			imageCapacity, ok := initialCapacity[v1.ResourceStorageOverlay]
			if ok {
				node.Status.Capacity[v1.ResourceStorageOverlay] = imageCapacity
			}
		}
	}
@@ -115,12 +115,17 @@ func applyNodeStatusPatch(originalNode *v1.Node, patch []byte) (*v1.Node, error)
type localCM struct {
	cm.ContainerManager
	allocatable v1.ResourceList
	capacity    v1.ResourceList
}

func (lcm *localCM) GetNodeAllocatableReservation() v1.ResourceList {
	return lcm.allocatable
}

func (lcm *localCM) GetCapacity() v1.ResourceList {
	return lcm.capacity
}

func TestUpdateNewNodeStatus(t *testing.T) {
	// generate one more than maxImagesInNodeStatus in inputImageList
	inputImageList, expectedImageList := generateTestingImageList(maxImagesInNodeStatus + 1)
@@ -134,6 +139,10 @@ func TestUpdateNewNodeStatus(t *testing.T) {
			v1.ResourceCPU:    *resource.NewMilliQuantity(200, resource.DecimalSI),
			v1.ResourceMemory: *resource.NewQuantity(100E6, resource.BinarySI),
		},
		capacity: v1.ResourceList{
			v1.ResourceCPU:    *resource.NewMilliQuantity(2000, resource.DecimalSI),
			v1.ResourceMemory: *resource.NewQuantity(10E9, resource.BinarySI),
		},
	}
	kubeClient := testKubelet.fakeKubeClient
	existingNode := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: testKubeletHostname}}
@@ -251,6 +260,18 @@ func TestUpdateNewNodeOutOfDiskStatusWithTransitionFrequency(t *testing.T) {
	testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
	defer testKubelet.Cleanup()
	kubelet := testKubelet.kubelet
	kubelet.containerManager = &localCM{
		ContainerManager: cm.NewStubContainerManager(),
		allocatable: v1.ResourceList{
			v1.ResourceCPU:    *resource.NewMilliQuantity(200, resource.DecimalSI),
			v1.ResourceMemory: *resource.NewQuantity(100E6, resource.BinarySI),
		},
		capacity: v1.ResourceList{
			v1.ResourceCPU:    *resource.NewMilliQuantity(2000, resource.DecimalSI),
			v1.ResourceMemory: *resource.NewQuantity(20E9, resource.BinarySI),
		},
	}

	kubeClient := testKubelet.fakeKubeClient
	existingNode := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: testKubeletHostname}}
	kubeClient.ReactionChain = fake.NewSimpleClientset(&v1.NodeList{Items: []v1.Node{existingNode}}).ReactionChain
@@ -319,6 +340,10 @@ func TestUpdateExistingNodeStatus(t *testing.T) {
			v1.ResourceCPU:    *resource.NewMilliQuantity(200, resource.DecimalSI),
			v1.ResourceMemory: *resource.NewQuantity(100E6, resource.BinarySI),
		},
		capacity: v1.ResourceList{
			v1.ResourceCPU:    *resource.NewMilliQuantity(2000, resource.DecimalSI),
			v1.ResourceMemory: *resource.NewQuantity(20E9, resource.BinarySI),
		},
	}

	kubeClient := testKubelet.fakeKubeClient
@@ -503,6 +528,18 @@ func TestUpdateExistingNodeOutOfDiskStatusWithTransitionFrequency(t *testing.T)
	testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
	defer testKubelet.Cleanup()
	kubelet := testKubelet.kubelet
	kubelet.containerManager = &localCM{
		ContainerManager: cm.NewStubContainerManager(),
		allocatable: v1.ResourceList{
			v1.ResourceCPU:    *resource.NewMilliQuantity(200, resource.DecimalSI),
			v1.ResourceMemory: *resource.NewQuantity(100E6, resource.BinarySI),
		},
		capacity: v1.ResourceList{
			v1.ResourceCPU:    *resource.NewMilliQuantity(2000, resource.DecimalSI),
			v1.ResourceMemory: *resource.NewQuantity(20E9, resource.BinarySI),
		},
	}

	clock := testKubelet.fakeClock
	// Do not set nano second, because apiserver function doesn't support nano second. (Only support
	// RFC3339).
@@ -658,6 +695,10 @@ func TestUpdateNodeStatusWithRuntimeStateError(t *testing.T) {
			v1.ResourceCPU:    *resource.NewMilliQuantity(200, resource.DecimalSI),
			v1.ResourceMemory: *resource.NewQuantity(100E6, resource.BinarySI),
		},
		capacity: v1.ResourceList{
			v1.ResourceCPU:    *resource.NewMilliQuantity(2000, resource.DecimalSI),
			v1.ResourceMemory: *resource.NewQuantity(10E9, resource.BinarySI),
		},
	}

	clock := testKubelet.fakeClock
@@ -1113,6 +1154,10 @@ func TestUpdateNewNodeStatusTooLargeReservation(t *testing.T) {
		allocatable: v1.ResourceList{
			v1.ResourceCPU: *resource.NewMilliQuantity(40000, resource.DecimalSI),
		},
		capacity: v1.ResourceList{
			v1.ResourceCPU:    *resource.NewMilliQuantity(2000, resource.DecimalSI),
			v1.ResourceMemory: *resource.NewQuantity(10E9, resource.BinarySI),
		},
	}
	kubeClient := testKubelet.fakeKubeClient
	existingNode := v1.Node{ObjectMeta: metav1.ObjectMeta{Name: testKubeletHostname}}