mirror of https://github.com/k3s-io/k3s
415 lines
13 KiB
Go
415 lines
13 KiB
Go
/*
|
|
Copyright 2015 The Kubernetes Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package images
|
|
|
|
import (
|
|
goerrors "errors"
|
|
"fmt"
|
|
"math"
|
|
"sort"
|
|
"sync"
|
|
"time"
|
|
|
|
"k8s.io/klog"
|
|
|
|
"k8s.io/api/core/v1"
|
|
"k8s.io/apimachinery/pkg/util/errors"
|
|
"k8s.io/apimachinery/pkg/util/sets"
|
|
"k8s.io/apimachinery/pkg/util/wait"
|
|
"k8s.io/client-go/tools/record"
|
|
statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
|
|
"k8s.io/kubernetes/pkg/kubelet/container"
|
|
"k8s.io/kubernetes/pkg/kubelet/events"
|
|
"k8s.io/kubernetes/pkg/kubelet/util/sliceutils"
|
|
)
|
|
|
|
// StatsProvider is an interface for fetching stats used during image garbage
|
|
// collection.
|
|
type StatsProvider interface {
|
|
// ImageFsStats returns the stats of the image filesystem.
|
|
ImageFsStats() (*statsapi.FsStats, error)
|
|
}
|
|
|
|
// ImageGCManager is an interface for managing lifecycle of all images.
|
|
// Implementation is thread-safe.
|
|
type ImageGCManager interface {
|
|
// Applies the garbage collection policy. Errors include being unable to free
|
|
// enough space as per the garbage collection policy.
|
|
GarbageCollect() error
|
|
|
|
// Start async garbage collection of images.
|
|
Start()
|
|
|
|
GetImageList() ([]container.Image, error)
|
|
|
|
// Delete all unused images.
|
|
DeleteUnusedImages() error
|
|
}
|
|
|
|
// ImageGCPolicy is a policy for garbage collecting images. Policy defines an allowed band in
|
|
// which garbage collection will be run.
|
|
type ImageGCPolicy struct {
|
|
// Any usage above this threshold will always trigger garbage collection.
|
|
// This is the highest usage we will allow.
|
|
HighThresholdPercent int
|
|
|
|
// Any usage below this threshold will never trigger garbage collection.
|
|
// This is the lowest threshold we will try to garbage collect to.
|
|
LowThresholdPercent int
|
|
|
|
// Minimum age at which an image can be garbage collected.
|
|
MinAge time.Duration
|
|
}
|
|
|
|
type realImageGCManager struct {
|
|
// Container runtime
|
|
runtime container.Runtime
|
|
|
|
// Records of images and their use.
|
|
imageRecords map[string]*imageRecord
|
|
imageRecordsLock sync.Mutex
|
|
|
|
// The image garbage collection policy in use.
|
|
policy ImageGCPolicy
|
|
|
|
// statsProvider provides stats used during image garbage collection.
|
|
statsProvider StatsProvider
|
|
|
|
// Recorder for Kubernetes events.
|
|
recorder record.EventRecorder
|
|
|
|
// Reference to this node.
|
|
nodeRef *v1.ObjectReference
|
|
|
|
// Track initialization
|
|
initialized bool
|
|
|
|
// imageCache is the cache of latest image list.
|
|
imageCache imageCache
|
|
|
|
// sandbox image exempted from GC
|
|
sandboxImage string
|
|
}
|
|
|
|
// imageCache caches latest result of ListImages.
|
|
type imageCache struct {
|
|
// sync.RWMutex is the mutex protects the image cache.
|
|
sync.RWMutex
|
|
// images is the image cache.
|
|
images []container.Image
|
|
}
|
|
|
|
// set updates image cache.
|
|
func (i *imageCache) set(images []container.Image) {
|
|
i.Lock()
|
|
defer i.Unlock()
|
|
i.images = images
|
|
}
|
|
|
|
// get gets a sorted (by image size) image list from image cache.
|
|
// There is a potentical data race in this function. See PR #60448
|
|
// Because there is deepcopy function available currently, move sort
|
|
// function inside this function
|
|
func (i *imageCache) get() []container.Image {
|
|
i.Lock()
|
|
defer i.Unlock()
|
|
sort.Sort(sliceutils.ByImageSize(i.images))
|
|
return i.images
|
|
}
|
|
|
|
// Information about the images we track.
|
|
type imageRecord struct {
|
|
// Time when this image was first detected.
|
|
firstDetected time.Time
|
|
|
|
// Time when we last saw this image being used.
|
|
lastUsed time.Time
|
|
|
|
// Size of the image in bytes.
|
|
size int64
|
|
}
|
|
|
|
// NewImageGCManager instantiates a new ImageGCManager object.
|
|
func NewImageGCManager(runtime container.Runtime, statsProvider StatsProvider, recorder record.EventRecorder, nodeRef *v1.ObjectReference, policy ImageGCPolicy, sandboxImage string) (ImageGCManager, error) {
|
|
// Validate policy.
|
|
if policy.HighThresholdPercent < 0 || policy.HighThresholdPercent > 100 {
|
|
return nil, fmt.Errorf("invalid HighThresholdPercent %d, must be in range [0-100]", policy.HighThresholdPercent)
|
|
}
|
|
if policy.LowThresholdPercent < 0 || policy.LowThresholdPercent > 100 {
|
|
return nil, fmt.Errorf("invalid LowThresholdPercent %d, must be in range [0-100]", policy.LowThresholdPercent)
|
|
}
|
|
if policy.LowThresholdPercent > policy.HighThresholdPercent {
|
|
return nil, fmt.Errorf("LowThresholdPercent %d can not be higher than HighThresholdPercent %d", policy.LowThresholdPercent, policy.HighThresholdPercent)
|
|
}
|
|
im := &realImageGCManager{
|
|
runtime: runtime,
|
|
policy: policy,
|
|
imageRecords: make(map[string]*imageRecord),
|
|
statsProvider: statsProvider,
|
|
recorder: recorder,
|
|
nodeRef: nodeRef,
|
|
initialized: false,
|
|
sandboxImage: sandboxImage,
|
|
}
|
|
|
|
return im, nil
|
|
}
|
|
|
|
func (im *realImageGCManager) Start() {
|
|
go wait.Until(func() {
|
|
// Initial detection make detected time "unknown" in the past.
|
|
var ts time.Time
|
|
if im.initialized {
|
|
ts = time.Now()
|
|
}
|
|
_, err := im.detectImages(ts)
|
|
if err != nil {
|
|
klog.Warningf("[imageGCManager] Failed to monitor images: %v", err)
|
|
} else {
|
|
im.initialized = true
|
|
}
|
|
}, 5*time.Minute, wait.NeverStop)
|
|
|
|
// Start a goroutine periodically updates image cache.
|
|
// TODO(random-liu): Merge this with the previous loop.
|
|
go wait.Until(func() {
|
|
images, err := im.runtime.ListImages()
|
|
if err != nil {
|
|
klog.Warningf("[imageGCManager] Failed to update image list: %v", err)
|
|
} else {
|
|
im.imageCache.set(images)
|
|
}
|
|
}, 30*time.Second, wait.NeverStop)
|
|
|
|
}
|
|
|
|
// Get a list of images on this node
|
|
func (im *realImageGCManager) GetImageList() ([]container.Image, error) {
|
|
return im.imageCache.get(), nil
|
|
}
|
|
|
|
func (im *realImageGCManager) detectImages(detectTime time.Time) (sets.String, error) {
|
|
imagesInUse := sets.NewString()
|
|
|
|
// Always consider the container runtime pod sandbox image in use
|
|
imageRef, err := im.runtime.GetImageRef(container.ImageSpec{Image: im.sandboxImage})
|
|
if err == nil && imageRef != "" {
|
|
imagesInUse.Insert(imageRef)
|
|
}
|
|
|
|
images, err := im.runtime.ListImages()
|
|
if err != nil {
|
|
return imagesInUse, err
|
|
}
|
|
pods, err := im.runtime.GetPods(true)
|
|
if err != nil {
|
|
return imagesInUse, err
|
|
}
|
|
|
|
// Make a set of images in use by containers.
|
|
for _, pod := range pods {
|
|
for _, container := range pod.Containers {
|
|
klog.V(5).Infof("Pod %s/%s, container %s uses image %s(%s)", pod.Namespace, pod.Name, container.Name, container.Image, container.ImageID)
|
|
imagesInUse.Insert(container.ImageID)
|
|
}
|
|
}
|
|
|
|
// Add new images and record those being used.
|
|
now := time.Now()
|
|
currentImages := sets.NewString()
|
|
im.imageRecordsLock.Lock()
|
|
defer im.imageRecordsLock.Unlock()
|
|
for _, image := range images {
|
|
klog.V(5).Infof("Adding image ID %s to currentImages", image.ID)
|
|
currentImages.Insert(image.ID)
|
|
|
|
// New image, set it as detected now.
|
|
if _, ok := im.imageRecords[image.ID]; !ok {
|
|
klog.V(5).Infof("Image ID %s is new", image.ID)
|
|
im.imageRecords[image.ID] = &imageRecord{
|
|
firstDetected: detectTime,
|
|
}
|
|
}
|
|
|
|
// Set last used time to now if the image is being used.
|
|
if isImageUsed(image.ID, imagesInUse) {
|
|
klog.V(5).Infof("Setting Image ID %s lastUsed to %v", image.ID, now)
|
|
im.imageRecords[image.ID].lastUsed = now
|
|
}
|
|
|
|
klog.V(5).Infof("Image ID %s has size %d", image.ID, image.Size)
|
|
im.imageRecords[image.ID].size = image.Size
|
|
}
|
|
|
|
// Remove old images from our records.
|
|
for image := range im.imageRecords {
|
|
if !currentImages.Has(image) {
|
|
klog.V(5).Infof("Image ID %s is no longer present; removing from imageRecords", image)
|
|
delete(im.imageRecords, image)
|
|
}
|
|
}
|
|
|
|
return imagesInUse, nil
|
|
}
|
|
|
|
func (im *realImageGCManager) GarbageCollect() error {
|
|
// Get disk usage on disk holding images.
|
|
fsStats, err := im.statsProvider.ImageFsStats()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
var capacity, available int64
|
|
if fsStats.CapacityBytes != nil {
|
|
capacity = int64(*fsStats.CapacityBytes)
|
|
}
|
|
if fsStats.AvailableBytes != nil {
|
|
available = int64(*fsStats.AvailableBytes)
|
|
}
|
|
|
|
if available > capacity {
|
|
klog.Warningf("available %d is larger than capacity %d", available, capacity)
|
|
available = capacity
|
|
}
|
|
|
|
// Check valid capacity.
|
|
if capacity == 0 {
|
|
err := goerrors.New("invalid capacity 0 on image filesystem")
|
|
im.recorder.Eventf(im.nodeRef, v1.EventTypeWarning, events.InvalidDiskCapacity, err.Error())
|
|
return err
|
|
}
|
|
|
|
// If over the max threshold, free enough to place us at the lower threshold.
|
|
usagePercent := 100 - int(available*100/capacity)
|
|
if usagePercent >= im.policy.HighThresholdPercent {
|
|
amountToFree := capacity*int64(100-im.policy.LowThresholdPercent)/100 - available
|
|
klog.Infof("[imageGCManager]: Disk usage on image filesystem is at %d%% which is over the high threshold (%d%%). Trying to free %d bytes down to the low threshold (%d%%).", usagePercent, im.policy.HighThresholdPercent, amountToFree, im.policy.LowThresholdPercent)
|
|
freed, err := im.freeSpace(amountToFree, time.Now())
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if freed < amountToFree {
|
|
err := fmt.Errorf("failed to garbage collect required amount of images. Wanted to free %d bytes, but freed %d bytes", amountToFree, freed)
|
|
im.recorder.Eventf(im.nodeRef, v1.EventTypeWarning, events.FreeDiskSpaceFailed, err.Error())
|
|
return err
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (im *realImageGCManager) DeleteUnusedImages() error {
|
|
klog.Infof("attempting to delete unused images")
|
|
_, err := im.freeSpace(math.MaxInt64, time.Now())
|
|
return err
|
|
}
|
|
|
|
// Tries to free bytesToFree worth of images on the disk.
|
|
//
|
|
// Returns the number of bytes free and an error if any occurred. The number of
|
|
// bytes freed is always returned.
|
|
// Note that error may be nil and the number of bytes free may be less
|
|
// than bytesToFree.
|
|
func (im *realImageGCManager) freeSpace(bytesToFree int64, freeTime time.Time) (int64, error) {
|
|
imagesInUse, err := im.detectImages(freeTime)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
im.imageRecordsLock.Lock()
|
|
defer im.imageRecordsLock.Unlock()
|
|
|
|
// Get all images in eviction order.
|
|
images := make([]evictionInfo, 0, len(im.imageRecords))
|
|
for image, record := range im.imageRecords {
|
|
if isImageUsed(image, imagesInUse) {
|
|
klog.V(5).Infof("Image ID %s is being used", image)
|
|
continue
|
|
}
|
|
images = append(images, evictionInfo{
|
|
id: image,
|
|
imageRecord: *record,
|
|
})
|
|
}
|
|
sort.Sort(byLastUsedAndDetected(images))
|
|
|
|
// Delete unused images until we've freed up enough space.
|
|
var deletionErrors []error
|
|
spaceFreed := int64(0)
|
|
for _, image := range images {
|
|
klog.V(5).Infof("Evaluating image ID %s for possible garbage collection", image.id)
|
|
// Images that are currently in used were given a newer lastUsed.
|
|
if image.lastUsed.Equal(freeTime) || image.lastUsed.After(freeTime) {
|
|
klog.V(5).Infof("Image ID %s has lastUsed=%v which is >= freeTime=%v, not eligible for garbage collection", image.id, image.lastUsed, freeTime)
|
|
continue
|
|
}
|
|
|
|
// Avoid garbage collect the image if the image is not old enough.
|
|
// In such a case, the image may have just been pulled down, and will be used by a container right away.
|
|
|
|
if freeTime.Sub(image.firstDetected) < im.policy.MinAge {
|
|
klog.V(5).Infof("Image ID %s has age %v which is less than the policy's minAge of %v, not eligible for garbage collection", image.id, freeTime.Sub(image.firstDetected), im.policy.MinAge)
|
|
continue
|
|
}
|
|
|
|
// Remove image. Continue despite errors.
|
|
klog.Infof("[imageGCManager]: Removing image %q to free %d bytes", image.id, image.size)
|
|
err := im.runtime.RemoveImage(container.ImageSpec{Image: image.id})
|
|
if err != nil {
|
|
deletionErrors = append(deletionErrors, err)
|
|
continue
|
|
}
|
|
delete(im.imageRecords, image.id)
|
|
spaceFreed += image.size
|
|
|
|
if spaceFreed >= bytesToFree {
|
|
break
|
|
}
|
|
}
|
|
|
|
if len(deletionErrors) > 0 {
|
|
return spaceFreed, fmt.Errorf("wanted to free %d bytes, but freed %d bytes space with errors in image deletion: %v", bytesToFree, spaceFreed, errors.NewAggregate(deletionErrors))
|
|
}
|
|
return spaceFreed, nil
|
|
}
|
|
|
|
type evictionInfo struct {
|
|
id string
|
|
imageRecord
|
|
}
|
|
|
|
type byLastUsedAndDetected []evictionInfo
|
|
|
|
func (ev byLastUsedAndDetected) Len() int { return len(ev) }
|
|
func (ev byLastUsedAndDetected) Swap(i, j int) { ev[i], ev[j] = ev[j], ev[i] }
|
|
func (ev byLastUsedAndDetected) Less(i, j int) bool {
|
|
// Sort by last used, break ties by detected.
|
|
if ev[i].lastUsed.Equal(ev[j].lastUsed) {
|
|
return ev[i].firstDetected.Before(ev[j].firstDetected)
|
|
}
|
|
return ev[i].lastUsed.Before(ev[j].lastUsed)
|
|
}
|
|
|
|
func isImageUsed(imageID string, imagesInUse sets.String) bool {
|
|
// Check the image ID.
|
|
if _, ok := imagesInUse[imageID]; ok {
|
|
return true
|
|
}
|
|
return false
|
|
}
|