2015-03-01 03:49:12 +00:00
/ *
2015-05-01 16:19:44 +00:00
Copyright 2015 The Kubernetes Authors All rights reserved .
2015-03-01 03:49:12 +00:00
Licensed under the Apache License , Version 2.0 ( the "License" ) ;
you may not use this file except in compliance with the License .
You may obtain a copy of the License at
http : //www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing , software
distributed under the License is distributed on an "AS IS" BASIS ,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND , either express or implied .
See the License for the specific language governing permissions and
limitations under the License .
* /
package kubelet
import (
2015-03-16 03:37:19 +00:00
"fmt"
2015-03-01 03:49:12 +00:00
"sort"
"sync"
"time"
2015-08-05 22:05:17 +00:00
"github.com/golang/glog"
2015-08-05 22:03:47 +00:00
"k8s.io/kubernetes/pkg/api"
2015-09-03 21:40:58 +00:00
"k8s.io/kubernetes/pkg/client/record"
2015-08-05 22:03:47 +00:00
"k8s.io/kubernetes/pkg/kubelet/cadvisor"
2015-09-26 00:29:08 +00:00
"k8s.io/kubernetes/pkg/kubelet/container"
2015-12-02 08:53:56 +00:00
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
2015-09-09 17:45:01 +00:00
"k8s.io/kubernetes/pkg/util/sets"
2016-02-02 10:57:06 +00:00
"k8s.io/kubernetes/pkg/util/wait"
2015-03-01 03:49:12 +00:00
)
// Manages lifecycle of all images.
//
2015-03-16 03:37:19 +00:00
// Implementation is thread-safe.
2015-03-01 03:49:12 +00:00
type imageManager interface {
2015-03-16 03:37:19 +00:00
// Applies the garbage collection policy. Errors include being unable to free
// enough space as per the garbage collection policy.
GarbageCollect ( ) error
2015-03-01 03:49:12 +00:00
2015-05-05 18:15:12 +00:00
// Start async garbage collection of images.
Start ( ) error
2015-12-02 08:53:56 +00:00
GetImageList ( ) ( [ ] kubecontainer . Image , error )
2015-03-01 03:49:12 +00:00
// TODO(vmarmol): Have this subsume pulls as well.
}
2015-03-16 03:37:19 +00:00
// ImageGCPolicy is a policy for garbage collecting images. It defines the
// allowed band of disk usage within which garbage collection operates.
type ImageGCPolicy struct {
	// HighThresholdPercent is the highest usage we will allow. Usage at or
	// above this percentage always triggers garbage collection.
	HighThresholdPercent int

	// LowThresholdPercent is the lowest usage garbage collection will try to
	// reach. Usage below this percentage never triggers garbage collection.
	LowThresholdPercent int

	// MinAge is the minimum age at which an image can be garbage collected.
	MinAge time.Duration
}
2015-03-01 03:49:12 +00:00
type realImageManager struct {
2015-09-26 00:29:08 +00:00
// Container runtime
runtime container . Runtime
2015-03-01 03:49:12 +00:00
// Records of images and their use.
2015-03-16 03:37:19 +00:00
imageRecords map [ string ] * imageRecord
2015-03-01 03:49:12 +00:00
imageRecordsLock sync . Mutex
2015-03-16 03:37:19 +00:00
// The image garbage collection policy in use.
policy ImageGCPolicy
// cAdvisor instance.
cadvisor cadvisor . Interface
2015-03-27 20:12:48 +00:00
// Recorder for Kubernetes events.
recorder record . EventRecorder
// Reference to this node.
nodeRef * api . ObjectReference
2015-09-26 00:36:43 +00:00
// Track initialization
initialized bool
2015-03-01 03:49:12 +00:00
}
// imageRecord is the bookkeeping kept for every image we track.
type imageRecord struct {
	// firstDetected is the time at which this image was first detected.
	firstDetected time.Time

	// lastUsed is the time at which we last saw this image being used.
	lastUsed time.Time

	// size is the size of the image in bytes.
	size int64
}
2015-09-26 00:29:08 +00:00
func newImageManager ( runtime container . Runtime , cadvisorInterface cadvisor . Interface , recorder record . EventRecorder , nodeRef * api . ObjectReference , policy ImageGCPolicy ) ( imageManager , error ) {
2015-03-16 03:37:19 +00:00
// Validate policy.
if policy . HighThresholdPercent < 0 || policy . HighThresholdPercent > 100 {
return nil , fmt . Errorf ( "invalid HighThresholdPercent %d, must be in range [0-100]" , policy . HighThresholdPercent )
}
if policy . LowThresholdPercent < 0 || policy . LowThresholdPercent > 100 {
return nil , fmt . Errorf ( "invalid LowThresholdPercent %d, must be in range [0-100]" , policy . LowThresholdPercent )
}
LowThresholdPercent can not be higher than HighThresholdPercent
if LowThresholdPercent > HighThresholdPercent, amountToFree at image_manager.go:208 is negative and image GC will not free memory properly.
Justification:
1) LowThresholdPercent > HighThresholdPercent implies (LowThresholdPercent * capacity / 100) > (HighThresholdPercent * capacity / 100)
2) usage is at least (HighThresholdPercent * capacity / 100)
3) amountToFree = usage - (LowThresholdPercent * capacity / 100)
Combining 1), 2) and 3) implies amountToFree can be negative.
What happens if amountToFree is negative? in freeSpace method, "for _, image := range images " loops at least once
and if everything goes fine, "delete(im.imageRecords, image.id)" is executed.
When checking for condition "if spaceFreed >= bytesToFree", it is always true as bytesToFree is negative
and spaceFreed is positive. The loop is finished, so is image GC.
At the end, only the oldest image is deleted. In situations where there is a lot of dead containers,
each container corresponing to distinct image, number of unused images can get higher.
If two new images get pulled in every 5 minutes, image GC will not work properly and will not free enough space.
Secondly, it will take a lot of time to free all unused images (hours depending on a number of unused images).
This is an incorrect configuration. Image GC should report it and refuse to work.
2015-12-02 13:10:32 +00:00
if policy . LowThresholdPercent > policy . HighThresholdPercent {
return nil , fmt . Errorf ( "LowThresholdPercent %d can not be higher than HighThresholdPercent %d" , policy . LowThresholdPercent , policy . HighThresholdPercent )
}
2015-03-16 03:37:19 +00:00
im := & realImageManager {
2015-09-26 00:29:08 +00:00
runtime : runtime ,
2015-03-16 03:37:19 +00:00
policy : policy ,
2015-03-01 03:49:12 +00:00
imageRecords : make ( map [ string ] * imageRecord ) ,
2015-03-16 03:37:19 +00:00
cadvisor : cadvisorInterface ,
2015-03-27 20:12:48 +00:00
recorder : recorder ,
nodeRef : nodeRef ,
2015-09-26 00:36:43 +00:00
initialized : false ,
2015-03-16 03:37:19 +00:00
}
return im , nil
2015-03-01 03:49:12 +00:00
}
2015-05-05 18:15:12 +00:00
func ( im * realImageManager ) Start ( ) error {
2016-02-02 10:57:06 +00:00
go wait . Until ( func ( ) {
2015-09-26 00:36:43 +00:00
// Initial detection make detected time "unknown" in the past.
var ts time . Time
if im . initialized {
ts = time . Now ( )
}
err := im . detectImages ( ts )
2015-03-01 03:49:12 +00:00
if err != nil {
glog . Warningf ( "[ImageManager] Failed to monitor images: %v" , err )
2015-09-26 00:36:43 +00:00
} else {
im . initialized = true
2015-03-01 03:49:12 +00:00
}
2016-02-02 10:57:06 +00:00
} , 5 * time . Minute , wait . NeverStop )
2015-03-01 03:49:12 +00:00
return nil
}
2015-12-02 08:53:56 +00:00
// GetImageList returns the list of images on this node as reported by the
// container runtime. On failure the returned slice is nil.
func (im *realImageManager) GetImageList() ([]kubecontainer.Image, error) {
	list, err := im.runtime.ListImages()
	if err == nil {
		return list, nil
	}
	return nil, err
}
2016-01-26 02:35:11 +00:00
func ( im * realImageManager ) detectImages ( detectTime time . Time ) error {
2015-09-26 00:29:08 +00:00
images , err := im . runtime . ListImages ( )
2015-03-01 03:49:12 +00:00
if err != nil {
return err
}
2015-09-26 00:29:08 +00:00
pods , err := im . runtime . GetPods ( true )
2015-03-01 03:49:12 +00:00
if err != nil {
return err
}
// Make a set of images in use by containers.
2015-09-09 17:45:01 +00:00
imagesInUse := sets . NewString ( )
2015-09-26 00:29:08 +00:00
for _ , pod := range pods {
for _ , container := range pod . Containers {
imagesInUse . Insert ( container . Image )
}
2015-03-01 03:49:12 +00:00
}
// Add new images and record those being used.
now := time . Now ( )
2015-09-09 17:45:01 +00:00
currentImages := sets . NewString ( )
2015-04-20 03:26:07 +00:00
im . imageRecordsLock . Lock ( )
defer im . imageRecordsLock . Unlock ( )
2015-03-01 03:49:12 +00:00
for _ , image := range images {
currentImages . Insert ( image . ID )
// New image, set it as detected now.
2015-04-20 03:26:07 +00:00
if _ , ok := im . imageRecords [ image . ID ] ; ! ok {
im . imageRecords [ image . ID ] = & imageRecord {
2016-01-26 02:35:11 +00:00
firstDetected : detectTime ,
2015-03-01 03:49:12 +00:00
}
}
// Set last used time to now if the image is being used.
2015-09-26 00:29:08 +00:00
if isImageUsed ( image , imagesInUse ) {
2015-04-20 03:26:07 +00:00
im . imageRecords [ image . ID ] . lastUsed = now
2015-03-01 03:49:12 +00:00
}
2015-09-26 00:29:08 +00:00
im . imageRecords [ image . ID ] . size = image . Size
2015-03-01 03:49:12 +00:00
}
// Remove old images from our records.
2015-04-20 03:26:07 +00:00
for image := range im . imageRecords {
2015-03-01 03:49:12 +00:00
if ! currentImages . Has ( image ) {
2015-04-20 03:26:07 +00:00
delete ( im . imageRecords , image )
2015-03-01 03:49:12 +00:00
}
}
return nil
}
2015-04-20 03:26:07 +00:00
func ( im * realImageManager ) GarbageCollect ( ) error {
2015-03-16 03:37:19 +00:00
// Get disk usage on disk holding images.
2015-04-20 03:26:07 +00:00
fsInfo , err := im . cadvisor . DockerImagesFsInfo ( )
2015-03-16 03:37:19 +00:00
if err != nil {
return err
}
usage := int64 ( fsInfo . Usage )
capacity := int64 ( fsInfo . Capacity )
2015-03-16 04:00:46 +00:00
// Check valid capacity.
if capacity == 0 {
2015-03-27 20:12:48 +00:00
err := fmt . Errorf ( "invalid capacity %d on device %q at mount point %q" , capacity , fsInfo . Device , fsInfo . Mountpoint )
2015-11-13 22:30:01 +00:00
im . recorder . Eventf ( im . nodeRef , api . EventTypeWarning , container . InvalidDiskCapacity , err . Error ( ) )
2015-03-27 20:12:48 +00:00
return err
2015-03-16 04:00:46 +00:00
}
2015-03-16 03:37:19 +00:00
// If over the max threshold, free enough to place us at the lower threshold.
2015-03-16 04:00:46 +00:00
usagePercent := int ( usage * 100 / capacity )
2015-04-20 03:26:07 +00:00
if usagePercent >= im . policy . HighThresholdPercent {
amountToFree := usage - ( int64 ( im . policy . LowThresholdPercent ) * capacity / 100 )
glog . Infof ( "[ImageManager]: Disk usage on %q (%s) is at %d%% which is over the high threshold (%d%%). Trying to free %d bytes" , fsInfo . Device , fsInfo . Mountpoint , usagePercent , im . policy . HighThresholdPercent , amountToFree )
2016-01-26 02:35:11 +00:00
freed , err := im . freeSpace ( amountToFree , time . Now ( ) )
2015-03-16 03:37:19 +00:00
if err != nil {
return err
}
if freed < amountToFree {
2015-03-27 20:12:48 +00:00
err := fmt . Errorf ( "failed to garbage collect required amount of images. Wanted to free %d, but freed %d" , amountToFree , freed )
2015-11-13 22:30:01 +00:00
im . recorder . Eventf ( im . nodeRef , api . EventTypeWarning , container . FreeDiskSpaceFailed , err . Error ( ) )
2015-03-27 20:12:48 +00:00
return err
2015-03-16 03:37:19 +00:00
}
}
return nil
}
// Tries to free bytesToFree worth of images on the disk.
//
2015-08-08 21:29:57 +00:00
// Returns the number of bytes free and an error if any occurred. The number of
2015-03-16 03:37:19 +00:00
// bytes freed is always returned.
// Note that error may be nil and the number of bytes free may be less
// than bytesToFree.
2016-01-26 02:35:11 +00:00
func ( im * realImageManager ) freeSpace ( bytesToFree int64 , freeTime time . Time ) ( int64 , error ) {
err := im . detectImages ( freeTime )
2015-03-01 03:49:12 +00:00
if err != nil {
return 0 , err
}
2015-04-20 03:26:07 +00:00
im . imageRecordsLock . Lock ( )
defer im . imageRecordsLock . Unlock ( )
2015-03-01 03:49:12 +00:00
// Get all images in eviction order.
2015-04-20 03:26:07 +00:00
images := make ( [ ] evictionInfo , 0 , len ( im . imageRecords ) )
for image , record := range im . imageRecords {
2015-03-01 03:49:12 +00:00
images = append ( images , evictionInfo {
id : image ,
imageRecord : * record ,
} )
}
sort . Sort ( byLastUsedAndDetected ( images ) )
// Delete unused images until we've freed up enough space.
var lastErr error
spaceFreed := int64 ( 0 )
for _ , image := range images {
// Images that are currently in used were given a newer lastUsed.
2016-01-26 02:35:11 +00:00
if image . lastUsed . After ( freeTime ) {
2015-03-01 03:49:12 +00:00
break
}
2015-12-17 12:46:56 +00:00
// Avoid garbage collect the image if the image is not old enough.
// In such a case, the image may have just been pulled down, and will be used by a container right away.
2016-01-24 08:54:51 +00:00
if freeTime . Sub ( image . firstDetected ) < im . policy . MinAge {
2015-12-17 12:46:56 +00:00
continue
}
2015-03-01 03:49:12 +00:00
// Remove image. Continue despite errors.
2015-03-16 04:00:46 +00:00
glog . Infof ( "[ImageManager]: Removing image %q to free %d bytes" , image . id , image . size )
2015-09-26 00:29:08 +00:00
err := im . runtime . RemoveImage ( container . ImageSpec { Image : image . id } )
2015-03-01 03:49:12 +00:00
if err != nil {
lastErr = err
continue
}
2015-04-20 03:26:07 +00:00
delete ( im . imageRecords , image . id )
2015-03-01 03:49:12 +00:00
spaceFreed += image . size
if spaceFreed >= bytesToFree {
break
}
}
return spaceFreed , lastErr
}
// evictionInfo pairs an image ID with a copy of that image's record so that
// candidates can be sorted into eviction order.
type evictionInfo struct {
	// id is the ID of the image the embedded record belongs to.
	id string

	imageRecord
}
type byLastUsedAndDetected [ ] evictionInfo
2015-04-20 03:26:07 +00:00
func ( ev byLastUsedAndDetected ) Len ( ) int { return len ( ev ) }
func ( ev byLastUsedAndDetected ) Swap ( i , j int ) { ev [ i ] , ev [ j ] = ev [ j ] , ev [ i ] }
func ( ev byLastUsedAndDetected ) Less ( i , j int ) bool {
2015-03-01 03:49:12 +00:00
// Sort by last used, break ties by detected.
2015-04-20 03:26:07 +00:00
if ev [ i ] . lastUsed . Equal ( ev [ j ] . lastUsed ) {
2015-12-17 12:46:56 +00:00
return ev [ i ] . firstDetected . Before ( ev [ j ] . firstDetected )
2015-03-01 03:49:12 +00:00
} else {
2015-04-20 03:26:07 +00:00
return ev [ i ] . lastUsed . Before ( ev [ j ] . lastUsed )
2015-03-01 03:49:12 +00:00
}
}
2015-09-26 00:29:08 +00:00
func isImageUsed ( image container . Image , imagesInUse sets . String ) bool {
2015-03-01 03:49:12 +00:00
// Check the image ID and all the RepoTags.
if _ , ok := imagesInUse [ image . ID ] ; ok {
return true
}
2015-12-02 08:53:56 +00:00
for _ , tag := range image . RepoTags {
2015-03-01 03:49:12 +00:00
if _ , ok := imagesInUse [ tag ] ; ok {
return true
}
}
return false
}