Add debug logging to eviction manager

pull/6/head
Derek Carr 2017-02-08 12:58:02 -05:00
parent ab794c6128
commit 0171121486
3 changed files with 44 additions and 0 deletions

View File

@ -41,6 +41,11 @@ CGROUP_ROOT=${CGROUP_ROOT:-""}
# name of the cgroup driver, i.e. cgroupfs or systemd
CGROUP_DRIVER=${CGROUP_DRIVER:-""}
# enables testing eviction scenarios locally.
EVICTION_HARD=${EVICTION_HARD:-"memory.available<100Mi"}
EVICTION_SOFT=${EVICTION_SOFT:-""}
EVICTION_PRESSURE_TRANSITION_PERIOD=${EVICTION_PRESSURE_TRANSITION_PERIOD:-"1m"}
# We disable cluster DNS by default because this script uses docker0 (or whatever
# container bridge docker is currently using) and we don't know the IP of the
# DNS pod to pass in as --cluster-dns. To set this up by hand, set this flag
@ -545,6 +550,9 @@ function start_kubelet {
--cgroup-driver=${CGROUP_DRIVER} \
--cgroup-root=${CGROUP_ROOT} \
--keep-terminated-pod-volumes=true \
--eviction-hard=${EVICTION_HARD} \
--eviction-soft=${EVICTION_SOFT} \
--eviction-pressure-transition-period=${EVICTION_PRESSURE_TRANSITION_PERIOD} \
${auth_args} \
${dns_args} \
${net_plugin_dir_args} \

View File

@ -186,6 +186,8 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
return
}
glog.V(3).Infof("eviction manager: synchronize housekeeping")
// build the ranking functions (if not yet known)
// TODO: have a function in cadvisor that lets us know if global housekeeping has completed
if len(m.resourceToRankFunc) == 0 || len(m.resourceToNodeReclaimFuncs) == 0 {
@ -204,6 +206,7 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
glog.Errorf("eviction manager: unexpected err: %v", err)
return
}
debugLogObservations("observations", observations)
// attempt to create a threshold notifier to improve eviction response time
if m.config.KernelMemcgNotification && !m.notifiersInitialized {
@ -230,15 +233,18 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
// determine the set of thresholds met independent of grace period
thresholds = thresholdsMet(thresholds, observations, false)
debugLogThresholdsWithObservation("thresholds - ignoring grace period", thresholds, observations)
// determine the set of thresholds previously met that have not yet satisfied the associated min-reclaim
if len(m.thresholdsMet) > 0 {
thresholdsNotYetResolved := thresholdsMet(m.thresholdsMet, observations, true)
thresholds = mergeThresholds(thresholds, thresholdsNotYetResolved)
}
debugLogThresholdsWithObservation("thresholds - reclaim not satisfied", thresholds, observations)
// determine the set of thresholds whose stats have been updated since the last sync
thresholds = thresholdsUpdatedStats(thresholds, observations, m.lastObservations)
debugLogThresholdsWithObservation("thresholds - updated stats", thresholds, observations)
// track when a threshold was first observed
now := m.clock.Now()
@ -246,15 +252,22 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
// the set of node conditions that are triggered by currently observed thresholds
nodeConditions := nodeConditions(thresholds)
if len(nodeConditions) > 0 {
glog.V(3).Infof("eviction manager: node conditions - observed: %v", nodeConditions)
}
// track when a node condition was last observed
nodeConditionsLastObservedAt := nodeConditionsLastObservedAt(nodeConditions, m.nodeConditionsLastObservedAt, now)
// a node condition reports true if it has been observed within the transition period window
nodeConditions = nodeConditionsObservedSince(nodeConditionsLastObservedAt, m.config.PressureTransitionPeriod, now)
if len(nodeConditions) > 0 {
glog.V(3).Infof("eviction manager: node conditions - transition period not met: %v", nodeConditions)
}
// determine the set of thresholds we need to drive eviction behavior (i.e. all grace periods are met)
thresholds = thresholdsMetGracePeriod(thresholdsFirstObservedAt, now)
debugLogThresholdsWithObservation("thresholds - grace periods satisified", thresholds, observations)
// update internal state
m.Lock()

View File

@ -694,6 +694,29 @@ func thresholdsMet(thresholds []Threshold, observations signalObservations, enfo
return results
}
// debugLogObservations writes one V(3) log line per entry in observations,
// each prefixed with "eviction manager: <logPrefix>". The line reports the
// signal, its available and capacity values, and, when the observation's time
// is non-zero, that time as well.
//
// The function returns immediately when V(3) logging is disabled so that the
// map walk and argument formatting are not paid for on every call.
func debugLogObservations(logPrefix string, observations signalObservations) {
	// Resolve the verbosity check once instead of once per observation.
	verbose := glog.V(3)
	if !verbose {
		return
	}
	for k, v := range observations {
		if !v.time.IsZero() {
			verbose.Infof("eviction manager: %v: signal=%v, available: %v, capacity: %v, time: %v", logPrefix, k, v.available, v.capacity, v.time)
		} else {
			// No timestamp recorded for this observation; omit the time field.
			verbose.Infof("eviction manager: %v: signal=%v, available: %v, capacity: %v", logPrefix, k, v.available, v.capacity)
		}
	}
}
// debugLogThresholdsWithObservation writes one V(3) log line per threshold,
// each prefixed with "eviction manager: <logPrefix>". When observations holds
// an entry for the threshold's signal, the line reports the threshold quantity
// (derived from the threshold value and the observed capacity via
// getThresholdQuantity) next to the observed available value; otherwise it
// notes that the signal had no observation.
//
// The function returns immediately when V(3) logging is disabled, so the
// per-threshold lookup and quantity computation are skipped when their result
// would be discarded.
func debugLogThresholdsWithObservation(logPrefix string, thresholds []Threshold, observations signalObservations) {
	// Resolve the verbosity check once instead of once per threshold.
	verbose := glog.V(3)
	if !verbose {
		return
	}
	for i := range thresholds {
		threshold := thresholds[i]
		observed, found := observations[threshold.Signal]
		if !found {
			verbose.Infof("eviction manager: %v: threshold [signal=%v] had no observation", logPrefix, threshold.Signal)
			continue
		}
		quantity := getThresholdQuantity(threshold.Value, observed.capacity)
		verbose.Infof("eviction manager: %v: threshold [signal=%v, quantity=%v] observed %v", logPrefix, threshold.Signal, quantity, observed.available)
	}
}
func thresholdsUpdatedStats(thresholds []Threshold, observations, lastObservations signalObservations) []Threshold {
results := []Threshold{}
for i := range thresholds {