mirror of https://github.com/k3s-io/k3s
Add debug logging to eviction manager
parent ab794c6128
commit 0171121486
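This commit makes two kinds of changes: the local development cluster script (the shell hunks below, likely hack/local-up-cluster.sh) gains environment-overridable eviction settings that are passed through to the kubelet, and the eviction manager (the Go hunks, likely pkg/kubelet/eviction) gains V(3) debug logging at each stage of its synchronize loop. File paths were not preserved in this diff view and are inferred from the hunk context.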
@@ -41,6 +41,11 @@ CGROUP_ROOT=${CGROUP_ROOT:-""}
 # name of the cgroup driver, i.e. cgroupfs or systemd
 CGROUP_DRIVER=${CGROUP_DRIVER:-""}
 
+# enables testing eviction scenarios locally.
+EVICTION_HARD=${EVICTION_HARD:-"memory.available<100Mi"}
+EVICTION_SOFT=${EVICTION_SOFT:-""}
+EVICTION_PRESSURE_TRANSITION_PERIOD=${EVICTION_PRESSURE_TRANSITION_PERIOD:-"1m"}
+
 # We disable cluster DNS by default because this script uses docker0 (or whatever
 # container bridge docker is currently using) and we don't know the IP of the
 # DNS pod to pass in as --cluster-dns. To set this up by hand, set this flag
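The new variables default to a hard memory threshold of 100Mi, no soft thresholds, and a one-minute pressure transition period, and can be overridden from the environment. A hypothetical invocation (assuming the script is hack/local-up-cluster.sh):

    EVICTION_HARD="memory.available<200Mi" hack/local-up-cluster.sh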
@@ -545,6 +550,9 @@ function start_kubelet {
       --cgroup-driver=${CGROUP_DRIVER} \
       --cgroup-root=${CGROUP_ROOT} \
       --keep-terminated-pod-volumes=true \
+      --eviction-hard=${EVICTION_HARD} \
+      --eviction-soft=${EVICTION_SOFT} \
+      --eviction-pressure-transition-period=${EVICTION_PRESSURE_TRANSITION_PERIOD} \
       ${auth_args} \
       ${dns_args} \
       ${net_plugin_dir_args} \
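These flags wire the settings into the kubelet: --eviction-hard="memory.available<100Mi" asks the kubelet to begin evicting pods once observed available memory falls below 100Mi, while the pressure transition period controls how long the node keeps reporting a pressure condition after observations stop crossing a threshold. Note that when a soft threshold is set, the kubelet generally also expects a matching soft-eviction grace period to be configured, which this script does not expose.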
@@ -186,6 +186,8 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
 		return
 	}
 
+	glog.V(3).Infof("eviction manager: synchronize housekeeping")
+
 	// build the ranking functions (if not yet known)
 	// TODO: have a function in cadvisor that lets us know if global housekeeping has completed
 	if len(m.resourceToRankFunc) == 0 || len(m.resourceToNodeReclaimFuncs) == 0 {
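All of the new logging is at verbosity level 3, so it is only emitted when the kubelet runs with --v=3 or higher. glog's V(n) can also guard a block directly, which is useful when assembling the message itself is costly; a minimal sketch (computeDetail is a hypothetical helper, not part of this change):

    if glog.V(3) {
    	// Only runs at --v=3 or higher; avoids paying for the debug
    	// detail when verbose logging is disabled.
    	glog.Infof("eviction manager: debug detail: %v", computeDetail())
    }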
@@ -204,6 +206,7 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
 		glog.Errorf("eviction manager: unexpected err: %v", err)
 		return
 	}
+	debugLogObservations("observations", observations)
 
 	// attempt to create a threshold notifier to improve eviction response time
 	if m.config.KernelMemcgNotification && !m.notifiersInitialized {
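debugLogObservations, defined in the final hunk below, prints one line per observed signal on every synchronize pass, which makes it possible to correlate eviction decisions with the exact stats the manager saw.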
@@ -230,15 +233,18 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
 
 	// determine the set of thresholds met independent of grace period
 	thresholds = thresholdsMet(thresholds, observations, false)
+	debugLogThresholdsWithObservation("thresholds - ignoring grace period", thresholds, observations)
 
 	// determine the set of thresholds previously met that have not yet satisfied the associated min-reclaim
 	if len(m.thresholdsMet) > 0 {
 		thresholdsNotYetResolved := thresholdsMet(m.thresholdsMet, observations, true)
 		thresholds = mergeThresholds(thresholds, thresholdsNotYetResolved)
 	}
+	debugLogThresholdsWithObservation("thresholds - reclaim not satisfied", thresholds, observations)
 
 	// determine the set of thresholds whose stats have been updated since the last sync
 	thresholds = thresholdsUpdatedStats(thresholds, observations, m.lastObservations)
+	debugLogThresholdsWithObservation("thresholds - updated stats", thresholds, observations)
 
 	// track when a threshold was first observed
 	now := m.clock.Now()
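Each filtering stage now logs the thresholds that survive it, so the logs reveal whether a threshold was dropped for not meeting its grace period, for having already satisfied min-reclaim, or for having stale stats. An illustrative line, built from the helper's format string with hypothetical values:

    eviction manager: thresholds - updated stats: threshold [signal=memory.available, quantity=100Mi] observed 85Mi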
@@ -246,15 +252,22 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
 
 	// the set of node conditions that are triggered by currently observed thresholds
 	nodeConditions := nodeConditions(thresholds)
+	if len(nodeConditions) > 0 {
+		glog.V(3).Infof("eviction manager: node conditions - observed: %v", nodeConditions)
+	}
 
 	// track when a node condition was last observed
 	nodeConditionsLastObservedAt := nodeConditionsLastObservedAt(nodeConditions, m.nodeConditionsLastObservedAt, now)
 
 	// node conditions report true if it has been observed within the transition period window
 	nodeConditions = nodeConditionsObservedSince(nodeConditionsLastObservedAt, m.config.PressureTransitionPeriod, now)
+	if len(nodeConditions) > 0 {
+		glog.V(3).Infof("eviction manager: node conditions - transition period not met: %v", nodeConditions)
+	}
 
 	// determine the set of thresholds we need to drive eviction behavior (i.e. all grace periods are met)
 	thresholds = thresholdsMetGracePeriod(thresholdsFirstObservedAt, now)
+	debugLogThresholdsWithObservation("thresholds - grace periods satisified", thresholds, observations)
 
 	// update internal state
 	m.Lock()
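Node conditions are logged twice: once when they are first derived from the observed thresholds, and again after filtering by the pressure transition period, so a condition that is still reported true only because the transition window has not elapsed is visible as such in the logs.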
@@ -694,6 +694,29 @@ func thresholdsMet(thresholds []Threshold, observations signalObservations, enfo
 	return results
 }
 
+func debugLogObservations(logPrefix string, observations signalObservations) {
+	for k, v := range observations {
+		if !v.time.IsZero() {
+			glog.V(3).Infof("eviction manager: %v: signal=%v, available: %v, capacity: %v, time: %v", logPrefix, k, v.available, v.capacity, v.time)
+		} else {
+			glog.V(3).Infof("eviction manager: %v: signal=%v, available: %v, capacity: %v", logPrefix, k, v.available, v.capacity)
+		}
+	}
+}
+
+func debugLogThresholdsWithObservation(logPrefix string, thresholds []Threshold, observations signalObservations) {
+	for i := range thresholds {
+		threshold := thresholds[i]
+		observed, found := observations[threshold.Signal]
+		if found {
+			quantity := getThresholdQuantity(threshold.Value, observed.capacity)
+			glog.V(3).Infof("eviction manager: %v: threshold [signal=%v, quantity=%v] observed %v", logPrefix, threshold.Signal, quantity, observed.available)
+		} else {
+			glog.V(3).Infof("eviction manager: %v: threshold [signal=%v] had no observation", logPrefix, threshold.Signal)
+		}
+	}
+}
+
 func thresholdsUpdatedStats(thresholds []Threshold, observations, lastObservations signalObservations) []Threshold {
 	results := []Threshold{}
 	for i := range thresholds {
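Both helpers iterate their inputs and emit one V(3) line per entry; debugLogThresholdsWithObservation resolves the threshold's configured value against observed capacity via getThresholdQuantity (so percentage-based thresholds appear as concrete quantities) before printing it next to what was actually available. An illustrative observation line with hypothetical values:

    eviction manager: observations: signal=memory.available, available: 150Mi, capacity: 4Gi, time: 2016-08-16 10:00:00 +0000 UTC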