mirror of https://github.com/k3s-io/k3s
Add debug logging to eviction manager
parent ab794c6128
commit 0171121486
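This commit makes two kinds of changes: the local development cluster script (the shell hunks below, likely hack/local-up-cluster.sh) gains environment-overridable eviction settings that are passed through to the kubelet, and the eviction manager (the Go hunks, likely pkg/kubelet/eviction) gains V(3) debug logging at each stage of its synchronize loop. File paths were not preserved in this diff view and are inferred from the hunk context.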
@@ -41,6 +41,11 @@ CGROUP_ROOT=${CGROUP_ROOT:-""}
 # name of the cgroup driver, i.e. cgroupfs or systemd
 CGROUP_DRIVER=${CGROUP_DRIVER:-""}
 
+# enables testing eviction scenarios locally.
+EVICTION_HARD=${EVICTION_HARD:-"memory.available<100Mi"}
+EVICTION_SOFT=${EVICTION_SOFT:-""}
+EVICTION_PRESSURE_TRANSITION_PERIOD=${EVICTION_PRESSURE_TRANSITION_PERIOD:-"1m"}
+
 # We disable cluster DNS by default because this script uses docker0 (or whatever
 # container bridge docker is currently using) and we don't know the IP of the
 # DNS pod to pass in as --cluster-dns. To set this up by hand, set this flag
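The new variables default to a hard memory threshold of 100Mi, no soft thresholds, and a one-minute pressure transition period, and can be overridden from the environment. A hypothetical invocation (assuming the script is hack/local-up-cluster.sh):

    EVICTION_HARD="memory.available<200Mi" hack/local-up-cluster.sh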
@@ -545,6 +550,9 @@ function start_kubelet {
       --cgroup-driver=${CGROUP_DRIVER} \
       --cgroup-root=${CGROUP_ROOT} \
       --keep-terminated-pod-volumes=true \
+      --eviction-hard=${EVICTION_HARD} \
+      --eviction-soft=${EVICTION_SOFT} \
+      --eviction-pressure-transition-period=${EVICTION_PRESSURE_TRANSITION_PERIOD} \
       ${auth_args} \
       ${dns_args} \
       ${net_plugin_dir_args} \
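These flags wire the settings into the kubelet: --eviction-hard="memory.available<100Mi" asks the kubelet to begin evicting pods once observed available memory falls below 100Mi, while the pressure transition period controls how long the node keeps reporting a pressure condition after observations stop crossing a threshold. Note that when a soft threshold is set, the kubelet generally also expects a matching soft-eviction grace period to be configured, which this script does not expose.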
@@ -186,6 +186,8 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
 		return
 	}
 
+	glog.V(3).Infof("eviction manager: synchronize housekeeping")
+
 	// build the ranking functions (if not yet known)
 	// TODO: have a function in cadvisor that lets us know if global housekeeping has completed
 	if len(m.resourceToRankFunc) == 0 || len(m.resourceToNodeReclaimFuncs) == 0 {
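All of the new logging is at verbosity level 3, so it is only emitted when the kubelet runs with --v=3 or higher. glog's V(n) can also guard a block directly, which is useful when assembling the message itself is costly; a minimal sketch (computeDetail is a hypothetical helper, not part of this change):

    if glog.V(3) {
    	// Only runs at --v=3 or higher; avoids paying for the debug
    	// detail when verbose logging is disabled.
    	glog.Infof("eviction manager: debug detail: %v", computeDetail())
    }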
@@ -204,6 +206,7 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
 		glog.Errorf("eviction manager: unexpected err: %v", err)
 		return
 	}
+	debugLogObservations("observations", observations)
 
 	// attempt to create a threshold notifier to improve eviction response time
 	if m.config.KernelMemcgNotification && !m.notifiersInitialized {
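debugLogObservations, defined in the final hunk below, prints one line per observed signal on every synchronize pass, which makes it possible to correlate eviction decisions with the exact stats the manager saw.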
@@ -230,15 +233,18 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
 
 	// determine the set of thresholds met independent of grace period
 	thresholds = thresholdsMet(thresholds, observations, false)
+	debugLogThresholdsWithObservation("thresholds - ignoring grace period", thresholds, observations)
 
 	// determine the set of thresholds previously met that have not yet satisfied the associated min-reclaim
 	if len(m.thresholdsMet) > 0 {
 		thresholdsNotYetResolved := thresholdsMet(m.thresholdsMet, observations, true)
 		thresholds = mergeThresholds(thresholds, thresholdsNotYetResolved)
 	}
+	debugLogThresholdsWithObservation("thresholds - reclaim not satisfied", thresholds, observations)
 
 	// determine the set of thresholds whose stats have been updated since the last sync
 	thresholds = thresholdsUpdatedStats(thresholds, observations, m.lastObservations)
+	debugLogThresholdsWithObservation("thresholds - updated stats", thresholds, observations)
 
 	// track when a threshold was first observed
 	now := m.clock.Now()
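Each filtering stage now logs the thresholds that survive it, so the logs reveal whether a threshold was dropped for not meeting its grace period, for having already satisfied min-reclaim, or for having stale stats. An illustrative line, built from the helper's format string with hypothetical values:

    eviction manager: thresholds - updated stats: threshold [signal=memory.available, quantity=100Mi] observed 85Mi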
@@ -246,15 +252,22 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
 
 	// the set of node conditions that are triggered by currently observed thresholds
 	nodeConditions := nodeConditions(thresholds)
+	if len(nodeConditions) > 0 {
+		glog.V(3).Infof("eviction manager: node conditions - observed: %v", nodeConditions)
+	}
 
 	// track when a node condition was last observed
 	nodeConditionsLastObservedAt := nodeConditionsLastObservedAt(nodeConditions, m.nodeConditionsLastObservedAt, now)
 
 	// node conditions report true if it has been observed within the transition period window
 	nodeConditions = nodeConditionsObservedSince(nodeConditionsLastObservedAt, m.config.PressureTransitionPeriod, now)
+	if len(nodeConditions) > 0 {
+		glog.V(3).Infof("eviction manager: node conditions - transition period not met: %v", nodeConditions)
+	}
 
 	// determine the set of thresholds we need to drive eviction behavior (i.e. all grace periods are met)
 	thresholds = thresholdsMetGracePeriod(thresholdsFirstObservedAt, now)
+	debugLogThresholdsWithObservation("thresholds - grace periods satisified", thresholds, observations)
 
 	// update internal state
 	m.Lock()
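Node conditions are logged twice: once when they are first derived from the observed thresholds, and again after filtering by the pressure transition period, so a condition that is still reported true only because the transition window has not elapsed is visible as such in the logs.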
@@ -694,6 +694,29 @@ func thresholdsMet(thresholds []Threshold, observations signalObservations, enfo
 	return results
 }
 
+func debugLogObservations(logPrefix string, observations signalObservations) {
+	for k, v := range observations {
+		if !v.time.IsZero() {
+			glog.V(3).Infof("eviction manager: %v: signal=%v, available: %v, capacity: %v, time: %v", logPrefix, k, v.available, v.capacity, v.time)
+		} else {
+			glog.V(3).Infof("eviction manager: %v: signal=%v, available: %v, capacity: %v", logPrefix, k, v.available, v.capacity)
+		}
+	}
+}
+
+func debugLogThresholdsWithObservation(logPrefix string, thresholds []Threshold, observations signalObservations) {
+	for i := range thresholds {
+		threshold := thresholds[i]
+		observed, found := observations[threshold.Signal]
+		if found {
+			quantity := getThresholdQuantity(threshold.Value, observed.capacity)
+			glog.V(3).Infof("eviction manager: %v: threshold [signal=%v, quantity=%v] observed %v", logPrefix, threshold.Signal, quantity, observed.available)
+		} else {
+			glog.V(3).Infof("eviction manager: %v: threshold [signal=%v] had no observation", logPrefix, threshold.Signal)
+		}
+	}
+}
+
 func thresholdsUpdatedStats(thresholds []Threshold, observations, lastObservations signalObservations) []Threshold {
 	results := []Threshold{}
 	for i := range thresholds {
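Both helpers iterate their inputs and emit one V(3) line per entry; debugLogThresholdsWithObservation resolves the threshold's configured value against observed capacity via getThresholdQuantity (so percentage-based thresholds appear as concrete quantities) before printing it next to what was actually available. An illustrative observation line with hypothetical values:

    eviction manager: observations: signal=memory.available, available: 150Mi, capacity: 4Gi, time: 2016-08-16 10:00:00 +0000 UTC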