mirror of https://github.com/k3s-io/k3s
Merge pull request #65018 from shyamjvs/add-debug-logs-to-logexporter
Automatic merge from submit-queue (batch tested with PRs 64974, 65009, 65018). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>. Increase logexporter timeout and add debug logs Ref - https://github.com/kubernetes/kubernetes/issues/63030#issuecomment-396335294 So it seems that logexporter fails to run on a large share of the nodes in our 5k-node cluster (~40% of nodes). As a result, we fall back to ssh-based copying for all of those nodes, which is slow, and hence the job times out. My feeling is that it's because of slow scheduling of logexporter pods (and hence quite a few nodes didn't even get the chance to run those pods before we delete the daemonset). /cc @wojtek-t @krzyzacy ```release-note NONE ``` /sig scalability /kind bug /priority important-soon /milestone v1.11 /status approved-for-milestone pull/8/head
commit
55c64a50d4
|
@ -294,7 +294,7 @@ function dump_nodes_with_logexporter() {
|
|||
local -r service_account_credentials="$(cat ${GOOGLE_APPLICATION_CREDENTIALS} | base64 | tr -d '\n')"
|
||||
local -r cloud_provider="${KUBERNETES_PROVIDER}"
|
||||
local -r enable_hollow_node_logs="${ENABLE_HOLLOW_NODE_LOGS:-false}"
|
||||
local -r logexport_sleep_seconds="$(( 90 + NUM_NODES / 5 ))"
|
||||
local -r logexport_sleep_seconds="$(( 90 + NUM_NODES / 3 ))"
|
||||
|
||||
# Fill in the parameters in the logexporter daemonset template.
|
||||
sed -i'' -e "s@{{.LogexporterNamespace}}@${logexporter_namespace}@g" "${KUBE_ROOT}/cluster/log-dump/logexporter-daemonset.yaml"
|
||||
|
@ -345,6 +345,7 @@ function dump_nodes_with_logexporter() {
|
|||
done
|
||||
|
||||
# Delete the logexporter resources and dump logs for the failed nodes (if any) through SSH.
|
||||
"${KUBECTL}" get pods --namespace "${logexporter_namespace}" || true
|
||||
"${KUBECTL}" delete namespace "${logexporter_namespace}" || true
|
||||
if [[ "${#failed_nodes[@]}" != 0 ]]; then
|
||||
echo -e "Dumping logs through SSH for the following nodes:\n${failed_nodes[@]}"
|
||||
|
|
Loading…
Reference in New Issue