Update log-dump.sh for Windows nodes.
Tested:

```
$ PROJECT=${CLOUDSDK_CORE_PROJECT} KUBERNETES_SKIP_CONFIRM=y NUM_NODES=2 \
    NUM_WINDOWS_NODES=2 KUBE_GCE_ENABLE_IP_ALIASES=true go run \
    ./hack/e2e.go -- --up
$ cluster/log-dump/log-dump.sh
$ ls _artifacts
```

And with: NUM_NODES=2 NUM_WINDOWS_NODES=0; NUM_NODES=0 NUM_WINDOWS_NODES=2
parent 3efd4ca1dc
commit 0bb25290c8
```diff
@@ -52,6 +52,10 @@ readonly initd_logfiles="docker/log"
 readonly supervisord_logfiles="kubelet.log supervisor/supervisord.log supervisor/kubelet-stdout.log supervisor/kubelet-stderr.log supervisor/docker-stdout.log supervisor/docker-stderr.log"
 readonly systemd_services="kubelet kubelet-monitor kube-container-runtime-monitor ${LOG_DUMP_SYSTEMD_SERVICES:-docker}"
 readonly dump_systemd_journal="${LOG_DUMP_SYSTEMD_JOURNAL:-false}"
+# Log files found in WINDOWS_LOGS_DIR on Windows nodes:
+readonly windows_node_logfiles="kubelet.log kube-proxy.log docker.log"
+# Log files found in other directories on Windows nodes:
+readonly windows_node_otherfiles="C:\\Windows\\MEMORY.dmp"
 
 # Limit the number of concurrent node connections so that we don't run out of
 # file descriptors for large clusters.
```
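The `${VAR:-default}` expansions above let callers override these settings from the environment without editing the script. A minimal sketch of that mechanism (the `containerd` value is purely illustrative, not a default the script ships):

```sh
# Override which systemd services get dumped; the readonly line above falls
# back to "docker" when LOG_DUMP_SYSTEMD_SERVICES is unset.
LOG_DUMP_SYSTEMD_SERVICES="containerd" LOG_DUMP_SYSTEMD_JOURNAL=true \
  cluster/log-dump/log-dump.sh
```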
```diff
@@ -195,6 +199,66 @@ function save-logs() {
   copy-logs-from-node "${node_name}" "${dir}" "${files}"
 }
 
+# Saves a copy of the Windows Docker event log to ${WINDOWS_LOGS_DIR}\docker.log
+# on node $1.
+function export-windows-docker-event-log() {
+  local -r node="${1}"
+
+  local -r powershell_cmd="powershell.exe -Command \$log=\$(Get-EventLog -LogName Application -Source Docker); Set-Content '${WINDOWS_LOGS_DIR}\\docker.log' \$log.Message"
+
+  # Retry up to 3 times to allow ssh keys to be properly propagated and
+  # stored.
+  for retry in {1..3}; do
+    if gcloud compute ssh --project "${PROJECT}" --zone "${ZONE}" "${node}" \
+      --command "$powershell_cmd"; then
+      break
+    else
+      sleep 10
+    fi
+  done
+}
```
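Both new functions inline the same three-attempt retry around their `gcloud` calls. Factored out, the pattern looks like this (the `retry3` helper is hypothetical, shown only to make the shape explicit; unlike the inlined loops, it also reports the final status):

```sh
# Hypothetical helper equivalent to the inlined loops: run a command up to
# three times, sleeping 10s after each failure.
retry3() {
  local attempt
  for attempt in {1..3}; do
    if "$@"; then
      return 0
    fi
    sleep 10
  done
  return 1
}

# Usage sketch:
#   retry3 gcloud compute ssh --project "${PROJECT}" --zone "${ZONE}" "${node}" --command "..."
```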
```diff
+
+# Save log files and serial console output from Windows node $1 into local
+# directory $2.
+# This function shouldn't ever trigger errexit.
+function save-logs-windows() {
+  local -r node="${1}"
+  local -r dest_dir="${2}"
+
+  if [[ ! "${gcloud_supported_providers}" =~ "${KUBERNETES_PROVIDER}" ]]; then
+    echo "Not saving logs for ${node}, Windows log dumping requires gcloud support"
+    return
+  fi
+
+  export-windows-docker-event-log "${node}"
+
+  local remote_files=()
+  for file in ${windows_node_logfiles[@]}; do
+    remote_files+=( "${WINDOWS_LOGS_DIR}\\${file}" )
+  done
+  remote_files+=( "${windows_node_otherfiles[@]}" )
+
+  # TODO(pjh, yujuhong): handle rotated logs and copying multiple files at the
+  # same time.
+  for remote_file in ${remote_files[@]}; do
+    # Retry up to 3 times to allow ssh keys to be properly propagated and
+    # stored.
+    for retry in {1..3}; do
+      if gcloud compute scp --recurse --project "${PROJECT}" \
+        --zone "${ZONE}" "${node}:${remote_file}" "${dest_dir}" \
+        > /dev/null; then
+        break
+      else
+        sleep 10
+      fi
+    done
+  done
+
+  # Serial port 1 contains the Windows console output.
+  gcloud compute instances get-serial-port-output --project "${PROJECT}" \
+    --zone "${ZONE}" --port 1 "${node}" > "${dest_dir}/serial-1.log" || true
+}
+
 # Execute a command in container $2 on node $1.
 # Uses docker because the container may not ordinarily permit direct execution.
 function run-in-docker-container() {
```
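With the defaults from the first hunk, the `remote_files` loop in `save-logs-windows` yields three paths under `WINDOWS_LOGS_DIR` plus the memory dump. A standalone illustration (the `WINDOWS_LOGS_DIR` value is assumed here for the example; the real one is defined elsewhere in the cluster scripts):

```sh
# Assumed value, for illustration only.
WINDOWS_LOGS_DIR='C:\etc\kubernetes\logs'
windows_node_logfiles="kubelet.log kube-proxy.log docker.log"
windows_node_otherfiles='C:\Windows\MEMORY.dmp'

remote_files=()
for file in ${windows_node_logfiles[@]}; do
  remote_files+=( "${WINDOWS_LOGS_DIR}\\${file}" )
done
remote_files+=( "${windows_node_otherfiles[@]}" )

printf '%s\n' "${remote_files[@]}"
# C:\etc\kubernetes\logs\kubelet.log
# C:\etc\kubernetes\logs\kube-proxy.log
# C:\etc\kubernetes\logs\docker.log
# C:\Windows\MEMORY.dmp
```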
```diff
@@ -247,8 +311,13 @@ function dump_masters() {
   fi
 }
 
+# Dumps logs from nodes in the cluster. Linux nodes to dump logs from can be
+# specified via $1 or $use_custom_instance_list. If not specified then the nodes
+# to dump logs for will be detected using detect-node-names(); if Windows nodes
+# are present then they will be detected and their logs will be dumped too.
 function dump_nodes() {
   local node_names=()
+  local windows_node_names=()
   if [[ -n "${1:-}" ]]; then
     echo "Dumping logs for nodes provided as args to dump_nodes() function"
     node_names=( "$@" )
```
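Per the new docstring, callers can still pass explicit Linux node names, while Windows nodes only come in through detection. A usage sketch (node names hypothetical):

```sh
dump_nodes "test-minion-group-abcd"   # dump logs for just this Linux node
dump_nodes                            # detect nodes, Windows nodes included
```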
```diff
@@ -264,9 +333,12 @@
     if [[ -n "${NODE_NAMES:-}" ]]; then
       node_names=( "${NODE_NAMES[@]}" )
     fi
+    if [[ -n "${WINDOWS_NODE_NAMES:-}" ]]; then
+      windows_node_names=( "${WINDOWS_NODE_NAMES[@]}" )
+    fi
   fi
 
-  if [[ "${#node_names[@]}" == 0 ]]; then
+  if [[ "${#node_names[@]}" == 0 && "${#windows_node_names[@]}" == 0 ]]; then
     echo "No nodes found!"
     return
   fi
```
```diff
@@ -276,24 +348,31 @@
     node_logfiles_all="${node_logfiles_all} ${hollow_node_logfiles}"
   fi
 
-  nodes_selected_for_logs=()
+  linux_nodes_selected_for_logs=()
   if [[ -n "${LOGDUMP_ONLY_N_RANDOM_NODES:-}" ]]; then
     # We randomly choose 'LOGDUMP_ONLY_N_RANDOM_NODES' many nodes for fetching logs.
     for index in `shuf -i 0-$(( ${#node_names[*]} - 1 )) -n ${LOGDUMP_ONLY_N_RANDOM_NODES}`
     do
-      nodes_selected_for_logs+=("${node_names[$index]}")
+      linux_nodes_selected_for_logs+=("${node_names[$index]}")
     done
   else
-    nodes_selected_for_logs=( "${node_names[@]}" )
+    linux_nodes_selected_for_logs=( "${node_names[@]}" )
   fi
+  all_selected_nodes=( "${linux_nodes_selected_for_logs[@]}" )
+  all_selected_nodes+=( "${windows_node_names[@]}" )
 
   proc=${max_dump_processes}
-  for node_name in "${nodes_selected_for_logs[@]}"; do
+  for i in "${!all_selected_nodes[@]}"; do
+    node_name="${all_selected_nodes[$i]}"
     node_dir="${report_dir}/${node_name}"
     mkdir -p "${node_dir}"
-    # Save logs in the background. This speeds up things when there are
-    # many nodes.
-    save-logs "${node_name}" "${node_dir}" "${node_logfiles_all}" "${node_systemd_services}" &
+    if [[ "${i}" -lt "${#linux_nodes_selected_for_logs[@]}" ]]; then
+      # Save logs in the background. This speeds up things when there are
+      # many nodes.
+      save-logs "${node_name}" "${node_dir}" "${node_logfiles_all}" "${node_systemd_services}" &
+    else
+      save-logs-windows "${node_name}" "${node_dir}" &
+    fi
 
     # We don't want to run more than ${max_dump_processes} at a time, so
     # wait once we hit that many nodes. This isn't ideal, since one might
```
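The loop relies on ordering: Linux nodes fill the front of `all_selected_nodes` and Windows nodes the back, so comparing each index against the Linux count routes every node to the right dump function. A self-contained sketch of that index split (node names hypothetical):

```sh
linux_nodes_selected_for_logs=( "linux-node-0" "linux-node-1" )
windows_node_names=( "windows-node-0" )

# Linux nodes first, Windows nodes appended after.
all_selected_nodes=( "${linux_nodes_selected_for_logs[@]}" )
all_selected_nodes+=( "${windows_node_names[@]}" )

for i in "${!all_selected_nodes[@]}"; do
  if [[ "${i}" -lt "${#linux_nodes_selected_for_logs[@]}" ]]; then
    echo "${all_selected_nodes[$i]}: save-logs (Linux path)"
  else
    echo "${all_selected_nodes[$i]}: save-logs-windows"
  fi
done
```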
```diff
@@ -311,6 +390,9 @@
 }
 
 # Collect names of nodes which didn't run logexporter successfully.
+# This function examines NODE_NAMES but not WINDOWS_NODE_NAMES since logexporter
+# does not run on Windows nodes.
+#
 # Note: This step is O(#nodes^2) as we check if each node is present in the list of succeeded nodes.
 # Making it linear would add code complexity without much benefit (as it just takes ~1s for 5k nodes).
 # Assumes:
```
```diff
@@ -328,6 +410,8 @@ function find_non_logexported_nodes() {
   done
 }
 
+# This function examines NODE_NAMES but not WINDOWS_NODE_NAMES since logexporter
+# does not run on Windows nodes.
 function dump_nodes_with_logexporter() {
   if [[ -n "${use_custom_instance_list}" ]]; then
     echo "Dumping logs for nodes provided by log_dump_custom_get_instances() function"
```
```diff
@@ -446,10 +530,16 @@ function detect_node_failures() {
   fi
 
   detect-node-names
-  if [ -z "${INSTANCE_GROUPS:-}" ]; then
+  if [[ "${KUBERNETES_PROVIDER}" == "gce" ]]; then
+    local all_instance_groups=(${INSTANCE_GROUPS[@]} ${WINDOWS_INSTANCE_GROUPS[@]})
+  else
+    local all_instance_groups=(${INSTANCE_GROUPS[@]})
+  fi
+
+  if [ -z "${all_instance_groups:-}" ]; then
     return
   fi
-  for group in "${INSTANCE_GROUPS[@]}"; do
+  for group in "${all_instance_groups[@]}"; do
     local creation_timestamp=$(gcloud compute instance-groups managed describe \
       "${group}" \
       --project "${PROJECT}" \
```
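One bash subtlety in the emptiness check: `[ -z "${all_instance_groups:-}" ]` looks only at element 0 of the array, which suffices here because the array is built contiguously. A quick demonstration:

```sh
all_instance_groups=()
# ${all_instance_groups:-} expands to element 0 (empty/unset here).
[ -z "${all_instance_groups:-}" ] && echo "treated as empty"

all_instance_groups=( "group-a" "group-b" )
[ -z "${all_instance_groups:-}" ] || echo "non-empty once element 0 exists"
```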