Add cluster/log-dump.sh.

pull/6/head
Joe Finney 2016-03-01 17:13:18 -08:00
parent 74515a6b23
commit eff5a9c14d
4 changed files with 110 additions and 7 deletions

95
cluster/log-dump.sh Executable file
View File

@ -0,0 +1,95 @@
#!/bin/bash
# Copyright 2016 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Call this to dump all master and node logs into the folder specified in $1
# (defaults to _artifacts). Only works if the provider supports SSH.
set -o errexit
set -o nounset
set -o pipefail
KUBE_ROOT=$(dirname "${BASH_SOURCE}")/..
: ${KUBE_CONFIG_FILE:="config-test.sh"}
source "${KUBE_ROOT}/cluster/kube-env.sh"
source "${KUBE_ROOT}/cluster/kube-util.sh"
readonly report_dir="${1:-_artifacts}"
echo "Dumping master and node logs to ${report_dir}"
# Saves a single output of running a given command ($2) on a given node ($1)
# into a given local file ($3). Does not fail if the ssh command fails for any
# reason, just prints an error to stderr.
function save-log() {
local -r node_name="${1}"
local -r cmd="${2}"
local -r output_file="${3}"
if ! ssh-to-node "${node_name}" "${cmd}" > "${output_file}"; then
echo "${cmd} failed for ${node_name}" >&2
fi
}
# Saves logs common to master and nodes. The node name is in $1 and the
# directory/name prefix is in $2. Assumes KUBERNETES_PROVIDER is set.
function save-common-logs() {
local -r node_name="${1}"
local -r prefix="${2}"
echo "Dumping logs for ${node_name}"
save-log "${node_name}" "cat /var/log/kern.log" "${prefix}-kern.log"
save-log "${node_name}" "cat /var/log/docker.log" "${prefix}-docker.log"
if [[ "${KUBERNETES_PROVIDER}" == "gce" ]]; then
save-log "${node_name}" "cat /var/log/startupscript.log" "${prefix}-startupscript.log"
fi
if ssh-to-node "${node_name}" "sudo systemctl status kubelet.service" &> /dev/null; then
save-log "${node_name}" "sudo journalctl --output=cat -u kubelet.service" "${prefix}-kubelet.log"
else
save-log "${node_name}" "cat /var/log/kubelet.log" "${prefix}-kubelet.log"
save-log "${node_name}" "cat /var/log/supervisor/supervisord.log" "${prefix}-supervisord.log"
save-log "${node_name}" "cat /var/log/supervisor/kubelet-stdout.log" "${prefix}-supervisord-kubelet-stdout.log"
save-log "${node_name}" "cat /var/log/supervisor/kubelet-stderr.log" "${prefix}-supervisord-kubelet-stderr.log"
fi
}
readonly master_ssh_supported_providers="gce aws kubemark"
readonly node_ssh_supported_providers="gce gke aws"
if [[ ! "${master_ssh_supported_providers}" =~ "${KUBERNETES_PROVIDER}" ]]; then
echo "Master SSH not supported for ${KUBERNETES_PROVIDER}"
elif ! $(detect-master &> /dev/null); then
echo "Master not detected. Is the cluster up?"
else
echo "Master Name: ${MASTER_NAME}"
readonly master_prefix="${report_dir}/${MASTER_NAME}"
save-log "${MASTER_NAME}" "cat /var/log/kube-apiserver.log" "${master_prefix}-kube-apiserver.log"
save-log "${MASTER_NAME}" "cat /var/log/kube-scheduler.log" "${master_prefix}-kube-scheduler.log"
save-log "${MASTER_NAME}" "cat /var/log/kube-controller-manager.log" "${master_prefix}-kube-controller-manager.log"
save-log "${MASTER_NAME}" "cat /var/log/etcd.log" "${master_prefix}-kube-etcd.log"
save-common-logs "${MASTER_NAME}" "${master_prefix}"
fi
detect-node-names &> /dev/null
if [[ ! "${node_ssh_supported_providers}" =~ "${KUBERNETES_PROVIDER}" ]]; then
echo "Node SSH not supported for ${KUBERNETES_PROVIDER}"
elif [[ "${#NODE_NAMES[@]}" -eq 0 ]]; then
echo "Nodes not detected. Is the cluster up?"
else
echo "Node Names: ${NODE_NAMES[*]}"
for node_name in "${NODE_NAMES[@]}"; do
node_prefix="${report_dir}/${node_name}"
save-log "${node_name}" "cat /var/log/kube-proxy.log" "${node_prefix}-kube-proxy.log"
save-common-logs "${node_name}" "${node_prefix}"
done
fi

View File

@ -193,7 +193,15 @@ if [[ "${gcp_list_resources}" == "true" ]]; then
${gcp_list_resources_script} > "${gcp_resources_before}"
fi
if [[ "${E2E_UP,,}" == "true" ]]; then
go run ./hack/e2e.go ${E2E_OPT:-} -v --up
# We want to try to gather logs even if kube-up fails, so collect the
# result here and fail after dumping logs if it's nonzero.
go run ./hack/e2e.go ${E2E_OPT:-} -v --up && up_result="$?" || up_result="$?"
if [[ "${up_result}" -ne 0 ]]; then
if [[ -x "cluster/log-dump.sh" ]]; then
./cluster/log-dump.sh "${ARTIFACTS}"
fi
exit "${up_result}"
fi
go run ./hack/e2e.go -v --ctl="version --match-server-version=false"
if [[ "${gcp_list_resources}" == "true" ]]; then
${gcp_list_resources_script} > "${gcp_resources_cluster_up}"

View File

@ -11,9 +11,9 @@
- publisher:
name: gcs-uploader
publishers:
# Use our script for build artifacts, since it's more flexible.
- postbuildscript:
builders:
# Use our script for build artifacts, since it's more flexible.
- shell: |
if [[ -x ./hack/jenkins/upload-to-gcs.sh ]]; then
./hack/jenkins/upload-to-gcs.sh

View File

@ -5,7 +5,6 @@ cluster/aws/templates/configure-vm-aws.sh: # We set the hostname_override to th
cluster/aws/templates/configure-vm-aws.sh: api_servers: '${API_SERVERS}'
cluster/aws/templates/configure-vm-aws.sh: env-to-grains "hostname_override"
cluster/aws/templates/configure-vm-aws.sh: env-to-grains "runtime_config"
cluster/aws/templates/salt-minion.sh:# We set the hostname_override to the full EC2 private dns name
cluster/centos/util.sh: local node_ip=${node#*@}
cluster/gce/configure-vm.sh: advertise_address: '${EXTERNAL_IP}'
cluster/gce/configure-vm.sh: api_servers: '${KUBERNETES_MASTER_NAME}'
@ -50,6 +49,10 @@ cluster/juju/charms/trusty/kubernetes/hooks/network-relation-changed: for k i
cluster/juju/charms/trusty/kubernetes/hooks/network-relation-changed: if api_servers:
cluster/lib/logging.sh: local source_file=${BASH_SOURCE[$frame_no]}
cluster/lib/logging.sh: local source_file=${BASH_SOURCE[$stack_skip]}
cluster/log-dump.sh: for node_name in "${NODE_NAMES[@]}"; do
cluster/log-dump.sh: local -r node_name="${1}"
cluster/log-dump.sh: local -r node_name="${1}"
cluster/log-dump.sh:readonly report_dir="${1:-_artifacts}"
cluster/mesos/docker/km/build.sh: km_path=$(find-binary km darwin/amd64)
cluster/rackspace/util.sh: local node_ip=$(nova show --minimal ${NODE_NAMES[$i]} \
cluster/saltbase/salt/kube-addons/kube-addons.sh:# Create admission_control objects if defined before any other addon services. If the limits
@ -81,7 +84,6 @@ docs/getting-started-guides/coreos/azure/lib/deployment_logic/kubernetes.js: re
docs/getting-started-guides/coreos/azure/lib/deployment_logic/kubernetes.js: return cloud_config.process_template(input_file, output_file, function(data) {
docs/getting-started-guides/coreos/azure/lib/deployment_logic/kubernetes.js: var write_files_extra = cloud_config.write_files_from('addons', '/etc/kubernetes/addons');
docs/getting-started-guides/coreos/azure/lib/deployment_logic/kubernetes.js:var cloud_config = require('../cloud_config.js');
docs/getting-started-guides/docker-multinode/skydns-rc.yaml.in: - -kube_master_url=http://{kube_server_url}:8080
examples/cluster-dns/images/frontend/client.py: service_address = socket.gethostbyname(hostname)
examples/vitess/env.sh: node_ip=$(get_node_ip)
hack/jenkins/job-builder-image/Dockerfile:# JJB configuration lives in /etc/jenkins_jobs/jenkins_jobs.ini
@ -91,11 +93,10 @@ hack/jenkins/update-jobs.sh: # jenkins_jobs.ini contains administrative credent
hack/jenkins/update-jobs.sh: if [[ -e jenkins_jobs.ini ]]; then
hack/local-up-cluster.sh: runtime_config="--runtime-config=${RUNTIME_CONFIG}"
hack/local-up-cluster.sh: runtime_config=""
pkg/kubelet/network/hairpin/hairpin.go: hairpinModeRelativePath = "hairpin_mode"
pkg/kubelet/qos/memory_policy_test.go: t.Errorf("oom_score_adj should be between %d and %d, but was %d", test.lowOOMScoreAdj, test.highOOMScoreAdj, oomScoreAdj)
pkg/kubelet/qos/memory_policy_test.go: highOOMScoreAdj int // The min oom_score_adj score the container should be assigned.
pkg/kubelet/qos/memory_policy_test.go: lowOOMScoreAdj int // The max oom_score_adj score the container should be assigned.
pkg/util/oom/oom_linux.go: err = fmt.Errorf("failed to read oom_score_adj: %v", readErr)
pkg/util/oom/oom_linux.go: err = fmt.Errorf("failed to set oom_score_adj to %d: %v", oomScoreAdj, writeErr)
pkg/util/oom/oom_linux.go: return fmt.Errorf("invalid PID %d specified for oom_score_adj", pid)
pkg/util/oom/oom_linux.go: oomScoreAdjPath := path.Join("/proc", pidStr, "oom_score_adj")
pkg/util/oom/oom_linux.go:// Writes 'value' to /proc/<pid>/oom_score_adj for all processes in cgroup cgroupName.
@ -110,4 +111,3 @@ test/e2e/host_path.go: fmt.Sprintf("--retry_time=%d", retryDuration),
test/images/mount-tester/mt.go: flag.BoolVar(&breakOnExpectedContent, "break_on_expected_content", true, "Break out of loop on expected content, (use with --file_content_in_loop flag only)")
test/images/mount-tester/mt.go: flag.IntVar(&retryDuration, "retry_time", 180, "Retry time during the loop")
test/images/mount-tester/mt.go: flag.StringVar(&readFileContentInLoopPath, "file_content_in_loop", "", "Path to read the file content in loop from")
pkg/kubelet/network/hairpin/hairpin.go: hairpinModeRelativePath = "hairpin_mode"