diff --git a/contrib/util/DIAGNOSTICS.md b/contrib/util/DIAGNOSTICS.md new file mode 100644 index 0000000000..8e3b708e1e --- /dev/null +++ b/contrib/util/DIAGNOSTICS.md @@ -0,0 +1,64 @@ + +# Intro + +The purpose of the diagnostics script is to provide a secure and trusted method +of gathering and uploading logs on systems that are experiencing issues. + +# Method + +The following steps are taken for creating diagnostic logs: + +1. Gather log data into various categories; the standard output and error +output of each command are recorded for analysis by developers. + +2. Compress and encrypt the log data directory using a randomly generated or +user-provided AES key. + +3. Encrypt the AES key "metadata" using a developer-provided public key. + +4. Upload the symmetrically encrypted log data and public-key-encrypted metadata +for remote analysis. + +# Implementation Details + +The `diagnostics.sh` command can take an optional argument which describes the +desired steps to take. By default the script will perform a +`gather-upload-confirm`, where the steps of uploading and including metadata +are confirmed by prompt. If the argument is given as `gather`, the encryption +and upload steps will be skipped. An argument of `gather-upload` would upload +without prompting and include metadata for decryption, while +`gather-upload-nometa` would upload without prompting and without including +the necessary decryption metadata. + +The user must then communicate the ID of the log file uploaded for analysis, +and if the encryption metadata was not included in the upload, that information +must also be communicated in a secure manner. 
+ +AES encryption metadata will appear in a form like the following: +``` +Save secret metadata for log decryption: +salt=62e2ac13ae6bb66e +key=375dc2863c5b340252c0e5c631dda24b4fdc343139410b97fb5b7678919d8752 +iv=7874c21533a1b4a4129c00e95bd9d0e4 +``` + +The "salt", "key", and "iv" are values randomly generated using openssl, or +they can be manually passed in as environment variables. Similarly, a "UUID" is +auto-generated if not provided through the environment. + +# Decrypting Data + +When decrypting log data for analysis, a developer requires the following +information: + +1. A working `gsutil` command set up with credentials for the appropriate +bucket. `pip install gsutil` should install it, and +`echo -e 'rancher-dev-file.json\nk3s-diagnostic-logs' | gsutil config -e` should +configure access. + +2. The AES encryption metadata if not included in the upload (defined as +environment variables), or the private key and passphrase for decrypting +metadata if included in the upload. + +The UUID of the desired log set should also be provided (either as an environment variable or +as the first argument), and may be partial or contain wildcards. diff --git a/contrib/util/diagnostics.sh b/contrib/util/diagnostics.sh new file mode 100755 index 0000000000..15e0106a0c --- /dev/null +++ b/contrib/util/diagnostics.sh @@ -0,0 +1,276 @@ +#!/usr/bin/env bash + +# update for posix shell? 
# Abort on the first failing command while the environment is prepared.
# setup_diagdir() switches this off again (set +e).
set -e

# Re-execute through sudo, preserving the environment, when not running as
# root. "$0" and "$@" are quoted so arguments survive the re-exec unchanged.
[ "$(id -u)" -eq 0 ] || exec sudo -E "$0" "$@"

# Requested steps; DIAGNOSTICS.md lists gather, gather-upload,
# gather-upload-confirm (the default) and gather-upload-nometa.
DIAGCMD=${1:-gather-upload-confirm}

DIAGPROG=${DIAGPROG:-k3s}
BUCKET_NAME=${BUCKET_NAME:-"$DIAGPROG-diagnostic-logs"}

# Append the program's data bin directories to PATH: the "current" one when it
# exists, otherwise every data/*/bin directory that is present. The original
# pattern used `**`, which without `shopt -s globstar` matches exactly like
# `*`, and hard-coded "k3s" instead of honouring $DIAGPROG.
bin=/var/lib/rancher/$DIAGPROG/data/current/bin/
if [ -d "$bin" ]; then
  export PATH="$PATH:$bin:$bin/aux"
else
  for bin in /var/lib/rancher/"$DIAGPROG"/data/*/bin/; do
    if [ -d "$bin" ]; then
      export PATH="$PATH:$bin:$bin/aux"
    fi
  done
fi

# Public key used when PUBKEY is not supplied through the environment.
PUBKEY=${PUBKEY:-'-----BEGIN PUBLIC KEY-----
MIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEA1SlmOKCafhG5EzqJHWnT
cEupADJ/2WgbU2PgvTG9TlbaoVyiB5AX6pGFy9hasEJtscmngLvpgY+65te0cJBo
WJ+CMa3nTFXmiX+PGbrBhWMGT5bdM9Lhx5pKvkoaHzL1nNvN/DMeusGqyIdJr3gk
1wlNHr0bZYjlUOvJ3c+X0uIyjX5y0JTjaF5AcbBMlz//zdf7beToPlPuKIlz8FZd
ff4h6dKBYpOnqJW2NBxwICD8ZVokPRRMSZvSY3Mr7HZL1gDoCkOvCsWml27xB0S6
Z6Ib8zB8PFCVWtMZxzcj7ae4tI79OHmaFkEEBAqkBNNU/9S+J0F5tz0caVVnZ+j1
fy13JKIp75vwuDxGgfaru8012QM9zLwXQOcYcHLkLbaTJJ4HpMLC/v0R7TahlLVw
3F1OtQrhQH5PFNtCecpk8SNMgFhYyuCAuWGoai3BtYMNiKFbvuakFSq/XMLFUZS9
T89FaJF2S9liz3VFfCUapBFoD4rZkFCbNufhypwnSVq6MRe1k9V5EaYIsUpfJs33
mpKDVuU/yWwYM+bnlJYo9Sn1QcnjqxRVhUePIActoQ0s9b1CA9NpbqTRiSn7Qxx5
dcnKK+f2NUEdQroCDeUxe2dBLfAvKTCM+c4VCEt2o2d9poSwPytd4K9VdDfiUor+
6u2c2QnLeIcdfRM4j7SmxM0CAwEAAQ==
-----END PUBLIC KEY-----'}

# Print an upper-case UUID on stdout.
# Tries uuidgen, then /proc/sys/kernel/random/uuid, then builds one from
# /dev/urandom with od and awk.
# Returns: 0 on success, 1 (with a message on stderr) when every source fails.
gen-uuid() {
  local uuid
  # `|| uuid=''` keeps a missing tool from aborting the function when errexit
  # is active, so the next fallback is always tried.
  uuid=$(uuidgen 2>/dev/null) || uuid=''
  if [ -z "$uuid" ]; then
    uuid=$(cat /proc/sys/kernel/random/uuid 2>/dev/null) || uuid=''
  fi
  if [ -z "$uuid" ]; then
    # Fields $2..$9 are the eight 16-bit hex words of the first od line.
    # The first digit of $5 is forced to "4"; the first digit of $6 is replaced
    # by one of 8/9/a/b. awk's substr() is 1-based, so the index must be
    # int(rand()*4)+1; the original rand()*4 could select position 0 and yield
    # an empty replacement, producing a malformed UUID.
    uuid=$(od -x /dev/urandom | head -1 | awk '{
      OFS = "-"
      srand($6)
      sub(/./, "4", $5)
      sub(/./, substr("89ab", int(rand() * 4) + 1, 1), $6)
      print $2$3, $4, $5, $6, $7$8$9
    }') || uuid=''
  fi
  if [ -z "$uuid" ]; then
    echo "Unable to generate UUID" >&2
    return 1
  fi
  tr '[:lower:]' '[:upper:]' <<<"$uuid"
}

# NOTE(review): DIAGUUID is not assigned anywhere in the visible code; this
# looks like leftover debug output. Kept unchanged to preserve the output.
echo setup $DIAGUUID

# `|| true` lets the explicit check below report the failure; without it
# errexit would stop the script on the assignment itself.
UUID=${UUID:-$(gen-uuid)} || true
if [ -z "$UUID" ]; then
  echo "UUID is not set and could not be created" >&2
  exit 1
fi

# EXIT handler installed by setup_diagdir(): leaves $DIAGDIR in place and says so.
no-cleanup() {
  echo
  echo "Skipping cleanup for $DIAGDIR"
}

# INT/EXIT handler: removes $DIAGDIR and its "$DIAGDIR.*" siblings, then exits
# with the status that was current when the handler started.
cleanup() {
  local exit_code=$?
  set +e +x +v
  # Clear the traps first so the `exit` below cannot re-enter this handler.
  trap - EXIT INT
  if [ -n "$DIAGDIR" ] && [ -d "$DIAGDIR" ]; then
    rm -rf -- "$DIAGDIR" "$DIAGDIR".*
  fi
  exit "$exit_code"
}

# Create the diagnostics directory unless DIAGDIR is already set, install the
# INT/EXIT traps, and turn off errexit/xtrace/verbose for the rest of the run.
# Globals:  DIAGDIR (read/written), TMPDIR, DIAGPROG, UUID (read)
# Returns:  1 when the temporary directory cannot be created.
setup_diagdir() {
  if [ -z "$DIAGDIR" ]; then
    local tmp
    if ! tmp=$(mktemp -d "${TMPDIR:-/tmp}/$DIAGPROG-diagnostics-$UUID-XXXXXXXX"); then
      echo "Unable to create diagnostics directory" >&2
      return 1
    fi
    DIAGDIR=$(readlink -m "$tmp")
    trap cleanup INT
  fi
  trap no-cleanup EXIT

  set +e +x +v
  echo "Diagnostics location: $DIAGDIR"
}

# Delete $1 when it is a regular file of size zero.
remove_empty() {
  if [ -f "$1" ] && [ ! -s "$1" ]; then
    rm -- "$1"
  fi
}

# Run a command and record it under $LOGDIR.
# Arguments: the command and its arguments.
# Files:     <name>.cmd.txt (command line), <name>.txt (stdout),
#            <name>.err.txt (stderr); empty output files are removed.
#            <name> is the command line with "-" removed, spaces turned into
#            "-" and "/" turned into "_".
# Returns:   1 when the same command line was already recorded, else 0
#            (the command's own exit status is not propagated).
run_cmd() {
  local cmd="$*"
  cmd=${cmd//-/}
  cmd=${cmd// /-}
  cmd=${cmd//\//_}
  local logCmdFile="$LOGDIR/$cmd.cmd.txt"
  local logOutFile="$LOGDIR/$cmd.txt"
  local logErrFile="$LOGDIR/$cmd.err.txt"

  if [ -f "$logCmdFile" ]; then
    echo "Error already ran: $*" >&2
    return 1
  fi

  echo "Gathering command: $*"
  printf '%s\n' "$*" >"$logCmdFile"
  # Quoted so each argument reaches the command exactly as the caller passed it.
  "$@" >"$logOutFile" 2>"$logErrFile"
  remove_empty "$logOutFile"
  remove_empty "$logErrFile"
  return 0
}

# Copy a file or directory tree into $LOGDIR, following symlinks.
# Arguments: $1 - source path
#            $2 - optional name to store it under (defaults to $1);
#                 every "/" in the name becomes "_".
# Returns:   1 when the source does not exist, otherwise cp's status.
copy() {
  local from=$1
  local to=${2:-$1}
  to=${to//\//_}
  to="$LOGDIR/$to"

  if [ ! -e "$from" ]; then
    echo "Skipping copy, does not exist: $from"
    return 1
  fi
  echo "Copying: $from"
  cp --recursive --dereference -- "$from" "$to"
}

# Point LOGDIR at $DIAGDIR/$1 (exported) and create that directory.
setup_logs() {
  export LOGDIR="$DIAGDIR/$1"
  mkdir -p -- "$LOGDIR"
  echo
  echo "Using subdirectory: $1"
}

# Record general host information in the "system" subdirectory.
log_system() {
  setup_logs system

  copy /etc/os-release
  run_cmd sysctl -a
  run_cmd uname -a
  run_cmd ps uax
  run_cmd dmesg
  run_cmd id
  run_cmd mount
  run_cmd df -h
  run_cmd ifconfig -a
  run_cmd netstat -ln
  run_cmd netstat -nr
  # Deliberately unquoted: when pgrep finds no process the argument is dropped
  # instead of becoming an empty string, as in the original.
  # shellcheck disable=SC2046
  run_cmd lsof -n -P -p $(pgrep -o "$DIAGPROG")
  run_cmd iptables -L
  run_cmd iptables -S
  run_cmd hostname -f
}

# Record version, config check, log files and journal units of $DIAGPROG in a
# subdirectory named after the program.
log_prog() {
  local log unit

  setup_logs "$DIAGPROG"

  run_cmd "$DIAGPROG" --version
  run_cmd "$DIAGPROG" check-config
  for log in /var/log/"$DIAGPROG"*.log; do
    copy "$log"
  done
  if command -v journalctl >/dev/null 2>&1; then
    # Word splitting of the unit list is intentional (one unit per word).
    for unit in $(journalctl --field _SYSTEMD_UNIT | grep "$DIAGPROG"); do
      run_cmd journalctl --unit "$unit" --no-pager
    done
  fi
  copy "/var/lib/rancher/$DIAGPROG/agent/containerd/containerd.log"

  # log cert openssl data?
}

# Record pod logs and kubectl output in the "kube" subdirectory.
log_kube() {
  setup_logs kube

  copy /var/log/pods # copies all pod logs
  run_cmd command -v kubectl
  run_cmd kubectl version
  run_cmd kubectl config get-contexts
  run_cmd kubectl config current-context
  run_cmd kubectl cluster-info dump
  run_cmd kubectl get namespaces
  run_cmd kubectl get nodes
  run_cmd kubectl describe nodes
  run_cmd kubectl describe pods --all-namespaces
  run_cmd kubectl describe services --all-namespaces
  run_cmd kubectl describe daemonset --all-namespaces
  run_cmd kubectl describe deployments --all-namespaces
  run_cmd kubectl describe replicaset --all-namespaces
  run_cmd kubectl describe storageclass,pv,pvc
}

# Succeed when string $1 contains the literal substring $2.
# The original `[ -z "${1##*$2*}" ]` also succeeded for every empty $1 and
# interpreted glob characters in $2; quoting $2 in the pattern avoids both.
contains() {
  case "$1" in
    *"$2"*) return 0 ;;
    *) return 1 ;;
  esac
}

# Run every log collector in order.
gather() {
  log_system
  log_prog
  log_kube
}

# Ask a yes/no question and read a single key.
# Arguments: $1 - question text
#            $2 - default answer, 'Y' or 'N' (default 'N')
# Returns:   0 for y/Y, or for an empty answer when the default is 'Y';
#            1 otherwise.
confirm() {
  local def=${2:-'N'}
  local prompt='(y/N)'
  local input=''
  if [ "$def" = 'Y' ]; then
    prompt='(Y/n)'
  fi
  echo
  # -r keeps a typed backslash from being treated as an escape character.
  read -r -p "$1 $prompt: " -n 1 input
  echo
  case "$input" in
    [Yy]) return 0 ;;
  esac
  if [ -z "$input" ] && [ "$def" = 'Y' ]; then
    return 0
  fi
  return 1
}

# NOTE(review): upload() is incomplete in this copy of the diff. The text
# between `cat >$DIAGDIR.meta <` and `/dev/null 2>&1; then` below is missing --
# presumably the metadata here-document, the remainder of upload(), the end of
# diagnostics.sh and the header of the companion decrypt script. The surviving
# tokens are reproduced verbatim and deliberately not modified; restore the
# span from the original commit before running this file.
upload() {
  echo
  echo "Prepare upload of $DIAGDIR"

  if contains "$DIAGCMD" confirm && ! confirm "Perform upload?"; then
    return 1
  fi
  trap cleanup EXIT

  local salt=${salt:-$(openssl rand -hex 8)}
  local key=${key:-$(openssl rand -hex 32)}
  local iv=${iv:-$(openssl rand -hex 16)}
  local base=$(basename $DIAGDIR)
  local dir=$(dirname $DIAGDIR)
  local tar=${TMPDIR:-/tmp}/$base.tar.gz

  echo
  echo "Creating $tar"

  tar -c -z -C $dir $base | \
    openssl enc -aes-256-cbc -S $salt -K $key -iv $iv -in /dev/stdin -out $DIAGDIR.logs.tar.gz.enc

  cat >$DIAGDIR.meta </dev/null 2>&1; then
  echo "gsutil command not found" >&2
  echo "hint: pip install gsutil" >&2
  exit 1
fi

# UUID (or partial UUID / wildcard) of the log set to fetch: first argument,
# falling back to a UUID environment variable as described in DIAGNOSTICS.md.
UUID=${1:-${UUID:-}}
if [ -z "$UUID" ]; then
  echo "ERROR: should pass UUID as first arg" >&2
  exit 1
fi

PRIVATE_PEM=${PRIVATE_PEM:-'diags.private.pem'}
PRIVATE_KEY=${PRIVATE_KEY:-'diags.private.key'}

# Missing key material is only a warning: decrypt() can still work from
# salt/key/iv supplied through the environment.
if [ ! -f "$PRIVATE_PEM" ]; then
  echo "WARNING: PRIVATE_PEM $PRIVATE_PEM not found" >&2
fi
if [ ! -f "$PRIVATE_KEY" ]; then
  echo "WARNING: PRIVATE_KEY $PRIVATE_KEY not found (PRIVATE_PEM password)" >&2
fi

DIAGPROG=${DIAGPROG:-'k3s'}
BUCKET_NAME=${BUCKET_NAME:-"$DIAGPROG-diagnostic-logs"}

# Unpack and decrypt one downloaded log bundle in the current directory.
# Arguments: $1 - bundle base name; "$1.tar.gz" must exist.
# Globals:   PRIVATE_KEY, PRIVATE_PEM (read); salt, key, iv (read as defaults
#            when no metadata file is available).
# Returns:   0 after extracting the decrypted archive and removing "$1".*,
#            1 on any missing input or decryption failure.
decrypt() {
  if [ -z "$1" ]; then
    echo "ERROR: decrypt param undefined" >&2
    return 1
  fi

  if [ ! -f "$1.tar.gz" ]; then
    echo "ERROR: $1.tar.gz does not exist" >&2
    return 1
  fi

  tar xzf "$1.tar.gz"
  if [ ! -f "$1.logs.tar.gz.enc" ]; then
    echo "ERROR: encrypted logs file $1.logs.tar.gz.enc does not exist" >&2
    return 1
  fi

  # Metadata that was uploaded encrypted is decrypted with the private key.
  if [ -f "$1.meta.enc" ]; then
    openssl rsautl -decrypt -passin "file:$PRIVATE_KEY" -inkey "$PRIVATE_PEM" -in "$1.meta.enc" -out "$1.meta"
  fi

  # Start from the caller's environment; values from the metadata file win.
  local salt="$salt"
  local key="$key"
  local iv="$iv"

  if [ -f "$1.meta" ]; then
    salt=$(grep 'salt=' "$1.meta" | cut -f2 -d=)
    key=$(grep 'key=' "$1.meta" | cut -f2 -d=)
    iv=$(grep 'iv=' "$1.meta" | cut -f2 -d=)
  fi

  if [ -z "$salt" ] || [ -z "$key" ] || [ -z "$iv" ]; then
    echo "$1: Missing decryption metadata" >&2
    echo
    return 1
  fi

  if openssl enc -d -aes-256-cbc -in "$1.logs.tar.gz.enc" -out /dev/stdout -S "$salt" -K "$key" -iv "$iv" | tar xzf -; then
    rm -- "$1".*
    return 0
  fi

  echo "$1: Error decrypting" >&2
  echo
  return 1
}

# Download each given bucket URL as "<name>.tar.gz" and decrypt it, where
# <name> is the URL's base name up to the first ".".
fetch() {
  local url log
  for url in "$@"; do
    log=$(basename "$url" | cut -f1 -d.)
    echo "Downloading $log"
    gsutil cp "$url" "$log.tar.gz"
    decrypt "$log"
  done
}

{
  # Intentionally unquoted: each URL printed by `gsutil ls` becomes one argument.
  # shellcheck disable=SC2046
  fetch $(gsutil ls "gs://$BUCKET_NAME/*$UUID*")
}