mirror of https://github.com/k3s-io/k3s
Merge pull request #34696 from wojtek-t/restore_etcd_from_backup
Automatic merge from submit-queue

Restore etcd from backup

The new script performs disaster recovery of etcd from backup.
https://github.com/kubernetes/kubernetes/issues/20504

@alex-mohr @lavalamp @kubernetes/goog-gke
commit
b47d862aea
@@ -0,0 +1,234 @@
#!/bin/bash

# Copyright 2016 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This script performs disaster recovery of etcd from the backup data.
# Assumptions:
# - backup was done using etcdctl command:
#   a) in case of etcd2
#      $ etcdctl backup --data-dir=<dir>
#      produced .snap and .wal files
#   b) in case of etcd3
#      $ etcdctl --endpoints=<address> snapshot save
#      produced .db file
# - version.txt file is in the current directory (if it isn't, it will be
#   defaulted to "2.2.1/etcd2"). Based on this file, the script will
#   decide to which version we are restoring (procedures are different
#   for etcd2 and etcd3).
# - in case of etcd2 - *.snap and *.wal files are in the current directory
# - in case of etcd3 - *.db file is in the current directory
# - the script is run as root
#
# The script doesn't support restoring event etcd.
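#
# For illustration only (not executed by this script; the paths and endpoint
# below are assumptions, not values the script relies on), such a backup
# could have been produced with:
#   etcd2:  etcdctl backup --data-dir=/var/etcd/data --backup-dir=/var/tmp/etcd-backup
#   etcd3:  ETCDCTL_API=3 etcdctl --endpoints=http://127.0.0.1:2379 snapshot save backup.db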


set -o errexit
set -o nounset
set -o pipefail

# Version file contains information about current version in the format:
# <etcd binary version>/<etcd api mode> (e.g. "3.0.12/etcd3").
#
# If the file doesn't exist we assume the "2.2.1/etcd2" configuration is
# the current one and create a file with such configuration.
# The restore procedure is chosen based on this information.
VERSION_FILE="version.txt"
if [ ! -f "${VERSION_FILE}" ]; then
  echo "2.2.1/etcd2" > "${VERSION_FILE}"
fi
VERSION_CONTENTS="$(cat ${VERSION_FILE})"
ETCD_VERSION="$(echo $VERSION_CONTENTS | cut -d '/' -f 1)"
ETCD_API="$(echo $VERSION_CONTENTS | cut -d '/' -f 2)"
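# Illustrative example (values are assumptions): if version.txt contains
# "3.0.12/etcd3", ETCD_VERSION resolves to "3.0.12" and ETCD_API to "etcd3",
# so the etcd3 restore path below is taken.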

# Name is used only in case of etcd3 mode, to appropriately set the metadata
# for the etcd data.
# NOTE: NAME has to be equal to the value passed to the --name flag when starting etcd.
NAME="${NAME:-etcd-$(hostname)}"

# Ports on which etcd and event etcd are exposed.
etcd_port=2379
event_etcd_port=4002

# Wait until the etcd instance exposed on the given port is up.
wait_for_etcd_up() {
  port=$1
  # TODO: As of 3.0.x etcd versions, all 2.* and 3.* versions return
  # {"health": "true"} on the /health endpoint in the healthy case.
  # However, we should come up with a regex for it to avoid future breakage.
  health_ok="{\"health\": \"true\"}"
  for i in $(seq 120); do
    # TODO: Is it enough to look into the /health endpoint?
    health=$(curl --silent http://127.0.0.1:${port}/health)
    if [ "${health}" == "${health_ok}" ]; then
      return 0
    fi
    sleep 1
  done
  return 1
}
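# A slightly more tolerant probe (a sketch only, not used by this script)
# could match the health response with a regex instead of the exact string,
# e.g.:
#   curl --silent "http://127.0.0.1:${etcd_port}/health" | grep -E -q '"health"[[:space:]]*:[[:space:]]*"true"'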

# Wait until the apiserver is up and the cluster components report healthy.
wait_for_cluster_healthy() {
  for i in $(seq 120); do
    cs_status=$(kubectl get componentstatuses -o template --template='{{range .items}}{{with index .conditions 0}}{{.type}}:{{.status}}{{end}}{{"\n"}}{{end}}') || true
    componentstatuses=$(echo "${cs_status}" | grep -c 'Healthy:') || true
    healthy=$(echo "${cs_status}" | grep -c 'Healthy:True') || true
    if [ "${componentstatuses}" -eq "${healthy}" ]; then
      return 0
    fi
    sleep 1
  done
  return 1
}
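# Illustrative output of the kubectl template above (example values only):
#   Healthy:True
#   Healthy:True
#   Healthy:False
# The function returns 0 only once every component line reads Healthy:True.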

# Wait until the etcd and apiserver pods are down.
wait_for_etcd_and_apiserver_down() {
  for i in $(seq 120); do
    etcd=$(docker ps | grep etcd | grep -v etcd-empty-dir | wc -l)
    apiserver=$(docker ps | grep apiserver | wc -l)
    # TODO: Theoretically it is possible that the apiserver and/or etcd
    # are currently down, but the kubelet is already restarting them and
    # they will reappear again. We should avoid that.
    if [ "${etcd}" -eq "0" -a "${apiserver}" -eq "0" ]; then
      return 0
    fi
    sleep 1
  done
  return 1
}

# Move the manifest files to stop etcd and kube-apiserver
# while we swap the data out from under them.
MANIFEST_DIR="/etc/kubernetes/manifests"
MANIFEST_BACKUP_DIR="/etc/kubernetes/manifests-backups"
mkdir -p "${MANIFEST_BACKUP_DIR}"
# If those files were already moved (e.g. during a previous
# restore attempt) don't fail on it.
mv "${MANIFEST_DIR}/kube-apiserver.manifest" "${MANIFEST_BACKUP_DIR}" || true
mv "${MANIFEST_DIR}/etcd.manifest" "${MANIFEST_BACKUP_DIR}" || true
mv "${MANIFEST_DIR}/etcd-events.manifest" "${MANIFEST_BACKUP_DIR}" || true

# Wait for the pods to be stopped.
echo "Waiting for etcd and kube-apiserver to be down"
if ! wait_for_etcd_and_apiserver_down; then
  # Couldn't bring down etcd and apiserver.
  echo "Stopping etcd and apiserver failed"
  exit 1
fi

# Create the sort of directory structure that etcd expects.
# If this directory already exists, remove it.
BACKUP_DIR="/var/tmp/backup"
rm -rf "${BACKUP_DIR}"
if [ "${ETCD_API}" == "etcd2" ]; then
  echo "Preparing etcd backup data for restore"
  # In v2 mode, we simply copy both snap and wal files to a newly created
  # directory. After that, we start etcd with the --force-new-cluster option
  # that (according to the etcd documentation) is required to recover from
  # a backup.
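  # (For context, as described in the etcd disaster recovery docs:
  # --force-new-cluster drops the previous cluster membership and starts a
  # single-member cluster from the copied data, so the restored member does
  # not try to reach its old peers.)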
  mkdir -p "${BACKUP_DIR}/member/snap"
  mkdir -p "${BACKUP_DIR}/member/wal"
  # If the cluster is relatively new, there may be no .snap file.
  mv *.snap "${BACKUP_DIR}/member/snap/" || true
  mv *.wal "${BACKUP_DIR}/member/wal/"

  # TODO(jsz): This won't work with HA setups (e.g. do we need to set the --name flag?).
  image=$(docker run -d -v ${BACKUP_DIR}:/var/etcd/data \
    --net=host -p ${etcd_port}:${etcd_port} \
    "gcr.io/google_containers/etcd:${ETCD_VERSION}" /bin/sh -c \
    "/usr/local/bin/etcd --data-dir /var/etcd/data --force-new-cluster")
  if [ "$?" -ne "0" ]; then
    echo "Docker container didn't start correctly"
    exit 1
  fi

  if ! wait_for_etcd_up "${etcd_port}"; then
    echo "Etcd didn't come back correctly"
    exit 1
  fi

  # Kill that etcd instance.
  docker kill "${image}"
elif [ "${ETCD_API}" == "etcd3" ]; then
|
||||
echo "Preparing etcd snapshot for restore"
|
||||
mkdir -p "${BACKUP_DIR}"
|
||||
number_files=$(find . -maxdepth 1 -type f -name "*.db" | wc -l)
|
||||
if [ "${number_files}" -ne "1" ]; then
|
||||
echo "Incorrect number of *.db files - expected 1"
|
||||
exit 1
|
||||
fi
|
||||
mv *.db "${BACKUP_DIR}/"
|
||||
snapshot="$(ls ${BACKUP_DIR})"
|
||||
|
||||
  # Run the etcdctl snapshot restore command and wait until it is finished.
  # The --name used here has to match the --name setting in the etcd
  # manifest file (see the NOTE above); with a matching name it seems to work.
  # TODO(jsz): This command may not work in case of HA.
  image=$(docker run -d -v ${BACKUP_DIR}:/var/tmp/backup --env ETCDCTL_API=3 \
    "gcr.io/google_containers/etcd:${ETCD_VERSION}" /bin/sh -c \
    "/usr/local/bin/etcdctl snapshot restore ${BACKUP_DIR}/${snapshot} --name ${NAME} --initial-cluster ${NAME}=http://localhost:2380; mv /${NAME}.etcd/member /var/tmp/backup/")
  if [ "$?" -ne "0" ]; then
    echo "Docker container didn't start correctly"
    exit 1
  fi
  echo "Prepare container exit code: $(docker wait ${image})"
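  # For reference (a sketch of what the command above implies, relying on
  # etcdctl writing its restore output to ${NAME}.etcd by default): the
  # restore produces fresh snap/wal data under ${NAME}.etcd/member, which is
  # then moved into ${BACKUP_DIR} to become the new etcd data directory.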

  rm -f "${BACKUP_DIR}/${snapshot}"
fi
# Also copy version.txt file.
cp "${VERSION_FILE}" "${BACKUP_DIR}"

# Find out if we are running GCI vs CVM.
export CVM=$(curl "http://metadata/computeMetadata/v1/instance/attributes/" -H "Metadata-Flavor: Google" |& grep -q gci; echo $?)
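# (Descriptive note: grep -q exits 0 when "gci" appears among the instance
# attributes, so CVM=1 below means the node image is not GCI, i.e. the older
# ContainerVM image, and a different master-pd mount path is used.)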
if [[ "$CVM" == "1" ]]; then
|
||||
export MNT_DISK="/mnt/master-pd"
|
||||
else
|
||||
export MNT_DISK="/mnt/disks/master-pd"
|
||||
fi
|
||||

# Save the corrupted data (clean the directory if it is already non-empty).
rm -rf "${MNT_DISK}/var/etcd-corrupted"
mkdir -p "${MNT_DISK}/var/etcd-corrupted"
mv /var/etcd/data "${MNT_DISK}/var/etcd-corrupted"

# Replace the corrupted data dir with the restored data.
mv "${BACKUP_DIR}" /var/etcd/data

# Start etcd and kube-apiserver again.
echo "Restarting etcd and apiserver from restored snapshot"
mv "${MANIFEST_BACKUP_DIR}"/* "${MANIFEST_DIR}/"
rm -rf "${MANIFEST_BACKUP_DIR}"

# Verify that etcd is back.
echo "Waiting for etcd to come back"
if ! wait_for_etcd_up "${etcd_port}"; then
  echo "Etcd didn't come back correctly"
  exit 1
fi

# Verify that event etcd is back.
echo "Waiting for event etcd to come back"
if ! wait_for_etcd_up "${event_etcd_port}"; then
  echo "Event etcd didn't come back correctly"
  exit 1
fi

# Verify that kube-apiserver is back and cluster is healthy.
echo "Waiting for apiserver to come back"
if ! wait_for_cluster_healthy; then
  echo "Apiserver didn't come back correctly"
  exit 1
fi

echo "Cluster successfully restored!"