#!/usr/bin/env bash
# Copyright 2016 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This script performs disaster recovery of etcd from the backup data.
# Assumptions:
# - backup was done using etcdctl command (example invocations at the
#   end of this comment block):
#   a) in case of etcd2
#      $ etcdctl backup --data-dir=<dir>
#      which produced .snap and .wal files
#   b) in case of etcd3
#      $ etcdctl --endpoints=<address> snapshot save <file>
#      which produced a .db file
# - version.txt file is in the current directory (if it isn't, it will be
#   defaulted to "2.2.1/etcd2"). Based on this file, the script decides
#   which version it is restoring to (procedures are different for
#   etcd2 and etcd3).
# - in case of etcd2 - *.snap and *.wal files are in current directory
# - in case of etcd3 - *.db file is in the current directory
# - the script is run as root
# - for event etcd, we only support clearing it - to do that, set the
#   RESET_EVENT_ETCD=true env var.
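#
# For reference, backups matching these assumptions could have been taken
# roughly as follows (the data dir, endpoint, and file names below are
# illustrative, not mandated by this script):
#   $ etcdctl backup --data-dir=/var/etcd/data --backup-dir=/var/tmp/etcd2-backup
#   $ ETCDCTL_API=3 etcdctl --endpoints=http://127.0.0.1:2379 snapshot save snapshot.db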
set -o errexit
set -o nounset
set -o pipefail
# Version file contains information about current version in the format:
# <etcd binary version>/<etcd api mode> (e.g. "3.0.12/etcd3").
#
# If the file doesn't exist we assume "2.2.1/etcd2" configuration is
# the current one and create a file with such configuration.
# The restore procedure is chosen based on this information.
VERSION_FILE="version.txt"
# Make it possible to overwrite version file (or default version)
# with VERSION_CONTENTS env var.
if [ -n " ${ VERSION_CONTENTS :- } " ] ; then
echo " ${ VERSION_CONTENTS } " > " ${ VERSION_FILE } "
fi
if [ ! -f "${VERSION_FILE}" ]; then
  echo "2.2.1/etcd2" > "${VERSION_FILE}"
fi

VERSION_CONTENTS="$(cat "${VERSION_FILE}")"
ETCD_VERSION="$(echo "${VERSION_CONTENTS}" | cut -d '/' -f 1)"
ETCD_API="$(echo "${VERSION_CONTENTS}" | cut -d '/' -f 2)"
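# For example (version string illustrative), a version.txt containing
# "3.0.17/etcd3" yields ETCD_VERSION="3.0.17" and ETCD_API="etcd3" and
# selects the etcd3 snapshot-restore path below; the same effect could be
# forced with VERSION_CONTENTS="3.0.17/etcd3".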
# Name is used only in case of etcd3 mode, to appropriately set the
# metadata for the etcd data.
# NOTE: NAME HAS TO BE EQUAL TO WHAT WE USE IN --name flag when starting etcd.
NAME="${NAME:-etcd-$(hostname)}"
INITIAL_CLUSTER="${INITIAL_CLUSTER:-${NAME}=http://localhost:2380}"
INITIAL_ADVERTISE_PEER_URLS="${INITIAL_ADVERTISE_PEER_URLS:-http://localhost:2380}"
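# For a multi-member (HA) restore these would typically be overridden to
# match the target cluster, e.g. (names and addresses illustrative):
#   INITIAL_CLUSTER="etcd-a=http://10.0.0.1:2380,etcd-b=http://10.0.0.2:2380"
#   INITIAL_ADVERTISE_PEER_URLS="http://10.0.0.1:2380"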
# Port on which etcd is exposed.
etcd_port=2379
event_etcd_port=4002
# Wait until both etcd instances are up
wait_for_etcd_up() {
  port=$1
  # TODO: As of 3.0.x etcd versions, all 2.* and 3.* versions return
  # {"health": "true"} on /health endpoint in healthy case.
  # However, we should come up with a regex for it to avoid future breakage.
  health_ok="{\"health\": \"true\"}"
  for i in $(seq 120); do
    # TODO: Is it enough to look into /health endpoint?
    health=$(curl --silent "http://127.0.0.1:${port}/health")
    if [ "${health}" == "${health_ok}" ]; then
      return 0
    fi
    sleep 1
  done
  return 1
}
# Wait until apiserver is up.
wait_for_cluster_healthy() {
  for i in $(seq 120); do
    cs_status=$(kubectl get componentstatuses -o template --template='{{range .items}}{{with index .conditions 0}}{{.type}}:{{.status}}{{end}}{{"\n"}}{{end}}') || true
    componentstatuses=$(echo "${cs_status}" | grep -c 'Healthy:') || true
    healthy=$(echo "${cs_status}" | grep -c 'Healthy:True') || true
    if [ "${componentstatuses}" -eq "${healthy}" ]; then
      return 0
    fi
    sleep 1
  done
  return 1
}
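# The template above prints one "<type>:<status>" line per component, so
# output for a healthy cluster looks roughly like:
#   Healthy:True
#   Healthy:True
#   Healthy:True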
# Wait until etcd and apiserver pods are down.
wait_for_etcd_and_apiserver_down() {
  for i in $(seq 120); do
    etcd=$(docker ps | grep etcd-server | wc -l)
    apiserver=$(docker ps | grep apiserver | wc -l)
    # TODO: Theoretically it is possible that apiserver and/or etcd
    # are currently down, but Kubelet is now restarting them and they
    # will reappear again. We should avoid that.
    if [ "${etcd}" -eq "0" ] && [ "${apiserver}" -eq "0" ]; then
      return 0
    fi
    sleep 1
  done
  return 1
}
# Move the manifest files to stop etcd and kube-apiserver
# while we swap the data out from under them.
MANIFEST_DIR="/etc/kubernetes/manifests"
MANIFEST_BACKUP_DIR="/etc/kubernetes/manifests-backups"
mkdir -p "${MANIFEST_BACKUP_DIR}"
echo "Moving etcd(s) & apiserver manifest files to ${MANIFEST_BACKUP_DIR}"
# If those files were already moved (e.g. during a previous attempt of
# this procedure), don't fail on it.
mv "${MANIFEST_DIR}/kube-apiserver.manifest" "${MANIFEST_BACKUP_DIR}" || true
mv "${MANIFEST_DIR}/etcd.manifest" "${MANIFEST_BACKUP_DIR}" || true
mv "${MANIFEST_DIR}/etcd-events.manifest" "${MANIFEST_BACKUP_DIR}" || true
# Wait for the pods to be stopped
echo "Waiting for etcd and kube-apiserver to be down"
if ! wait_for_etcd_and_apiserver_down; then
  # Couldn't kill etcd and apiserver.
  echo "Downing etcd and apiserver failed"
  exit 1
fi
read -rsp $'Press enter when all etcd instances are down...\n'
# Create the sort of directory structure that etcd expects.
# If this directory already exists, remove it.
BACKUP_DIR="/var/tmp/backup"
rm -rf "${BACKUP_DIR}"
if [ " ${ ETCD_API } " = = "etcd2" ] ; then
echo "Preparing etcd backup data for restore"
# In v2 mode, we simply copy both snap and wal files to a newly created
# directory. After that, we start etcd with --force-new-cluster option
# that (according to the etcd documentation) is required to recover from
# a backup.
2016-12-21 14:10:47 +00:00
echo " Copying data to ${ BACKUP_DIR } and restoring there "
2016-10-11 08:26:37 +00:00
mkdir -p " ${ BACKUP_DIR } /member/snap "
mkdir -p " ${ BACKUP_DIR } /member/wal "
# If the cluster is relatively new, there can be no .snap file.
mv *.snap " ${ BACKUP_DIR } /member/snap/ " || true
mv *.wal " ${ BACKUP_DIR } /member/wal/ "
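  # After the moves above, BACKUP_DIR mirrors a plain etcd2 data dir,
  # roughly (file names illustrative):
  #   /var/tmp/backup/member/snap/0000000000000002-0000000000012345.snap
  #   /var/tmp/backup/member/wal/0000000000000000-0000000000000000.wal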
  # TODO(jsz): This won't work with HA setups (e.g. do we need to set --name flag)?
  echo "Starting etcd ${ETCD_VERSION} to restore data"
  image=$(docker run -d -v ${BACKUP_DIR}:/var/etcd/data \
    --net=host -p ${etcd_port}:${etcd_port} \
    "k8s.gcr.io/etcd:${ETCD_VERSION}" /bin/sh -c \
    "/usr/local/bin/etcd --data-dir /var/etcd/data --force-new-cluster")
  if [ "$?" -ne "0" ]; then
    echo "Docker container didn't start correctly"
    exit 1
  fi
  echo "Container ${image} created, waiting for etcd to report as healthy"
  if ! wait_for_etcd_up "${etcd_port}"; then
    echo "Etcd didn't come back correctly"
    exit 1
  fi
  # Kill that etcd instance.
  echo "Etcd healthy - killing ${image} container"
  docker kill "${image}"
elif [ " ${ ETCD_API } " = = "etcd3" ] ; then
echo "Preparing etcd snapshot for restore"
mkdir -p " ${ BACKUP_DIR } "
2016-12-21 14:10:47 +00:00
echo " Copying data to ${ BACKUP_DIR } and restoring there "
2016-10-11 08:26:37 +00:00
number_files = $( find . -maxdepth 1 -type f -name "*.db" | wc -l)
if [ " ${ number_files } " -ne "1" ] ; then
echo "Incorrect number of *.db files - expected 1"
exit 1
fi
mv *.db " ${ BACKUP_DIR } / "
snapshot = " $( ls ${ BACKUP_DIR } ) "
  # Run etcdctl snapshot restore command and wait until it is finished.
  # The restore metadata (--name, --initial-cluster and
  # --initial-advertise-peer-urls) has to match the corresponding settings
  # in the etcd manifest file for the restored member to work.
  docker run -v ${BACKUP_DIR}:/var/tmp/backup --env ETCDCTL_API=3 \
    "k8s.gcr.io/etcd:${ETCD_VERSION}" /bin/sh -c \
    "/usr/local/bin/etcdctl snapshot restore ${BACKUP_DIR}/${snapshot} --name ${NAME} --initial-cluster ${INITIAL_CLUSTER} --initial-advertise-peer-urls ${INITIAL_ADVERTISE_PEER_URLS}; mv /${NAME}.etcd/member /var/tmp/backup/"
if [ " $? " -ne "0" ] ; then
echo "Docker container didn't started correctly"
exit 1
fi
rm -f " ${ BACKUP_DIR } / ${ snapshot } "
fi
# Also copy version.txt file.
cp " ${ VERSION_FILE } " " ${ BACKUP_DIR } "
export MNT_DISK="/mnt/disks/master-pd"
# Save the corrupted data (clean directory if it is already non-empty).
rm -rf " ${ MNT_DISK } /var/etcd-corrupted "
mkdir -p " ${ MNT_DISK } /var/etcd-corrupted "
2016-12-21 14:10:47 +00:00
echo " Saving corrupted data to ${ MNT_DISK } /var/etcd-corrupted "
2016-10-11 08:26:37 +00:00
mv /var/etcd/data " ${ MNT_DISK } /var/etcd-corrupted "
# Replace the corrupted data dir with the restored data.
echo "Copying restored data to /var/etcd/data"
mv "${BACKUP_DIR}" /var/etcd/data
if [ "${RESET_EVENT_ETCD:-}" == "true" ]; then
  echo "Removing event-etcd corrupted data"
  EVENTS_CORRUPTED_DIR="${MNT_DISK}/var/etcd-events-corrupted"
  # Save the corrupted data (clean directory if it is already non-empty).
  rm -rf "${EVENTS_CORRUPTED_DIR}"
  mkdir -p "${EVENTS_CORRUPTED_DIR}"
  mv /var/etcd/data-events "${EVENTS_CORRUPTED_DIR}"
fi
# Start etcd and kube-apiserver again.
echo "Restarting etcd and apiserver from restored snapshot"
mv " ${ MANIFEST_BACKUP_DIR } " /* " ${ MANIFEST_DIR } / "
rm -rf " ${ MANIFEST_BACKUP_DIR } "
# Verify that etcd is back.
echo "Waiting for etcd to come back"
if ! wait_for_etcd_up "${etcd_port}"; then
  echo "Etcd didn't come back correctly"
  exit 1
fi

# Verify that event etcd is back.
echo "Waiting for event etcd to come back"
if ! wait_for_etcd_up "${event_etcd_port}"; then
  echo "Event etcd didn't come back correctly"
  exit 1
fi

# Verify that kube-apiserver is back and cluster is healthy.
echo "Waiting for apiserver to come back"
if ! wait_for_cluster_healthy; then
  echo "Apiserver didn't come back correctly"
  exit 1
fi
echo "Cluster successfully restored!"