Merge pull request #30500 from wojtek-t/etcd_migration

Automatic merge from submit-queue

Support for etcd migration

@xiang90 @timothysc @hongchaodeng
pull/6/head
Kubernetes Submit Queue 2016-08-19 00:34:06 -07:00 committed by GitHub
commit 1e09eb7949
8 changed files with 128 additions and 29 deletions

View File

@ -543,6 +543,7 @@ function prepare-etcd-manifest {
sed -i -e "s@{{ *cpulimit *}}@\"$4\"@g" "${temp_file}"
sed -i -e "s@{{ *hostname *}}@$host_name@g" "${temp_file}"
sed -i -e "s@{{ *etcd_cluster *}}@$etcd_cluster@g" "${temp_file}"
sed -i -e "s@{{ *storage_backend *}}@${STORAGE_BACKEND:-}@g" "${temp_file}"
sed -i -e "s@{{ *cluster_state *}}@$cluster_state@g" "${temp_file}"
# Replace the volume host path.
sed -i -e "s@/mnt/master-pd/var/etcd@/mnt/disks/master-pd/var/etcd@g" "${temp_file}"

View File

@ -429,6 +429,7 @@ prepare_etcd_manifest() {
sed -i -e "s@{{ *cpulimit *}}@\"$4\"@g" "${etcd_temp_file}"
sed -i -e "s@{{ *hostname *}}@$host_name@g" "${etcd_temp_file}"
sed -i -e "s@{{ *etcd_cluster *}}@$etcd_cluster@g" "${etcd_temp_file}"
sed -i -e "s@{{ *storage_backend *}}@${STORAGE_BACKEND:-}@g" "${temp_file}"
sed -i -e "s@{{ *cluster_state *}}@$cluster_state@g" "${etcd_temp_file}"
# Replace the volume host path
sed -i -e "s@/mnt/master-pd/var/etcd@/mnt/disks/master-pd/var/etcd@g" "${etcd_temp_file}"

View File

@ -16,4 +16,4 @@ FROM BASEIMAGE
MAINTAINER Dawn Chen <dawnchen@google.com>
EXPOSE 2379 2380 4001 7001
COPY etcd etcdctl /usr/local/bin/
COPY etcd etcdctl migrate-if-needed.sh /usr/local/bin/

View File

@ -0,0 +1,66 @@
#!/bin/sh
# Copyright 2016 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This script performs data migration between etcd2 and etcd3 versions
# if needed.
# Expected usage of it is:
# ./migrate_if_needed <target-storage> <data-dir>
# It will look into the contents of file <data-dir>/version.txt to
# determine the current storage version (no file means etcd2).
set -o errexit
set -o nounset
if [ -z "${TARGET_STORAGE:-}" ]; then
echo "TARGET_USAGE variable unset - skipping migration"
exit 0
fi
if [ -z "${DATA_DIRECTORY:-}" ]; then
echo "DATA_DIRECTORY variable unset - skipping migration"
exit 0
fi
ETCDCTL="${ETCDCTL:-/usr/local/bin/etcdctl}"
VERSION_FILE="version.txt"
CURRENT_STORAGE='etcd2'
if [ -e "${DATA_DIRECTORY}/${VERSION_FILE}" ]; then
CURRENT_STORAGE="$(cat ${DATA_DIRECTORY}/${VERSION_FILE})"
fi
if [ "${CURRENT_STORAGE}" = "etcd2" -a "${TARGET_STORAGE}" = "etcd3" ]; then
# If directory doesn't exist or is empty, this means that there aren't any
# data for migration, which means we can skip this step.
if [ -d "${DATA_DIRECTORY}" ]; then
if [ "$(ls -A ${DATA_DIRECTORY})" ]; then
echo "Performing etcd2 -> etcd3 migration"
# TODO: Pass a correct transformer to handle TTLs.
echo "ETCDCTL_API=3 ${ETCDCTL} migrate --data-dir=${DATA_DIRECTORY}"
ETCDCTL_API=3 ${ETCDCTL} migrate --data-dir=${DATA_DIRECTORY}
fi
fi
fi
if [ "${CURRENT_STORAGE}" = "etcd3" -a "${TARGET_STORAGE}" = "etcd2" ]; then
echo "Performing etcd3 -> etcd2 migration"
# TODO: Implement rollback once this will be supported.
echo "etcd3 -> etcd2 downgrade is NOT supported."
fi
# Write current storage version to avoid future migrations.
# If directory doesn't exist, we need to create it first.
mkdir -p "${DATA_DIRECTORY}"
echo "${TARGET_STORAGE}" > "${DATA_DIRECTORY}/${VERSION_FILE}"

View File

@ -14,6 +14,7 @@
{% endfor -%}
{% set etcd_cluster = vars.etcd_cluster -%}
{% set cluster_state = vars.cluster_state -%}
{% set storage_backend = pillar.get('storage_backend', 'etcd2') -%}
{
"apiVersion": "v1",
@ -38,6 +39,14 @@
"-c",
"/usr/local/bin/etcd --name etcd-{{ hostname }} --listen-peer-urls http://{{ hostname }}:{{ server_port }} --initial-advertise-peer-urls http://{{ hostname }}:{{ server_port }} --advertise-client-urls http://127.0.0.1:{{ port }} --listen-client-urls http://127.0.0.1:{{ port }} --data-dir /var/etcd/data{{ suffix }} --initial-cluster-state {{ cluster_state }} --initial-cluster {{ etcd_cluster }} 1>>/var/log/etcd{{ suffix }}.log 2>&1"
],
"env": [
{ "name": "TARGET_STORAGE",
"value": "{{ storage_backend }}"
},
{ "name": "DATA_DIRECTORY",
"value": "/var/etcd/data{{ suffix }}"
}
],
"livenessProbe": {
"httpGet": {
"host": "127.0.0.1",
@ -47,26 +56,26 @@
"initialDelaySeconds": 15,
"timeoutSeconds": 15
},
"ports":[
"ports": [
{ "name": "serverport",
"containerPort": {{ server_port }},
"hostPort": {{ server_port }}
},{
"name": "clientport",
},
{ "name": "clientport",
"containerPort": {{ port }},
"hostPort": {{ port }}
}
],
"volumeMounts": [
{"name": "varetcd",
"mountPath": "/var/etcd",
"readOnly": false
{ "name": "varetcd",
"mountPath": "/var/etcd",
"readOnly": false
},
{"name": "varlogetcd",
"mountPath": "/var/log/etcd{{ suffix }}.log",
"readOnly": false
{ "name": "varlogetcd",
"mountPath": "/var/log/etcd{{ suffix }}.log",
"readOnly": false
}
]
]
}
],
"volumes":[

View File

@ -40,7 +40,7 @@ kube::etcd::start() {
fi
# Start etcd
ETCD_DIR=$(mktemp -d 2>/dev/null || mktemp -d -t test-etcd.XXXXXX)
ETCD_DIR=${ETCD_DIR:-$(mktemp -d 2>/dev/null || mktemp -d -t test-etcd.XXXXXX)}
if [[ -d "${ARTIFACTS_DIR:-}" ]]; then
ETCD_LOGFILE="${ARTIFACTS_DIR}/etcd.$(uname -n).$(id -un).log.DEBUG.$(date +%Y%m%d-%H%M%S).$$"
else

View File

@ -32,6 +32,9 @@ KUBE_NEW_API_VERSION=${KUBE_NEW_API_VERSION:-"v1"}
KUBE_OLD_STORAGE_VERSIONS=${KUBE_OLD_STORAGE_VERSIONs:-""}
KUBE_NEW_STORAGE_VERSIONS=${KUBE_NEW_STORAGE_VERSIONs:-""}
STORAGE_BACKEND_ETCD2="etcd2"
STORAGE_BACKEND_ETCD3="etcd3"
KUBE_STORAGE_MEDIA_TYPE_JSON="application/json"
KUBE_STORAGE_MEDIA_TYPE_PROTOBUF="application/vnd.kubernetes.protobuf"
@ -43,13 +46,16 @@ API_HOST=${API_HOST:-127.0.0.1}
KUBE_API_VERSIONS=""
RUNTIME_CONFIG=""
ETCDCTL=$(which etcdctl)
KUBECTL="${KUBE_OUTPUT_HOSTBIN}/kubectl"
UPDATE_ETCD_OBJECTS_SCRIPT="${KUBE_ROOT}/cluster/update-storage-objects.sh"
function startApiServer() {
local storage_versions=${1:-""}
local storage_media_type=${2:-""}
local storage_backend=${1:-"${STORAGE_BACKEND_ETCD2}"}
local storage_versions=${2:-""}
local storage_media_type=${3:-""}
kube::log::status "Starting kube-apiserver with KUBE_API_VERSIONS: ${KUBE_API_VERSIONS}"
kube::log::status " and storage-backend: ${storage_backend}"
kube::log::status " and storage-media-type: ${storage_media_type}"
kube::log::status " and runtime-config: ${RUNTIME_CONFIG}"
kube::log::status " and storage-version overrides: ${storage_versions}"
@ -59,8 +65,9 @@ function startApiServer() {
--insecure-bind-address="${API_HOST}" \
--bind-address="${API_HOST}" \
--insecure-port="${API_PORT}" \
--storage-backend="${storage_backend}" \
--etcd-servers="http://${ETCD_HOST}:${ETCD_PORT}" \
--etcd-prefix="${ETCD_PREFIX}" \
--etcd-prefix="/${ETCD_PREFIX}" \
--runtime-config="${RUNTIME_CONFIG}" \
--cert-dir="${TMPDIR:-/tmp/}" \
--service-cluster-ip-range="10.0.0.0/24" \
@ -120,7 +127,7 @@ KUBE_NEW_STORAGE_VERSIONS="batch/v1,autoscaling/v1"
#######################################################
KUBE_API_VERSIONS="${KUBE_OLD_API_VERSION},${KUBE_NEW_API_VERSION}"
RUNTIME_CONFIG="api/all=false,api/${KUBE_OLD_API_VERSION}=true,api/${KUBE_NEW_API_VERSION}=true"
startApiServer ${KUBE_OLD_STORAGE_VERSIONS} ${KUBE_STORAGE_MEDIA_TYPE_JSON}
startApiServer ${STORAGE_BACKEND_ETCD2} ${KUBE_OLD_STORAGE_VERSIONS} ${KUBE_STORAGE_MEDIA_TYPE_JSON}
# Create object(s)
@ -145,14 +152,27 @@ killApiServer
#######################################################
# Step 2: Start a server which supports both the old and new api versions,
# Step 2: Perform etcd2 -> etcd migration.
# We always perform offline migration, so we need to stop etcd.
#######################################################
kube::etcd::stop
TARGET_STORAGE="etcd3" \
DATA_DIRECTORY="${ETCD_DIR}" \
ETCDCTL=$(which etcdctl) \
${KUBE_ROOT}/cluster/images/etcd/migrate-if-needed.sh
kube::etcd::start
#######################################################
# Step 3: Start a server which supports both the old and new api versions,
# but KUBE_NEW_API_VERSION is the latest (storage) version.
# Still use KUBE_STORAGE_MEDIA_TYPE_JSON for storage encoding.
#######################################################
KUBE_API_VERSIONS="${KUBE_NEW_API_VERSION},${KUBE_OLD_API_VERSION}"
RUNTIME_CONFIG="api/all=false,api/${KUBE_OLD_API_VERSION}=true,api/${KUBE_NEW_API_VERSION}=true"
startApiServer ${KUBE_NEW_STORAGE_VERSIONS} ${KUBE_STORAGE_MEDIA_TYPE_JSON}
startApiServer ${STORAGE_BACKEND_ETCD3} ${KUBE_NEW_STORAGE_VERSIONS} ${KUBE_STORAGE_MEDIA_TYPE_JSON}
# Update etcd objects, so that will now be stored in the new api version.
kube::log::status "Updating storage versions in etcd"
@ -167,14 +187,14 @@ for test in ${tests[@]}; do
new_storage_version=${test_data[5]}
kube::log::status "Verifying ${resource}/${namespace}/${name} has updated storage version ${new_storage_version} in etcd"
curl -s http://${ETCD_HOST}:${ETCD_PORT}/v2/keys/${ETCD_PREFIX}/${resource}/${namespace}/${name} | grep ${new_storage_version}
ETCDCTL_API=3 ${ETCDCTL} --endpoints="${ETCD_HOST}:${ETCD_PORT}" get "/${ETCD_PREFIX}/${resource}/${namespace}/${name}" | grep ${new_storage_version}
done
killApiServer
#######################################################
# Step 3 : Start a server which supports only the new api version.
# Step 4 : Start a server which supports only the new api version.
# However, change storage encoding to KUBE_STORAGE_MEDIA_TYPE_PROTOBUF.
#######################################################
@ -183,7 +203,7 @@ RUNTIME_CONFIG="api/all=false,api/${KUBE_NEW_API_VERSION}=true"
# This seems to reduce flakiness.
sleep 1
startApiServer ${KUBE_NEW_STORAGE_VERSIONS} ${KUBE_STORAGE_MEDIA_TYPE_PROTOBUF}
startApiServer ${STORAGE_BACKEND_ETCD3} ${KUBE_NEW_STORAGE_VERSIONS} ${KUBE_STORAGE_MEDIA_TYPE_PROTOBUF}
for test in ${tests[@]}; do
IFS=',' read -ra test_data <<<"$test"

View File

@ -16,7 +16,9 @@ cluster/gce/configure-vm.sh: kubelet_api_servers: '${KUBELET_APISERVER}'
cluster/gce/gci/configure-helper.sh: reconcile_cidr="false"
cluster/gce/gci/configure-helper.sh: local api_servers="--master=https://${KUBERNETES_MASTER_NAME}"
cluster/gce/gci/configure-helper.sh: local reconcile_cidr="true"
cluster/gce/gci/configure-helper.sh: sed -i -e "s@{{ *storage_backend *}}@${STORAGE_BACKEND:-}@g" "${temp_file}"
cluster/gce/gci/configure-helper.sh: sed -i -e "s@{{pillar\['allow_privileged'\]}}@true@g" "${src_file}"
cluster/gce/trusty/configure-helper.sh: sed -i -e "s@{{ *storage_backend *}}@${STORAGE_BACKEND:-}@g" "${temp_file}"
cluster/gce/trusty/configure-helper.sh: sed -i -e "s@{{pillar\['allow_privileged'\]}}@true@g" "${src_file}"
cluster/gce/util.sh: local node_ip=$(gcloud compute instances describe --project "${PROJECT}" --zone "${ZONE}" \
cluster/juju/layers/kubernetes/reactive/k8s.py: check_call(split(cmd.format(kubeconfig, cluster_name, server, ca)))
@ -37,10 +39,10 @@ cluster/photon-controller/util.sh: local cert_dir="/srv/kubernetes"
cluster/photon-controller/util.sh: node_name=${1}
cluster/rackspace/util.sh: local node_ip=$(nova show --minimal ${NODE_NAMES[$i]} \
cluster/saltbase/salt/cluster-autoscaler/cluster-autoscaler.manifest:{% set params = pillar['autoscaler_mig_config'] + " " + cloud_config -%}
cluster/saltbase/salt/etcd/etcd.manifest: "value": "{{ storage_backend }}"
cluster/saltbase/salt/etcd/etcd.manifest:{% set storage_backend = pillar.get('storage_backend', 'etcd2') -%}
cluster/saltbase/salt/kube-admission-controls/init.sls:{% if 'LimitRanger' in pillar.get('admission_control', '') %}
cluster/saltbase/salt/kube-apiserver/kube-apiserver.manifest:{% set enable_garbage_collector = pillar['enable_garbage_collector'] -%}
cluster/saltbase/salt/kube-apiserver/kube-apiserver.manifest:{% set params = address + " " + storage_backend + " " + etcd_servers + " " + etcd_servers_overrides + " " + cloud_provider + " " + cloud_config + " " + runtime_config + " " + admission_control + " " + target_ram_mb + " " + service_cluster_ip_range + " " + client_ca_file + basic_auth_file + " " + min_request_timeout + " " + enable_garbage_collector -%}
cluster/saltbase/salt/kube-controller-manager/kube-controller-manager.manifest:{% set enable_garbage_collector = pillar['enable_garbage_collector'] -%}
cluster/saltbase/salt/kube-controller-manager/kube-controller-manager.manifest:{% set params = "--master=127.0.0.1:8080" + " " + cluster_name + " " + cluster_cidr + " " + allocate_node_cidrs + " " + service_cluster_ip_range + " " + terminated_pod_gc + " " + enable_garbage_collector + " " + cloud_provider + " " + cloud_config + " " + service_account_key + " " + log_level + " " + root_ca_file -%}
cluster/saltbase/salt/kube-proxy/kube-proxy.manifest: {% set api_servers_with_port = api_servers + ":6443" -%}
cluster/saltbase/salt/kube-proxy/kube-proxy.manifest: {% set api_servers_with_port = api_servers -%}
@ -80,8 +82,9 @@ hack/local-up-cluster.sh: advertise_address="--advertise_address=${API_HO
hack/local-up-cluster.sh: runtime_config="--runtime-config=${RUNTIME_CONFIG}"
hack/local-up-cluster.sh: advertise_address=""
hack/local-up-cluster.sh: runtime_config=""
hack/test-update-storage-objects.sh: local storage_media_type=${2:-""}
hack/test-update-storage-objects.sh: local storage_versions=${1:-""}
hack/test-update-storage-objects.sh: local storage_backend=${1:-"${STORAGE_BACKEND_ETCD2}"}
hack/test-update-storage-objects.sh: local storage_media_type=${3:-""}
hack/test-update-storage-objects.sh: local storage_versions=${2:-""}
hack/test-update-storage-objects.sh: source_file=${test_data[0]}
hack/test-update-storage-objects.sh:# source_file,resource,namespace,name,old_version,new_version
pkg/kubelet/api/v1alpha1/runtime/api.pb.go: ContainerPort *int32 `protobuf:"varint,3,opt,name=container_port,json=containerPort" json:"container_port,omitempty"`
@ -98,14 +101,13 @@ pkg/util/oom/oom_linux.go:// Writes 'value' to /proc/<pid>/oom_score_adj for all
pkg/util/oom/oom_linux.go:// Writes 'value' to /proc/<pid>/oom_score_adj. PID = 0 means self
test/e2e/common/configmap.go: Command: []string{"/mt", "--break_on_expected_content=false", "--retry_time=120", "--file_content_in_loop=/etc/configmap-volume/data-1"},
test/e2e/common/downwardapi_volume.go: Command: []string{"/mt", "--break_on_expected_content=false", "--retry_time=120", "--file_content_in_loop=" + filePath},
test/e2e/es_cluster_logging.go: framework.Failf("No cluster_name field in Elasticsearch response: %v", esResponse)
test/e2e/es_cluster_logging.go: // Check to see if have a cluster_name field.
test/e2e/es_cluster_logging.go: clusterName, ok := esResponse["cluster_name"]
test/e2e/common/host_path.go: fmt.Sprintf("--file_content_in_loop=%v", filePath),
test/e2e/common/host_path.go: fmt.Sprintf("--file_content_in_loop=%v", filePathInReader),
test/e2e/common/host_path.go: fmt.Sprintf("--retry_time=%d", retryDuration),
test/e2e/common/host_path.go: fmt.Sprintf("--retry_time=%d", retryDuration),
test/e2e_node/configmap_test.go: Command: []string{"/mt", "--break_on_expected_content=false", "--retry_time=120", "--file_content_in_loop=/etc/configmap-volume/data-1"},
test/e2e/es_cluster_logging.go: framework.Failf("No cluster_name field in Elasticsearch response: %v", esResponse)
test/e2e/es_cluster_logging.go: // Check to see if have a cluster_name field.
test/e2e/es_cluster_logging.go: clusterName, ok := esResponse["cluster_name"]
test/images/mount-tester/mt.go: flag.BoolVar(&breakOnExpectedContent, "break_on_expected_content", true, "Break out of loop on expected content, (use with --file_content_in_loop flag only)")
test/images/mount-tester/mt.go: flag.IntVar(&retryDuration, "retry_time", 180, "Retry time during the loop")
test/images/mount-tester/mt.go: flag.StringVar(&readFileContentInLoopPath, "file_content_in_loop", "", "Path to read the file content in loop from")