From 3437ac691a95ccca7fd92dab59fd512c6aa7f682 Mon Sep 17 00:00:00 2001 From: Max Forbes Date: Thu, 7 May 2015 17:41:22 -0700 Subject: [PATCH] Rolling node upgrade --- cluster/gce/coreos/helper.sh | 9 ++++- cluster/gce/debian/helper.sh | 9 ++++- cluster/gce/upgrade.sh | 65 ++++++++++++++++++++++++++++++++---- cluster/gce/util.sh | 15 +++++++-- 4 files changed, 87 insertions(+), 11 deletions(-) diff --git a/cluster/gce/coreos/helper.sh b/cluster/gce/coreos/helper.sh index a5c63c5ef2..8bf190b44b 100644 --- a/cluster/gce/coreos/helper.sh +++ b/cluster/gce/coreos/helper.sh @@ -129,8 +129,15 @@ function create-master-instance { # TODO(dawnchen): Check $CONTAINER_RUNTIME to decide which # cloud_config yaml file should be passed +# TODO(mbforbes): Make $1 required. +# TODO(mbforbes): Document required vars (for this and call chain). +# $1 version function create-node-instance-template { - create-node-template "${NODE_INSTANCE_PREFIX}-template" "${scope_flags[*]}" \ + local suffix="" + if [[ -n ${1:-} ]]; then + suffix="-${1}" + fi + create-node-template "${NODE_INSTANCE_PREFIX}-template${suffix}" "${scope_flags[*]}" \ "kube-env=${KUBE_TEMP}/node-kube-env.yaml" \ "user-data=${KUBE_ROOT}/cluster/gce/coreos/node.yaml" } diff --git a/cluster/gce/debian/helper.sh b/cluster/gce/debian/helper.sh index d3a61d2fb3..6ae48475d0 100644 --- a/cluster/gce/debian/helper.sh +++ b/cluster/gce/debian/helper.sh @@ -107,8 +107,15 @@ function create-master-instance { --disk name="${MASTER_NAME}-pd" device-name=master-pd mode=rw boot=no auto-delete=no } +# TODO(mbforbes): Make $1 required. +# TODO(mbforbes): Document required vars (for this and call chain). 
+# $1 version function create-node-instance-template { - create-node-template "${NODE_INSTANCE_PREFIX}-template" "${scope_flags[*]}" \ + local suffix="" + if [[ -n ${1:-} ]]; then + suffix="-${1}" + fi + create-node-template "${NODE_INSTANCE_PREFIX}-template${suffix}" "${scope_flags[*]}" \ "startup-script=${KUBE_ROOT}/cluster/gce/configure-vm.sh" \ "kube-env=${KUBE_TEMP}/node-kube-env.yaml" } diff --git a/cluster/gce/upgrade.sh b/cluster/gce/upgrade.sh index 5b6fbdbc4e..ad8f464f4d 100755 --- a/cluster/gce/upgrade.sh +++ b/cluster/gce/upgrade.sh @@ -66,9 +66,10 @@ function usage() { function upgrade-master() { echo "== Upgrading master to '${SERVER_BINARY_TAR_URL}'. Do not interrupt, deleting master instance. ==" - detect-master get-password - set-master-htpasswd + get-bearer-token + + detect-master # Delete the master instance. Note that the master-pd is created # with auto-delete=no, so it should not be deleted. @@ -111,13 +112,58 @@ function prepare-upgrade() { fi } +# Reads kube-env metadata from master and extracts value from provided key. +# +# Assumed vars: +# MASTER_NAME +# ZONE +# +# Args: +# $1 env key to use +function get-env-val() { + # TODO(mbforbes): Make this more reliable with retries. + gcloud compute ssh --zone ${ZONE} ${MASTER_NAME} --command \ + "curl --fail --silent -H 'Metadata-Flavor: Google' \ + 'http://metadata/computeMetadata/v1/instance/attributes/kube-env'" 2>/dev/null \ + | grep ${1} | cut -d : -f 2 | cut -d \' -f 2 +} + +# $1 version function upgrade-nodes() { - echo "== Upgrading nodes to ${SERVER_BINARY_TAR_URL}. ==" + local version=${1} + local sanitized_version=$(echo ${version} | sed s/"\."/-/g) + echo "== Upgrading nodes to ${version}. ==" detect-minion-names - get-password - set-master-htpasswd - kube-update-nodes upgrade + + # TODO(mbforbes): Refactor setting scope flags. 
+ local -a scope_flags=() + if (( "${#MINION_SCOPES[@]}" > 0 )); then + scope_flags=("--scopes" "${MINION_SCOPES[@]}") + else + scope_flags=("--no-scopes") + fi + + # Get required node tokens. + KUBELET_TOKEN=$(get-env-val "KUBELET_TOKEN") + KUBE_PROXY_TOKEN=$(get-env-val "KUBE_PROXY_TOKEN") + + # TODO(mbforbes): How do we ensure kube-env is written in a ${version}- + # compatible way? + write-node-env + # TODO(mbforbes): Get configure-vm script from ${version}. (Must plumb this + # through all create-node-instance-template implementations). + create-node-instance-template ${sanitized_version} + + # Do the actual upgrade. + gcloud preview rolling-updates start \ + --group "${NODE_INSTANCE_PREFIX}-group" \ + --max-num-concurrent-instances 1 \ + --max-num-failed-instances 0 \ + --project "${PROJECT}" \ + --zone "${ZONE}" \ + --template "${NODE_INSTANCE_PREFIX}-template-${sanitized_version}" + echo "== Done ==" } @@ -192,7 +238,12 @@ if [[ "${master_upgrade}" == "true" ]]; then fi if [[ "${node_upgrade}" == "true" ]]; then - upgrade-nodes + if [[ "${local_binaries}" == "true" ]]; then + echo "Upgrading nodes to local binaries is not yet supported." >&2 + else + upgrade-nodes ${binary_version} + fi fi +echo "== Validating cluster post-upgrade ==" "${KUBE_ROOT}/cluster/validate-cluster.sh" diff --git a/cluster/gce/util.sh b/cluster/gce/util.sh index 3d3b513e19..2ed70e2328 100755 --- a/cluster/gce/util.sh +++ b/cluster/gce/util.sh @@ -383,6 +383,16 @@ function create-firewall-rule { # $4: The kube-env metadata. function create-node-template { detect-project + + # First, ensure the template doesn't exist. + # TODO(mbforbes): To make this really robust, we need to parse the output and + # add retries. Just relying on a non-zero exit code doesn't + # distinguish an ephemeral failed call from a "not-exists". + if gcloud compute instance-templates describe "$1"; then + echo "Instance template ${1} already exists; continuing." 
>&2 + return + fi + local attempt=0 while true; do if ! gcloud compute instance-templates create "$1" \ @@ -398,10 +408,10 @@ function create-node-template { --can-ip-forward \ --metadata-from-file "$3" "$4"; then if (( attempt > 5 )); then - echo -e "${color_red}Failed to create instance template $1 ${color_norm}" + echo -e "${color_red}Failed to create instance template $1 ${color_norm}" >&2 exit 2 fi - echo -e "${color_yellow}Attempt $(($attempt+1)) failed to create instance template $1. Retrying.${color_norm}" + echo -e "${color_yellow}Attempt $(($attempt+1)) failed to create instance template $1. Retrying.${color_norm}" >&2 attempt=$(($attempt+1)) else break @@ -624,6 +634,7 @@ function kube-up { echo "Creating minions." + # TODO(mbforbes): Refactor setting scope flags. local -a scope_flags=() if (( "${#MINION_SCOPES[@]}" > 0 )); then scope_flags=("--scopes" "${MINION_SCOPES[@]}")