#!/bin/bash
# Copyright 2014 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# A library of helper functions and constants for the local config.
# Use the config file specified in $KUBE_CONFIG_FILE, or default to
# config-default.sh.
KUBE_ROOT=$(dirname "${BASH_SOURCE}")/../..
source "${KUBE_ROOT}/cluster/gce/${KUBE_CONFIG_FILE-"config-default.sh"}"
NODE_INSTANCE_PREFIX="${INSTANCE_PREFIX}-minion"
# Verify prereqs
function verify-prereqs {
local cmd
for cmd in gcloud gsutil; do
which "${cmd}" >/dev/null || {
echo "Can't find ${cmd} in PATH, please fix and retry. The Google Cloud "
echo "SDK can be downloaded from https://cloud.google.com/sdk/."
exit 1
}
done
}
# Create a temp dir that'll be deleted at the end of this bash session.
#
# Vars set:
# KUBE_TEMP
function ensure-temp-dir {
if [[ -z ${KUBE_TEMP-} ]]; then
KUBE_TEMP=$(mktemp -d -t kubernetes.XXXXXX)
trap 'rm -rf "${KUBE_TEMP}"' EXIT
fi
}
# Verify and find the various tar files that we are going to use on the server.
#
# Vars set:
# SERVER_BINARY_TAR
# SALT_TAR
function find-release-tars {
SERVER_BINARY_TAR="${KUBE_ROOT}/server/kubernetes-server-linux-amd64.tar.gz"
if [[ ! -f "$SERVER_BINARY_TAR" ]]; then
SERVER_BINARY_TAR="${KUBE_ROOT}/_output/release-tars/kubernetes-server-linux-amd64.tar.gz"
fi
if [[ ! -f "$SERVER_BINARY_TAR" ]]; then
echo "!!! Cannot find kubernetes-server-linux-amd64.tar.gz"
exit 1
fi
SALT_TAR="${KUBE_ROOT}/server/kubernetes-salt.tar.gz"
if [[ ! -f "$SALT_TAR" ]]; then
SALT_TAR="${KUBE_ROOT}/_output/release-tars/kubernetes-salt.tar.gz"
fi
if [[ ! -f "$SALT_TAR" ]]; then
echo "!!! Cannot find kubernetes-salt.tar.gz"
exit 1
fi
}
# Use the gcloud defaults to find the project. If it is already set in the
# environment then go with that.
#
# Vars set:
# PROJECT
# PROJECT_REPORTED
function detect-project () {
if [[ -z "${PROJECT-}" ]]; then
PROJECT=$(gcloud config list project | tail -n 1 | cut -f 3 -d ' ')
fi
if [[ -z "${PROJECT-}" ]]; then
echo "Could not detect Google Cloud Platform project. Set the default project using " >&2
echo "'gcloud config set project <PROJECT>'" >&2
exit 1
fi
if [[ -z "${PROJECT_REPORTED-}" ]]; then
echo "Project: ${PROJECT}" >&2
echo "Zone: ${ZONE}" >&2
PROJECT_REPORTED=true
fi
}
# Take the local tar files and upload them to Google Storage. They will then be
# downloaded by the master as part of its startup script.
#
# Assumed vars:
# PROJECT
# SERVER_BINARY_TAR
# SALT_TAR
# Vars set:
# SERVER_BINARY_TAR_URL
# SALT_TAR_URL
function upload-server-tars() {
SERVER_BINARY_TAR_URL=
SALT_TAR_URL=
local project_hash
if which md5 > /dev/null 2>&1; then
project_hash=$(md5 -q -s "$PROJECT")
else
project_hash=$(echo -n "$PROJECT" | md5sum | awk '{ print $1 }')
fi
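# Keep only the first 5 characters of the hash; enough to vary the bucket name
# per project while keeping it short.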
project_hash=${project_hash:0:5}
local -r staging_bucket="gs://kubernetes-staging-${project_hash}"
# Ensure the bucket is created
if ! gsutil ls "$staging_bucket" > /dev/null 2>&1 ; then
echo "Creating $staging_bucket"
gsutil mb "${staging_bucket}"
fi
local -r staging_path="${staging_bucket}/devel"
echo "+++ Staging server tars to Google Storage: ${staging_path}"
local server_binary_gs_url="${staging_path}/${SERVER_BINARY_TAR##*/}"
gsutil -q -h "Cache-Control:private, max-age=0" cp "${SERVER_BINARY_TAR}" "${server_binary_gs_url}"
gsutil acl ch -g all:R "${server_binary_gs_url}" >/dev/null 2>&1
local salt_gs_url="${staging_path}/${SALT_TAR##*/}"
gsutil -q -h "Cache-Control:private, max-age=0" cp "${SALT_TAR}" "${salt_gs_url}"
gsutil acl ch -g all:R "${salt_gs_url}" >/dev/null 2>&1
# Convert from gs:// URL to an https:// URL
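# e.g. gs://kubernetes-staging-ab12c/devel/kubernetes-salt.tar.gz becomes
# https://storage.googleapis.com/kubernetes-staging-ab12c/devel/kubernetes-salt.tar.gz
# (the bucket hash shown here is illustrative).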
SERVER_BINARY_TAR_URL="${server_binary_gs_url/gs:\/\//https://storage.googleapis.com/}"
SALT_TAR_URL="${salt_gs_url/gs:\/\//https://storage.googleapis.com/}"
}
# Detect minions created in the minion group
#
# Assumed vars:
# NODE_INSTANCE_PREFIX
# Vars set:
# MINION_NAMES
function detect-minion-names {
detect-project
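# The instance list is expected to be a set of full resource URLs of the form
# .../projects/<project>/zones/<zone>/instances/<name>; field 11 of the
# slash-separated URL is the bare instance name.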
MINION_NAMES=($(gcloud preview --project "${PROJECT}" instance-groups \
--zone "${ZONE}" instances --group "${NODE_INSTANCE_PREFIX}-group" list \
| cut -d'/' -f11))
echo "MINION_NAMES=${MINION_NAMES[*]}"
}
# Waits until the number of running nodes in the instance group is equal to NUM_MINIONS
#
# Assumed vars:
# NODE_INSTANCE_PREFIX
# NUM_MINIONS
function wait-for-minions-to-run {
detect-project
local running_minions=0
while [[ "${NUM_MINIONS}" != "${running_minions}" ]]; do
echo -e -n "${color_yellow}Waiting for minions to run. "
echo -e "${running_minions} out of ${NUM_MINIONS} running. Retrying.${color_norm}"
sleep 5
running_minions=$(gcloud preview --project "${PROJECT}" instance-groups \
--zone "${ZONE}" instances --group "${NODE_INSTANCE_PREFIX}-group" list \
--running | wc -l | xargs)
done
}
# Detect the information about the minions
#
# Assumed vars:
# ZONE
# Vars set:
# MINION_NAMES
# KUBE_MINION_IP_ADDRESSES (array)
function detect-minions () {
detect-project
detect-minion-names
KUBE_MINION_IP_ADDRESSES=()
for (( i=0; i<${#MINION_NAMES[@]}; i++)); do
local minion_ip=$(gcloud compute instances describe --project "${PROJECT}" --zone "${ZONE}" \
"${MINION_NAMES[$i]}" --fields networkInterfaces[0].accessConfigs[0].natIP \
--format=text | awk '{ print $2 }')
if [[ -z "${minion_ip-}" ]] ; then
echo "Did not find ${MINION_NAMES[$i]}" >&2
else
echo "Found ${MINION_NAMES[$i]} at ${minion_ip}"
KUBE_MINION_IP_ADDRESSES+=("${minion_ip}")
fi
done
if [[ -z "${KUBE_MINION_IP_ADDRESSES-}" ]]; then
echo "Could not detect Kubernetes minion nodes. Make sure you've launched a cluster with 'kube-up.sh'" >&2
exit 1
fi
}
# Detect the IP for the master
#
# Assumed vars:
# MASTER_NAME
# ZONE
# Vars set:
# KUBE_MASTER
# KUBE_MASTER_IP
function detect-master () {
detect-project
KUBE_MASTER=${MASTER_NAME}
if [[ -z "${KUBE_MASTER_IP-}" ]]; then
KUBE_MASTER_IP=$(gcloud compute instances describe --project "${PROJECT}" --zone "${ZONE}" \
"${MASTER_NAME}" --fields networkInterfaces[0].accessConfigs[0].natIP \
--format=text | awk '{ print $2 }')
fi
if [[ -z "${KUBE_MASTER_IP-}" ]]; then
echo "Could not detect Kubernetes master node. Make sure you've launched a cluster with 'kube-up.sh'" >&2
exit 1
fi
echo "Using master: $KUBE_MASTER (external IP: $KUBE_MASTER_IP)"
}
# Ensure that we have a password created for authenticating to the master. Will
# read from the kubernetes auth-file for the current context if available.
#
# Assumed vars
# KUBE_ROOT
#
# Vars set:
# KUBE_USER
# KUBE_PASSWORD
function get-password {
# go template to extract the auth-path of the current-context user
# Note: we save dot ('.') to $dot because the 'with' action overrides dot
local template='{{$dot := .}}{{with $ctx := index $dot "current-context"}}{{$user := index $dot "contexts" $ctx "user"}}{{index $dot "users" $user "auth-path"}}{{end}}'
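# For illustration, given a config shaped like
#   {"current-context": "c", "contexts": {"c": {"user": "u"}},
#    "users": {"u": {"auth-path": "/path/to/kubernetes_auth"}}}
# the template above prints "/path/to/kubernetes_auth".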
local file=$("${KUBE_ROOT}/cluster/kubectl.sh" config view -o template --template="${template}")
if [[ ! -z "$file" && -r "$file" ]]; then
KUBE_USER=$(cat "$file" | python -c 'import json,sys;print json.load(sys.stdin)["User"]')
KUBE_PASSWORD=$(cat "$file" | python -c 'import json,sys;print json.load(sys.stdin)["Password"]')
return
fi
KUBE_USER=admin
KUBE_PASSWORD=$(python -c 'import string,random; print "".join(random.SystemRandom().choice(string.ascii_letters + string.digits) for _ in range(16))')
}
# Generate authentication token for admin user. Will
# read from $HOME/.kubernetes_auth if available.
#
# Vars set:
# KUBE_ADMIN_TOKEN
function get-admin-token {
local file="$HOME/.kubernetes_auth"
if [[ -r "$file" ]]; then
KUBE_ADMIN_TOKEN=$(cat "$file" | python -c 'import json,sys;print json.load(sys.stdin)["BearerToken"]')
return
fi
KUBE_ADMIN_TOKEN=$(python -c 'import string,random; print "".join(random.SystemRandom().choice(string.ascii_letters + string.digits) for _ in range(32))')
}
# Wait for background jobs to finish. Exit with
# an error status if any of the jobs failed.
function wait-for-jobs {
local fail=0
local job
for job in $(jobs -p); do
wait "${job}" || fail=$((fail + 1))
done
if (( fail != 0 )); then
echo -e "${color_red}${fail} commands failed. Exiting.${color_norm}" >&2
# Ignore failures for now.
# exit 2
fi
}
# Robustly try to create a firewall rule.
# $1: The name of firewall rule.
# $2: IP ranges.
# $3: Target tags for this firewall rule.
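# Invoked later in this file as, e.g.:
#   create-firewall-rule "${MINION_TAG}-all" "${CLUSTER_IP_RANGE}" "${MINION_TAG}"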
function create-firewall-rule {
detect-project
local attempt=0
while true; do
if ! gcloud compute firewall-rules create "$1" \
--project "${PROJECT}" \
--network "${NETWORK}" \
--source-ranges "$2" \
--target-tags "$3" \
--allow tcp udp icmp esp ah sctp; then
if (( attempt > 5 )); then
echo -e "${color_red}Failed to create firewall rule $1 ${color_norm}"
exit 2
fi
echo -e "${color_yellow}Attempt $(($attempt+1)) failed to create firewall rule $1. Retrying.${color_norm}"
attempt=$(($attempt+1))
else
break
fi
done
}
# Robustly try to create a route.
# $1: The name of the route.
# $2: IP range.
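# Invoked later in this file as, e.g.:
#   create-route "${MINION_NAMES[$i]}" "${MINION_IP_RANGES[$i]}"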
function create-route {
detect-project
local attempt=0
while true; do
if ! gcloud compute routes create "$1" \
--project "${PROJECT}" \
--destination-range "$2" \
--network "${NETWORK}" \
--next-hop-instance "$1" \
--next-hop-instance-zone "${ZONE}"; then
if (( attempt > 5 )); then
echo -e "${color_red}Failed to create route $1 ${color_norm}"
exit 2
fi
echo -e "${color_yellow}Attempt $(($attempt+1)) failed to create route $1. Retrying.${color_norm}"
attempt=$(($attempt+1))
else
break
fi
done
}
# Robustly try to create an instance template.
# $1: The name of the instance template.
# $2: The scopes flag.
# $3: The minion start script metadata from file.
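# Invoked later in this file as, e.g.:
#   create-node-template "${NODE_INSTANCE_PREFIX}-template" "${scope_flags[*]}" \
#     "startup-script=${KUBE_TEMP}/minion-start.sh"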
function create-node-template {
detect-project
local attempt=0
while true; do
if ! gcloud compute instance-templates create "$1" \
--project "${PROJECT}" \
--machine-type "${MINION_SIZE}" \
--boot-disk-type "${MINION_DISK_TYPE}" \
--boot-disk-size "${MINION_DISK_SIZE}" \
--image-project="${IMAGE_PROJECT}" \
--image "${IMAGE}" \
--tags "${MINION_TAG}" \
--network "${NETWORK}" \
$2 \
--can-ip-forward \
--metadata-from-file "$3"; then
if (( attempt > 5 )); then
echo -e "${color_red}Failed to create instance template $1 ${color_norm}"
exit 2
fi
echo -e "${color_yellow}Attempt $(($attempt+1)) failed to create instance template $1. Retrying.${color_norm}"
attempt=$(($attempt+1))
else
break
fi
done
}
# Robustly try to add metadata on an instance.
# $1: The name of the instance.
# $2: The metadata key=value pair to add.
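# Invoked later in this file as, e.g.:
#   add-instance-metadata "${MINION_NAMES[$i]}" "node-ip-range=${MINION_IP_RANGES[$i]}"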
function add-instance-metadata {
detect-project
local attempt=0
while true; do
if ! gcloud compute instances add-metadata "$1" \
--project "${PROJECT}" \
--zone "${ZONE}" \
--metadata "$2"; then
if (( attempt > 5 )); then
echo -e "${color_red}Failed to add instance metadata in $1 ${color_norm}"
exit 2
fi
echo -e "${color_yellow}Attempt $(($attempt+1)) failed to add metadata in $1. Retrying.${color_norm}"
attempt=$(($attempt+1))
else
break
fi
done
}
# Instantiate a kubernetes cluster
#
# Assumed vars
# KUBE_ROOT
# <Various vars set in config file>
function kube-up {
detect-project
# Make sure we have the tar files staged on Google Storage
find-release-tars
upload-server-tars
ensure-temp-dir
get-password
python "${KUBE_ROOT}/third_party/htpasswd/htpasswd.py" \
-b -c "${KUBE_TEMP}/htpasswd" "$KUBE_USER" "$KUBE_PASSWORD"
local htpasswd
htpasswd=$(cat "${KUBE_TEMP}/htpasswd")
if ! gcloud compute networks --project "${PROJECT}" describe "${NETWORK}" &>/dev/null; then
echo "Creating new network: ${NETWORK}"
# The network needs to be created synchronously or we have a race. The
# firewalls can be added concurrent with instance creation.
gcloud compute networks create --project "${PROJECT}" "${NETWORK}" --range "10.240.0.0/16"
fi
if ! gcloud compute firewall-rules --project "${PROJECT}" describe "${NETWORK}-default-internal" &>/dev/null; then
gcloud compute firewall-rules create "${NETWORK}-default-internal" \
--project "${PROJECT}" \
--network "${NETWORK}" \
--source-ranges "10.0.0.0/8" \
--allow "tcp:1-65535" "udp:1-65535" "icmp" &
fi
if ! gcloud compute firewall-rules describe --project "${PROJECT}" "${NETWORK}-default-ssh" &>/dev/null; then
gcloud compute firewall-rules create "${NETWORK}-default-ssh" \
--project "${PROJECT}" \
--network "${NETWORK}" \
--source-ranges "0.0.0.0/0" \
--allow "tcp:22" &
fi
echo "Starting VMs and configuring firewalls"
gcloud compute firewall-rules create "${MASTER_NAME}-https" \
--project "${PROJECT}" \
--network "${NETWORK}" \
--target-tags "${MASTER_TAG}" \
--allow tcp:443 &
(
echo "#! /bin/bash"
echo "mkdir -p /var/cache/kubernetes-install"
echo "cd /var/cache/kubernetes-install"
echo "readonly MASTER_NAME='${MASTER_NAME}'"
echo "readonly NODE_INSTANCE_PREFIX='${NODE_INSTANCE_PREFIX}'"
echo "readonly SERVER_BINARY_TAR_URL='${SERVER_BINARY_TAR_URL}'"
echo "readonly SALT_TAR_URL='${SALT_TAR_URL}'"
echo "readonly MASTER_HTPASSWD='${htpasswd}'"
echo "readonly PORTAL_NET='${PORTAL_NET}'"
echo "readonly ENABLE_CLUSTER_MONITORING='${ENABLE_CLUSTER_MONITORING:-false}'"
echo "readonly ENABLE_NODE_MONITORING='${ENABLE_NODE_MONITORING:-false}'"
echo "readonly ENABLE_CLUSTER_LOGGING='${ENABLE_CLUSTER_LOGGING:-false}'"
echo "readonly ENABLE_NODE_LOGGING='${ENABLE_NODE_LOGGING:-false}'"
echo "readonly LOGGING_DESTINATION='${LOGGING_DESTINATION:-}'"
echo "readonly ELASTICSEARCH_LOGGING_REPLICAS='${ELASTICSEARCH_LOGGING_REPLICAS:-}'"
echo "readonly ENABLE_CLUSTER_DNS='${ENABLE_CLUSTER_DNS:-false}'"
echo "readonly DNS_REPLICAS='${DNS_REPLICAS:-}'"
echo "readonly DNS_SERVER_IP='${DNS_SERVER_IP:-}'"
echo "readonly DNS_DOMAIN='${DNS_DOMAIN:-}'"
grep -v "^#" "${KUBE_ROOT}/cluster/gce/templates/common.sh"
grep -v "^#" "${KUBE_ROOT}/cluster/gce/templates/format-and-mount-pd.sh"
grep -v "^#" "${KUBE_ROOT}/cluster/gce/templates/create-dynamic-salt-files.sh"
grep -v "^#" "${KUBE_ROOT}/cluster/gce/templates/download-release.sh"
grep -v "^#" "${KUBE_ROOT}/cluster/gce/templates/salt-master.sh"
) > "${KUBE_TEMP}/master-start.sh"
# Report logging choice (if any).
if [[ "${ENABLE_NODE_LOGGING-}" == "true" ]]; then
echo "+++ Logging using Fluentd to ${LOGGING_DESTINATION:-unknown}"
# For logging to GCP we need to enable some minion scopes.
if [[ "${LOGGING_DESTINATION-}" == "gcp" ]]; then
MINION_SCOPES+=('https://www.googleapis.com/auth/logging.write')
fi
fi
# We have to make sure the disk is created before creating the master VM, so
# run this in the foreground.
gcloud compute disks create "${MASTER_NAME}-pd" \
--project "${PROJECT}" \
--zone "${ZONE}" \
--size "10GB"
gcloud compute instances create "${MASTER_NAME}" \
--project "${PROJECT}" \
--zone "${ZONE}" \
--machine-type "${MASTER_SIZE}" \
--image-project="${IMAGE_PROJECT}" \
--image "${IMAGE}" \
--tags "${MASTER_TAG}" \
--network "${NETWORK}" \
--scopes "storage-ro" "compute-rw" \
--metadata-from-file "startup-script=${KUBE_TEMP}/master-start.sh" \
--disk name="${MASTER_NAME}-pd" device-name=master-pd mode=rw boot=no auto-delete=no &
# Create a single firewall rule for all minions.
create-firewall-rule "${MINION_TAG}-all" "${CLUSTER_IP_RANGE}" "${MINION_TAG}" &
# Wait for last batch of jobs.
wait-for-jobs
local -a scope_flags=()
if (( "${#MINION_SCOPES[@]}" > 0 )); then
scope_flags=("--scopes" "${MINION_SCOPES[@]}")
else
scope_flags=("--no-scopes")
fi
(
echo "#! /bin/bash"
echo "ZONE='${ZONE}'"
echo "MASTER_NAME='${MASTER_NAME}'"
echo "until MINION_IP_RANGE=\$(curl --fail --silent -H 'Metadata-Flavor: Google'\\"
echo " http://metadata/computeMetadata/v1/instance/attributes/node-ip-range); do"
echo " echo 'Waiting for metadata MINION_IP_RANGE...'"
echo " sleep 3"
echo "done"
echo "EXTRA_DOCKER_OPTS='${EXTRA_DOCKER_OPTS}'"
echo "ENABLE_DOCKER_REGISTRY_CACHE='${ENABLE_DOCKER_REGISTRY_CACHE:-false}'"
grep -v "^#" "${KUBE_ROOT}/cluster/gce/templates/common.sh"
grep -v "^#" "${KUBE_ROOT}/cluster/gce/templates/salt-minion.sh"
) > "${KUBE_TEMP}/minion-start.sh"
create-node-template "${NODE_INSTANCE_PREFIX}-template" "${scope_flags[*]}" \
"startup-script=${KUBE_TEMP}/minion-start.sh"
gcloud preview managed-instance-groups --zone "${ZONE}" \
create "${NODE_INSTANCE_PREFIX}-group" \
--project "${PROJECT}" \
--base-instance-name "${NODE_INSTANCE_PREFIX}" \
--size "${NUM_MINIONS}" \
--template "${NODE_INSTANCE_PREFIX}-template" || true;
# TODO: this should be true when the above create managed-instance-group
# command returns, but currently it returns before the instances come up due
# to gcloud's deficiency.
wait-for-minions-to-run
detect-minion-names
# Create the routes and set IP ranges to instance metadata, 5 instances at a time.
for (( i=0; i<${#MINION_NAMES[@]}; i++)); do
create-route "${MINION_NAMES[$i]}" "${MINION_IP_RANGES[$i]}" &
add-instance-metadata "${MINION_NAMES[$i]}" "node-ip-range=${MINION_IP_RANGES[$i]}" &
if [ $i -ne 0 ] && [ $((i%5)) -eq 0 ]; then
echo Waiting for a batch of routes at $i...
wait-for-jobs
fi
done
# Wait for last batch of jobs.
wait-for-jobs
detect-master
echo "Waiting for cluster initialization."
echo
echo " This will continually check to see if the API for kubernetes is reachable."
echo " This might loop forever if there was some uncaught error during start"
echo " up."
echo
until curl --insecure --user "${KUBE_USER}:${KUBE_PASSWORD}" --max-time 5 \
--fail --output /dev/null --silent "https://${KUBE_MASTER_IP}/api/v1beta1/pods"; do
printf "."
sleep 2
done
echo "Kubernetes cluster created."
local kube_cert="kubecfg.crt"
local kube_key="kubecfg.key"
local ca_cert="kubernetes.ca.crt"
# TODO use token instead of kube_auth
local kube_auth="kubernetes_auth"
local kubectl="${KUBE_ROOT}/cluster/kubectl.sh"
local context="${PROJECT}_${INSTANCE_PREFIX}"
local user="${context}-admin"
local config_dir="${HOME}/.kube/${context}"
# TODO: generate ADMIN (and KUBELET) tokens and put those in the master's
# config file. Distribute the same way the htpasswd is done.
(
mkdir -p "${config_dir}"
umask 077
gcloud compute ssh --project "${PROJECT}" --zone "$ZONE" "${MASTER_NAME}" --command "sudo cat /srv/kubernetes/kubecfg.crt" >"${config_dir}/${kube_cert}" 2>/dev/null
gcloud compute ssh --project "${PROJECT}" --zone "$ZONE" "${MASTER_NAME}" --command "sudo cat /srv/kubernetes/kubecfg.key" >"${config_dir}/${kube_key}" 2>/dev/null
gcloud compute ssh --project "${PROJECT}" --zone "$ZONE" "${MASTER_NAME}" --command "sudo cat /srv/kubernetes/ca.crt" >"${config_dir}/${ca_cert}" 2>/dev/null
"${kubectl}" config set-cluster "${context}" --server="https://${KUBE_MASTER_IP}" --certificate-authority="${config_dir}/${ca_cert}" --global
"${kubectl}" config set-credentials "${user}" --auth-path="${config_dir}/${kube_auth}" --global
"${kubectl}" config set-context "${context}" --cluster="${context}" --user="${user}" --global
"${kubectl}" config use-context "${context}" --global
cat << EOF > "${config_dir}/${kube_auth}"
{
"User": "$KUBE_USER",
"Password": "$KUBE_PASSWORD",
"CAFile": "${config_dir}/${ca_cert}",
"CertFile": "${config_dir}/${kube_cert}",
"KeyFile": "${config_dir}/${kube_key}"
}
EOF
chmod 0600 "${config_dir}/${kube_auth}" "${config_dir}/$kube_cert" \
"${config_dir}/${kube_key}" "${config_dir}/${ca_cert}"
echo "Wrote ${config_dir}/${kube_auth}"
)
echo "Sanity checking cluster..."
# Basic sanity checking
local i
local rc # Capture return code without exiting because of errexit bash option
for (( i=0; i<${#MINION_NAMES[@]}; i++)); do
# Make sure docker is installed and working.
local attempt=0
while true; do
echo -n Attempt "$(($attempt+1))" to check Docker on node "${MINION_NAMES[$i]}" ...
local output=$(gcloud compute --project "${PROJECT}" ssh --zone "$ZONE" "${MINION_NAMES[$i]}" --command "sudo docker ps -a" 2>/dev/null)
if [[ -z "${output}" ]]; then
if (( attempt > 9 )); then
echo
echo -e "${color_red}Docker failed to install on node ${MINION_NAMES[$i]}. Your cluster is unlikely" >&2
echo "to work correctly. Please run ./cluster/kube-down.sh and re-create the" >&2
echo -e "cluster. (sorry!)${color_norm}" >&2
exit 1
fi
elif [[ "${output}" != *"kubernetes/pause"* ]]; then
if (( attempt > 9 )); then
echo
echo -e "${color_red}Failed to observe kubernetes/pause on node ${MINION_NAMES[$i]}. Your cluster is unlikely" >&2
echo "to work correctly. Please run ./cluster/kube-down.sh and re-create the" >&2
echo -e "cluster. (sorry!)${color_norm}" >&2
exit 1
fi
else
echo -e " ${color_green}[working]${color_norm}"
break
fi
echo -e " ${color_yellow}[not working yet]${color_norm}"
# Start Docker, in case it failed to start.
gcloud compute --project "${PROJECT}" ssh --zone "$ZONE" "${MINION_NAMES[$i]}" \
--command "sudo service docker start" 2>/dev/null || true
attempt=$(($attempt+1))
sleep 30
done
done
echo
echo -e "${color_green}Kubernetes cluster is running. The master is running at:"
echo
echo -e "${color_yellow} https://${KUBE_MASTER_IP}"
echo
echo -e "${color_green}The user name and password to use is located in ${config_dir}/${kube_auth}.${color_norm}"
echo
}
# Delete a kubernetes cluster. This is called from test-teardown.
#
# Assumed vars:
# MASTER_NAME
# NODE_INSTANCE_PREFIX
# ZONE
# This function tears down cluster resources 10 at a time to avoid issuing too many
# API calls and exceeding API quota. It is important to bring down the instances before bringing
# down the firewall rules and routes.
function kube-down {
detect-project
echo "Bringing down cluster"
gcloud preview managed-instance-groups --zone "${ZONE}" delete \
--project "${PROJECT}" \
--quiet \
"${NODE_INSTANCE_PREFIX}-group" || true
gcloud compute instance-templates delete \
--project "${PROJECT}" \
--quiet \
"${NODE_INSTANCE_PREFIX}-template" || true
# First delete the master (if it exists).
gcloud compute instances delete \
--project "${PROJECT}" \
--quiet \
--delete-disks all \
--zone "${ZONE}" \
"${MASTER_NAME}" || true
# Find out what minions are running.
local -a minions
minions=( $(gcloud compute instances list \
--project "${PROJECT}" --zone "${ZONE}" \
--regexp "${NODE_INSTANCE_PREFIX}-.+" \
| awk 'NR >= 2 { print $1 }') )
# If any minions are running, delete them in batches.
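# "${minions[@]::10}" expands to the first (up to) 10 names and
# "${minions[@]:10}" to the remainder, so each pass deletes one batch of 10.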
while (( "${#minions[@]}" > 0 )); do
echo Deleting nodes "${minions[*]::10}"
gcloud compute instances delete \
--project "${PROJECT}" \
--quiet \
--delete-disks boot \
--zone "${ZONE}" \
"${minions[@]::10}" || true
minions=( "${minions[@]:10}" )
done
# Delete firewall rule for the master.
gcloud compute firewall-rules delete \
--project "${PROJECT}" \
--quiet \
"${MASTER_NAME}-https" || true
# Delete firewall rule for minions.
gcloud compute firewall-rules delete \
--project "${PROJECT}" \
--quiet \
"${MINION_TAG}-all" || true
# Delete routes.
local -a routes
routes=( $(gcloud compute routes list --project "${PROJECT}" \
--regexp "${NODE_INSTANCE_PREFIX}-.+" | awk 'NR >= 2 { print $1 }') )
while (( "${#routes[@]}" > 0 )); do
echo Deleting routes "${routes[*]::10}"
gcloud compute routes delete \
--project "${PROJECT}" \
--quiet \
"${routes[@]::10}" || true
routes=( "${routes[@]:10}" )
done
}
# Update a kubernetes cluster with latest source
function kube-push {
detect-project
detect-master
# Make sure we have the tar files staged on Google Storage
find-release-tars
upload-server-tars
(
echo "#! /bin/bash"
echo "mkdir -p /var/cache/kubernetes-install"
echo "cd /var/cache/kubernetes-install"
echo "readonly SERVER_BINARY_TAR_URL='${SERVER_BINARY_TAR_URL}'"
echo "readonly SALT_TAR_URL='${SALT_TAR_URL}'"
grep -v "^#" "${KUBE_ROOT}/cluster/gce/templates/common.sh"
grep -v "^#" "${KUBE_ROOT}/cluster/gce/templates/download-release.sh"
echo "echo Executing configuration"
echo "sudo salt '*' mine.update"
echo "sudo salt --force-color '*' state.highstate"
) | gcloud compute ssh --project "${PROJECT}" --zone "$ZONE" "$KUBE_MASTER" --command "sudo bash"
get-password
echo
echo "Kubernetes cluster is running. The master is running at:"
echo
echo " https://${KUBE_MASTER_IP}"
echo
echo "The user name and password to use is located in ~/.kubernetes_auth."
echo
}
# -----------------------------------------------------------------------------
# Cluster specific test helpers used from hack/e2e-test.sh
# Execute prior to running tests to build a release if required for env.
#
# Assumed Vars:
# KUBE_ROOT
function test-build-release {
# Make a release
"${KUBE_ROOT}/build/release.sh"
}
# Execute prior to running tests to initialize required structure. This is
# called from hack/e2e.go only when running -up (it is run after kube-up).
#
# Assumed vars:
# Variables from config.sh
function test-setup {
# Detect the project into $PROJECT if it isn't set
detect-project
# Open up port 80 & 8080 so common containers on minions can be reached
gcloud compute firewall-rules create \
--project "${PROJECT}" \
--target-tags "${MINION_TAG}" \
--allow tcp:80 tcp:8080 \
--network "${NETWORK}" \
"${MINION_TAG}-${INSTANCE_PREFIX}-http-alt"
}
# Execute after running tests to perform any required clean-up. This is called
# from hack/e2e.go
function test-teardown {
detect-project
echo "Shutting down test cluster in background."
gcloud compute firewall-rules delete \
--project "${PROJECT}" \
--quiet \
"${MINION_TAG}-${INSTANCE_PREFIX}-http-alt" || true
"${KUBE_ROOT}/cluster/kube-down.sh"
}
# SSH to a node by name ($1) and run a command ($2).
function ssh-to-node {
local node="$1"
local cmd="$2"
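# Try up to 5 times; if every attempt fails, the loop simply ends and the
# function returns without reporting an error.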
for try in $(seq 1 5); do
if gcloud compute ssh --ssh-flag="-o LogLevel=quiet" --project "${PROJECT}" --zone="${ZONE}" "${node}" --command "${cmd}"; then
break
fi
done
}
# Restart the kube-proxy on a node ($1)
function restart-kube-proxy {
ssh-to-node "$1" "sudo /etc/init.d/kube-proxy restart"
}
# Restart the kube-apiserver on a node ($1)
function restart-apiserver {
ssh-to-node "$1" "sudo /etc/init.d/kube-apiserver restart"
}
# Set up monitoring firewall rules for Heapster and InfluxDB
function setup-monitoring-firewall {
if [[ "${ENABLE_CLUSTER_MONITORING}" != "true" ]]; then
return
fi
echo "Setting up firewalls to Heapster based cluster monitoring."
detect-project
gcloud compute firewall-rules create "${INSTANCE_PREFIX}-monitoring-heapster" --project "${PROJECT}" \
--allow tcp:80 tcp:8083 tcp:8086 --target-tags="${MINION_TAG}" --network="${NETWORK}"
local kubectl="${KUBE_ROOT}/cluster/kubectl.sh"
local grafana_host=""
echo "waiting for monitoring pods to be scheduled."
for i in `seq 1 10`; do
grafana_host=$("${kubectl}" get pods -l name=influxGrafana -o template -t {{range.items}}{{.currentState.hostIP}}:{{end}} | sed s/://g)
if [[ ${grafana_host} != *"<"* ]]; then
break
fi
sleep 10
done
if [[ ${grafana_host} != *"<"* ]]; then
echo
echo -e "${color_green}Grafana dashboard will be available at ${color_yellow}http://${grafana_host}${color_green}. Wait for the monitoring dashboard to be online.${color_norm}"
echo
else
echo -e "${color_red}Monitoring pods failed to be scheduled!${color_norm}"
fi
}
function teardown-monitoring-firewall {
if [[ "${ENABLE_CLUSTER_MONITORING}" != "true" ]]; then
return
fi
detect-project
gcloud compute firewall-rules delete -q "${INSTANCE_PREFIX}-monitoring-heapster" --project "${PROJECT}" || true
}
function setup-logging-firewall {
# If logging with Fluentd to Elasticsearch is enabled, open the firewall for
# the Elasticsearch (log ingestion) and Kibana (log viewing) services, which
# are created on the master by kube-addons.
if [[ "${ENABLE_NODE_LOGGING-}" != "true" ]] || \
[[ "${LOGGING_DESTINATION-}" != "elasticsearch" ]] || \
[[ "${ENABLE_CLUSTER_LOGGING-}" != "true" ]]; then
return
fi
detect-project
gcloud compute firewall-rules create "${INSTANCE_PREFIX}-fluentd-elasticsearch-logging" --project "${PROJECT}" \
--allow tcp:5601 tcp:9200 tcp:9300 --target-tags "${MINION_TAG}" --network="${NETWORK}"
# This should be nearly instant once kube-addons gets a chance to
# run, and we already know we can hit the apiserver, but it's still
# worth checking.
echo "waiting for logging services to be created by the master."
local kubectl="${KUBE_ROOT}/cluster/kubectl.sh"
for i in `seq 1 10`; do
if "${kubectl}" get services -l name=kibana-logging -o template -t {{range.items}}{{.id}}{{end}} | grep -q kibana-logging &&
"${kubectl}" get services -l name=elasticsearch-logging -o template -t {{range.items}}{{.id}}{{end}} | grep -q elasticsearch-logging; then
break
fi
sleep 10
done
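# Strip the two-character zone suffix to get the region,
# e.g. ZONE=us-central1-b yields region us-central1.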
local -r region="${ZONE:0:${#ZONE}-2}"
local -r es_ip=$(gcloud compute forwarding-rules --project "${PROJECT}" describe --region "${region}" elasticsearch-logging | grep IPAddress | awk '{print $2}')
local -r kibana_ip=$(gcloud compute forwarding-rules --project "${PROJECT}" describe --region "${region}" kibana-logging | grep IPAddress | awk '{print $2}')
echo
echo -e "${color_green}Cluster logs are ingested into Elasticsearch running at ${color_yellow}http://${es_ip}:9200"
echo -e "${color_green}Kibana logging dashboard will be available at ${color_yellow}http://${kibana_ip}:5601${color_norm}"
echo
}
function teardown-logging-firewall {
if [[ "${ENABLE_NODE_LOGGING-}" != "true" ]] || \
[[ "${LOGGING_DESTINATION-}" != "elasticsearch" ]] || \
[[ "${ENABLE_CLUSTER_LOGGING-}" != "true" ]]; then
return
fi
detect-project
gcloud compute firewall-rules delete -q "${INSTANCE_PREFIX}-fluentd-elasticsearch-logging" --project "${PROJECT}" || true
# Also delete the logging services which will remove the associated forwarding rules (TCP load balancers).
local kubectl="${KUBE_ROOT}/cluster/kubectl.sh"
"${kubectl}" delete services elasticsearch-logging || true
"${kubectl}" delete services kibana-logging || true
}
# Perform preparations required to run e2e tests
function prepare-e2e() {
detect-project
}