Make big clusters work again after introduction of subnets

pull/6/head
gmarek 2017-06-14 13:23:41 +02:00
parent 4fd040afc7
commit 64f6606833
6 changed files with 75 additions and 11 deletions

View File

@ -36,6 +36,25 @@ function get-master-size {
echo "${suggested_master_size}"
}
# Prints the IP range to use for node instance IPs on stdout.
#
# Globals read:
#   NODE_IP_RANGE - optional; when non-empty it is printed unchanged.
#   NUM_NODES     - node count used to pick a default range size.
# Outputs:
#   stdout: the chosen range in CIDR notation.
#   stderr: a notice when the user-provided NODE_IP_RANGE is used.
function get-node-ip-range {
  # A user-supplied range always wins. The notice goes to stderr so that
  # callers capturing stdout receive only the range itself.
  if [[ -n "${NODE_IP_RANGE:-}" ]]; then
    echo "Using user provided NODE_IP_RANGE: ${NODE_IP_RANGE}" >&2
    echo "${NODE_IP_RANGE}"
    return
  fi

  # Otherwise pick the prefix length from the cluster size: the more nodes,
  # the shorter the prefix (i.e. the wider the range).
  local prefix_length=22
  if [[ "${NUM_NODES}" -gt 4000 ]]; then
    prefix_length=19
  elif [[ "${NUM_NODES}" -gt 2000 ]]; then
    prefix_length=20
  elif [[ "${NUM_NODES}" -gt 1000 ]]; then
    prefix_length=21
  fi
  echo "10.40.0.0/${prefix_length}"
}
if [[ "${FEDERATION:-}" == true ]]; then
NODE_SCOPES="${NODE_SCOPES:-compute-rw,monitoring,logging-write,storage-ro,https://www.googleapis.com/auth/ndev.clouddns.readwrite}"
else

View File

@ -218,7 +218,7 @@ if [ ${ENABLE_IP_ALIASES} = true ]; then
SERVICE_CLUSTER_IP_SUBNETWORK=${KUBE_GCE_SERVICE_CLUSTER_IP_SUBNETWORK:-${INSTANCE_PREFIX}-subnet-services}
# NODE_IP_RANGE is used when ENABLE_IP_ALIASES=true. It is the primary range in
# the subnet and is the range used for node instance IPs.
NODE_IP_RANGE="${NODE_IP_RANGE:-10.40.0.0/22}"
NODE_IP_RANGE="$(get-node-ip-range)"
# Add to the provider custom variables.
PROVIDER_VARS="${PROVIDER_VARS} ENABLE_IP_ALIASES"
fi
@ -245,7 +245,7 @@ NETWORK_POLICY_PROVIDER="${NETWORK_POLICY_PROVIDER:-none}" # calico
# How should the kubelet configure hairpin mode?
HAIRPIN_MODE="${HAIRPIN_MODE:-promiscuous-bridge}" # promiscuous-bridge, hairpin-veth, none
# Optional: if set to true, kube-up will configure the cluster to run e2e tests.
E2E_STORAGE_TEST_ENVIRONMENT=${KUBE_E2E_STORAGE_TEST_ENVIRONMENT:-false}
E2E_STORAGE_TEST_ENVIRONMENT="${KUBE_E2E_STORAGE_TEST_ENVIRONMENT:-false}"
# Evict pods whenever compute resource availability on the nodes gets below a threshold.
EVICTION_HARD="${EVICTION_HARD:-memory.available<250Mi,nodefs.available<10%,nodefs.inodesFree<5%}"
@ -266,4 +266,6 @@ SOFTLOCKUP_PANIC="${SOFTLOCKUP_PANIC:-false}" # true, false
# Indicates if the values (i.e. KUBE_USER and KUBE_PASSWORD for basic
# authentication) in metadata should be treated as canonical, and therefore disk
# copies ought to be recreated/clobbered.
METADATA_CLOBBERS_CONFIG=${METADATA_CLOBBERS_CONFIG:-false}
METADATA_CLOBBERS_CONFIG="${METADATA_CLOBBERS_CONFIG:-false}"
# Optional: when set to "true" and IP aliases are disabled, create-subnetworks
# expands the default subnetwork of a newly created network to a /19 (a
# pre-existing network is left untouched), and delete-subnetworks removes the
# subnets of a custom-mode network. Defaults to false.
ENABLE_BIG_CLUSTER_SUBNETS="${ENABLE_BIG_CLUSTER_SUBNETS:-false}"

View File

@ -91,7 +91,7 @@ CLUSTER_IP_RANGE="${CLUSTER_IP_RANGE:-10.100.0.0/14}"
MASTER_IP_RANGE="${MASTER_IP_RANGE:-10.246.0.0/24}"
# NODE_IP_RANGE is used when ENABLE_IP_ALIASES=true. It is the primary range in
# the subnet and is the range used for node instance IPs.
NODE_IP_RANGE="${NODE_IP_RANGE:-10.40.0.0/22}"
NODE_IP_RANGE="$(get-node-ip-range)"
RUNTIME_CONFIG="${KUBE_RUNTIME_CONFIG:-}"
@ -315,3 +315,5 @@ ENABLE_APISERVER_ADVANCED_AUDIT="${ENABLE_APISERVER_ADVANCED_AUDIT:-true}" # tru
if [[ "${ENABLE_APISERVER_ADVANCED_AUDIT}" == "true" ]]; then
FEATURE_GATES="${FEATURE_GATES},AdvancedAuditing=true"
fi
# Optional: when set to "true" and IP aliases are disabled, create-subnetworks
# expands the default subnetwork of a newly created network to a /19 (a
# pre-existing network is left untouched), and delete-subnetworks removes the
# subnets of a custom-mode network. Defaults to false.
ENABLE_BIG_CLUSTER_SUBNETS="${ENABLE_BIG_CLUSTER_SUBNETS:-false}"

View File

@ -88,6 +88,8 @@ NODE_INSTANCE_PREFIX="${INSTANCE_PREFIX}-minion"
NODE_TAGS="${NODE_TAG}"
ALLOCATE_NODE_CIDRS=true
PREEXISTING_NETWORK=false
PREEXISTING_NETWORK_MODE=""
KUBE_PROMPT_FOR_UPDATE=${KUBE_PROMPT_FOR_UPDATE:-"n"}
# How long (in seconds) to wait for cluster initialization.
@ -508,7 +510,11 @@ function make-gcloud-network-argument() {
ret="${ret},aliases=pods-default:${alias_size}"
ret="${ret} --no-can-ip-forward"
else
ret="--network ${network}"
if [[ ${PREEXISTING_NETWORK} = "true" && "${PREEXISTING_NETWORK_MODE}" != "custom" ]]; then
ret="--network ${network}"
else
ret="--subnet=${network}"
fi
ret="${ret} --can-ip-forward"
if [[ -n ${address:-} ]]; then
ret="${ret} --address ${address}"
@ -746,6 +752,10 @@ function create-network() {
# The network needs to be created synchronously or we have a race. The
# firewalls can be added concurrent with instance creation.
gcloud compute networks create --project "${PROJECT}" "${NETWORK}" --mode=auto
else
PREEXISTING_NETWORK=true
PREEXISTING_NETWORK_MODE="$(gcloud compute networks list ${NETWORK} --format='value(x_gcloud_mode)' || true)"
echo "Found existing network ${NETWORK} in ${PREEXISTING_NETWORK_MODE} mode."
fi
if ! gcloud compute firewall-rules --project "${PROJECT}" describe "${CLUSTER_NAME}-default-internal-master" &>/dev/null; then
@ -775,10 +785,31 @@ function create-network() {
fi
}
# Switches ${NETWORK} to custom mode and widens its subnet in ${REGION} to a
# /19 prefix.
#
# Globals read: NETWORK, PROJECT, REGION
# Returns: the exit status of the expand-ip-range call; a failure of the
#   preceding switch-mode call is ignored.
function expand-default-subnetwork() {
  # Best effort: the mode switch is allowed to fail.
  # NOTE(review): presumably this covers a network that is already in custom
  # mode - confirm.
  gcloud compute networks switch-mode "${NETWORK}" --mode custom --project "${PROJECT}" --quiet \
    || true

  # The subnet is addressed by the network's name.
  local -a expand_flags=(
    --region="${REGION}"
    --project "${PROJECT}"
    --prefix-length=19
    --quiet
  )
  gcloud compute networks subnets expand-ip-range "${NETWORK}" "${expand_flags[@]}"
}
function create-subnetworks() {
case ${ENABLE_IP_ALIASES} in
true) ;;
false) return;;
true) echo "IP aliases are enabled. Creating subnetworks.";;
false)
echo "IP aliases are disabled."
if [[ "${ENABLE_BIG_CLUSTER_SUBNETS}" = "true" ]]; then
if [[ "${PREEXISTING_NETWORK}" != "true" ]]; then
expand-default-subnetwork
else
echo "${color_yellow}Using pre-existing network ${NETWORK}, subnets won't be expanded to /19!${color_norm}"
fi
fi
return;;
*) echo "${color_red}Invalid argument to ENABLE_IP_ALIASES${color_norm}"
exit 1;;
esac
@ -867,6 +898,17 @@ function delete-network() {
function delete-subnetworks() {
if [[ ${ENABLE_IP_ALIASES:-} != "true" ]]; then
if [[ "${ENABLE_BIG_CLUSTER_SUBNETS}" = "true" ]]; then
# If the network is running in custom mode, we need to delete its subnets.
mode="$(gcloud compute networks list ${NETWORK} --format='value(x_gcloud_mode)' || true)"
if [[ "${mode}" == "custom" ]]; then
echo "Deleting default subnets..."
# This value should be kept in sync with the number of regions.
local parallelism=9
gcloud compute networks subnets list --network="${NETWORK}" --format='value(region.basename())' | \
xargs -i -P ${parallelism} gcloud --quiet compute networks subnets delete "${NETWORK}" --region="{}" || true
fi
fi
return
fi
@ -1612,9 +1654,8 @@ function kube-down() {
"${NETWORK}-default-ssh" \
"${NETWORK}-default-internal" # Pre-1.5 clusters
delete-subnetworks
if [[ "${KUBE_DELETE_NETWORK}" == "true" ]]; then
delete-subnetworks || true
delete-network || true # might fail if there are leaked firewall rules
fi

View File

@ -39,7 +39,7 @@ NODE_OS_DISTRIBUTION=${KUBE_NODE_OS_DISTRIBUTION:-debian}
MASTER_IMAGE=${KUBE_GCE_MASTER_IMAGE:-cos-stable-59-9460-64-0}
MASTER_IMAGE_PROJECT=${KUBE_GCE_MASTER_PROJECT:-cos-cloud}
NETWORK=${KUBE_GCE_NETWORK:-default}
NETWORK=${KUBE_GCE_NETWORK:-e2e}
INSTANCE_PREFIX="${INSTANCE_PREFIX:-"default"}"
MASTER_NAME="${INSTANCE_PREFIX}-kubemark-master"
AGGREGATOR_MASTER_NAME="${INSTANCE_PREFIX}-kubemark-aggregator"

View File

@ -80,7 +80,7 @@ function create-master-instance-with-resources {
--image-project="${MASTER_IMAGE_PROJECT}" \
--image "${MASTER_IMAGE}" \
--tags "${MASTER_TAG}" \
--network "${NETWORK}" \
--subnet "${NETWORK}" \
--scopes "storage-ro,compute-rw,logging-write" \
--boot-disk-size "${MASTER_ROOT_DISK_SIZE}" \
--disk "name=${MASTER_NAME}-pd,device-name=master-pd,mode=rw,boot=no,auto-delete=no"