From ac4b380453a644412497fd893781da37576ddd73 Mon Sep 17 00:00:00 2001
From: Marcin Wielgus
Date: Mon, 9 May 2016 16:23:00 +0200
Subject: [PATCH] Salt configuration for the new Cluster Autoscaler for GCE

---
 cluster/common.sh                             |   6 +
 cluster/gce/config-default.sh                 |   2 -
 cluster/gce/configure-vm.sh                   |   7 ++
 cluster/gce/util.sh                           | 119 ++++++++++--------
 .../cluster-autoscaler.manifest               |  58 +++++++++
 .../saltbase/salt/cluster-autoscaler/init.sls |  19 +++
 cluster/saltbase/salt/top.sls                 |   3 +
 7 files changed, 162 insertions(+), 52 deletions(-)
 create mode 100644 cluster/saltbase/salt/cluster-autoscaler/cluster-autoscaler.manifest
 create mode 100644 cluster/saltbase/salt/cluster-autoscaler/init.sls

diff --git a/cluster/common.sh b/cluster/common.sh
index 030101e489..285360c102 100755
--- a/cluster/common.sh
+++ b/cluster/common.sh
@@ -641,6 +641,12 @@ KUBERNETES_CONTAINER_RUNTIME: $(yaml-quote ${CONTAINER_RUNTIME:-docker})
 RKT_VERSION: $(yaml-quote ${RKT_VERSION:-})
 RKT_PATH: $(yaml-quote ${RKT_PATH:-})
 KUBERNETES_CONFIGURE_CBR0: $(yaml-quote ${KUBERNETES_CONFIGURE_CBR0:-true})
+EOF
+  fi
+  if [[ "${ENABLE_NODE_AUTOSCALER}" == "true" ]]; then
+    cat >>$file <<EOF
+ENABLE_NODE_AUTOSCALER: $(yaml-quote ${ENABLE_NODE_AUTOSCALER})
+AUTOSCALER_MIG_CONFIG: $(yaml-quote ${AUTOSCALER_MIG_CONFIG})
 EOF
   fi

diff --git a/cluster/gce/configure-vm.sh b/cluster/gce/configure-vm.sh
+    cat <<EOF >>/srv/salt-overlay/pillar/cluster-params.sls
+enable_node_autoscaler: '$(echo "${ENABLE_NODE_AUTOSCALER}" | sed -e "s/'/''/g")'
+autoscaler_mig_config: '$(echo "${AUTOSCALER_MIG_CONFIG}" | sed -e "s/'/''/g")'
+EOF
+  fi
 }

 # The job of this function is simple, but the basic regular expression syntax makes

diff --git a/cluster/gce/util.sh b/cluster/gce/util.sh
index 8bbf8c3860..45a91db4fd 100755
--- a/cluster/gce/util.sh
+++ b/cluster/gce/util.sh
@@ -562,16 +562,15 @@ function kube-up {
   if [[ ${KUBE_USE_EXISTING_MASTER:-} == "true" ]]; then
     parse-master-env
     create-nodes
-    create-autoscaler
   else
     check-existing
     create-network
     write-cluster-name
+    create-autoscaler-config
     create-master
     create-nodes-firewall
     create-nodes-template
     create-nodes
-    create-autoscaler
     check-cluster
   fi
 }
@@ -733,37 +732,82 @@ function set_num_migs() {
 
 function create-nodes() {
   local template_name="${NODE_INSTANCE_PREFIX}-template"
 
-  local instances_per_mig=$(((${NUM_NODES} + ${NUM_MIGS} - 1) / ${NUM_MIGS}))
-  local last_mig_size=$((${NUM_NODES} - (${NUM_MIGS} - 1) * ${instances_per_mig}))
+  local instances_left=${NUM_NODES}
 
   #TODO: parallelize this loop to speed up the process
-  for ((i=1; i<${NUM_MIGS}; i++)); do
+  for ((i=1; i<=${NUM_MIGS}; i++)); do
+    local group_name="${NODE_INSTANCE_PREFIX}-group-$i"
+    if [[ $i == ${NUM_MIGS} ]]; then
+      # TODO: We don't add a suffix for the last group to keep backward compatibility when there's only one MIG.
+      # We should change it at some point, but note #18545 when changing this.
+      group_name="${NODE_INSTANCE_PREFIX}-group"
+    fi
+    # Spread the remaining nodes evenly across the remaining MIGs.
+    this_mig_size=$((${instances_left} / (${NUM_MIGS}-${i}+1)))
+    instances_left=$((instances_left-${this_mig_size}))
+
     gcloud compute instance-groups managed \
-        create "${NODE_INSTANCE_PREFIX}-group-$i" \
+        create "${group_name}" \
         --project "${PROJECT}" \
         --zone "${ZONE}" \
         --base-instance-name "${NODE_INSTANCE_PREFIX}" \
-        --size "${instances_per_mig}" \
+        --size "${this_mig_size}" \
         --template "$template_name" || true;
     gcloud compute instance-groups managed wait-until-stable \
-        "${NODE_INSTANCE_PREFIX}-group-$i" \
+        "${group_name}" \
         --zone "${ZONE}" \
         --project "${PROJECT}" || true;
   done
+}
 
-  # TODO: We don't add a suffix for the last group to keep backward compatibility when there's only one MIG.
-  # We should change it at some point, but note #18545 when changing this.
-  gcloud compute instance-groups managed \
-      create "${NODE_INSTANCE_PREFIX}-group" \
-      --project "${PROJECT}" \
-      --zone "${ZONE}" \
-      --base-instance-name "${NODE_INSTANCE_PREFIX}" \
-      --size "${last_mig_size}" \
-      --template "$template_name" || true;
-  gcloud compute instance-groups managed wait-until-stable \
-      "${NODE_INSTANCE_PREFIX}-group" \
-      --zone "${ZONE}" \
-      --project "${PROJECT}" || true;
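The new create-nodes loop above sizes each MIG by repeatedly dividing the remaining node count by the number of groups still to be created, so the last (un-suffixed) group absorbs any remainder. A standalone sketch of that arithmetic, using made-up example values rather than anything from a real cluster:

```bash
#!/usr/bin/env bash
# Sketch of the spreading arithmetic used by the new create-nodes loop.
# Example inputs only: 7 nodes spread across 3 MIGs.
NUM_NODES=7
NUM_MIGS=3
instances_left=${NUM_NODES}
for ((i=1; i<=NUM_MIGS; i++)); do
  # Divide what is left by the number of groups that still need a size.
  this_mig_size=$((instances_left / (NUM_MIGS - i + 1)))
  instances_left=$((instances_left - this_mig_size))
  echo "group $i gets ${this_mig_size} node(s)"
done
# Prints: group 1 gets 2, group 2 gets 2, group 3 gets 3 node(s).
```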
+# Assumes:
+# - NUM_MIGS
+# - NODE_INSTANCE_PREFIX
+# - PROJECT
+# - ZONE
+# - AUTOSCALER_MAX_NODES
+# - AUTOSCALER_MIN_NODES
+# Exports:
+# - AUTOSCALER_MIG_CONFIG
+function create-cluster-autoscaler-mig-config() {
+
+  # Each MIG must have at least one node, so the min number of nodes
+  # must be greater than or equal to the number of MIGs.
+  if [[ ${AUTOSCALER_MIN_NODES} -lt ${NUM_MIGS} ]]; then
+    echo "AUTOSCALER_MIN_NODES must be greater than or equal to the number of MIGs (${NUM_MIGS})"
+    exit 2
+  fi
+
+  # For the same reason, the max number of nodes must also be greater
+  # than or equal to the number of MIGs.
+  if [[ ${AUTOSCALER_MAX_NODES} -lt ${NUM_MIGS} ]]; then
+    echo "AUTOSCALER_MAX_NODES must be greater than or equal to the number of MIGs (${NUM_MIGS})"
+    exit 2
+  fi
+
+  # This code assumes that the MIGs were created with the create-nodes
+  # function, which spreads nodes evenly across the MIGs.
+  AUTOSCALER_MIG_CONFIG=""
+
+  local left_min=${AUTOSCALER_MIN_NODES}
+  local left_max=${AUTOSCALER_MAX_NODES}
+
+  for ((i=1; i<=${NUM_MIGS}; i++)); do
+    local group_name="${NODE_INSTANCE_PREFIX}-group-$i"
+    if [[ $i == ${NUM_MIGS} ]]; then
+      # TODO: We don't add a suffix for the last group to keep backward compatibility when there's only one MIG.
+      # We should change it at some point, but note #18545 when changing this.
+      group_name="${NODE_INSTANCE_PREFIX}-group"
+    fi
+
+    this_mig_min=$((${left_min}/(${NUM_MIGS}-${i}+1)))
+    this_mig_max=$((${left_max}/(${NUM_MIGS}-${i}+1)))
+    left_min=$((left_min-$this_mig_min))
+    left_max=$((left_max-$this_mig_max))
+
+    local mig_url="https://www.googleapis.com/compute/v1/projects/${PROJECT}/zones/${ZONE}/instanceGroups/${group_name}"
+    AUTOSCALER_MIG_CONFIG="${AUTOSCALER_MIG_CONFIG} --nodes=${this_mig_min}:${this_mig_max}:${mig_url}"
+  done
 }
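For a sense of what the function above leaves in AUTOSCALER_MIG_CONFIG, here is the expected value for a hypothetical setup with NUM_MIGS=2, AUTOSCALER_MIN_NODES=3 and AUTOSCALER_MAX_NODES=9; the project, zone and instance prefix are placeholders:

```bash
# Hypothetical result only; the min/max bounds are split across MIGs with the
# same divide-the-remainder rule as the node spreading in create-nodes.
AUTOSCALER_MIG_CONFIG=" --nodes=1:4:https://www.googleapis.com/compute/v1/projects/my-project/zones/us-central1-b/instanceGroups/kubernetes-minion-group-1 --nodes=2:5:https://www.googleapis.com/compute/v1/projects/my-project/zones/us-central1-b/instanceGroups/kubernetes-minion-group"
```

Each --nodes=min:max:url token later becomes one argument of the cluster-autoscaler container, which is why the string is kept space-separated.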
 # Assumes:
@@ -772,38 +816,13 @@ function create-nodes() {
 # - PROJECT
 # - ZONE
 # - ENABLE_NODE_AUTOSCALER
-# - TARGET_NODE_UTILIZATION\
 # - AUTOSCALER_MAX_NODES
 # - AUTOSCALER_MIN_NODES
-function create-autoscaler() {
-  # Create autoscaler for nodes if requested
+function create-autoscaler-config() {
+  # Create the autoscaler configuration for nodes if requested.
   if [[ "${ENABLE_NODE_AUTOSCALER}" == "true" ]]; then
-    local metrics=""
-    # Current usage
-    metrics+="--custom-metric-utilization metric=custom.cloudmonitoring.googleapis.com/kubernetes.io/cpu/node_utilization,"
-    metrics+="utilization-target=${TARGET_NODE_UTILIZATION},utilization-target-type=GAUGE "
-    metrics+="--custom-metric-utilization metric=custom.cloudmonitoring.googleapis.com/kubernetes.io/memory/node_utilization,"
-    metrics+="utilization-target=${TARGET_NODE_UTILIZATION},utilization-target-type=GAUGE "
-
-    # Reservation
-    metrics+="--custom-metric-utilization metric=custom.cloudmonitoring.googleapis.com/kubernetes.io/cpu/node_reservation,"
-    metrics+="utilization-target=${TARGET_NODE_UTILIZATION},utilization-target-type=GAUGE "
-    metrics+="--custom-metric-utilization metric=custom.cloudmonitoring.googleapis.com/kubernetes.io/memory/node_reservation,"
-    metrics+="utilization-target=${TARGET_NODE_UTILIZATION},utilization-target-type=GAUGE "
-
-    echo "Creating node autoscalers."
-
-    local max_instances_per_mig=$(((${AUTOSCALER_MAX_NODES} + ${NUM_MIGS} - 1) / ${NUM_MIGS}))
-    local last_max_instances=$((${AUTOSCALER_MAX_NODES} - (${NUM_MIGS} - 1) * ${max_instances_per_mig}))
-    local min_instances_per_mig=$(((${AUTOSCALER_MIN_NODES} + ${NUM_MIGS} - 1) / ${NUM_MIGS}))
-    local last_min_instances=$((${AUTOSCALER_MIN_NODES} - (${NUM_MIGS} - 1) * ${min_instances_per_mig}))
-
-    for ((i=1; i<${NUM_MIGS}; i++)); do
-      gcloud compute instance-groups managed set-autoscaling "${NODE_INSTANCE_PREFIX}-group-$i" --zone "${ZONE}" --project "${PROJECT}" \
-        --min-num-replicas "${min_instances_per_mig}" --max-num-replicas "${max_instances_per_mig}" ${metrics} || true
-    done
-    gcloud compute instance-groups managed set-autoscaling "${NODE_INSTANCE_PREFIX}-group" --zone "${ZONE}" --project "${PROJECT}" \
-      --min-num-replicas "${last_min_instances}" --max-num-replicas "${last_max_instances}" ${metrics} || true
+    create-cluster-autoscaler-mig-config
+    echo "Using autoscaler config: ${AUTOSCALER_MIG_CONFIG}"
   fi
 }
diff --git a/cluster/saltbase/salt/cluster-autoscaler/cluster-autoscaler.manifest b/cluster/saltbase/salt/cluster-autoscaler/cluster-autoscaler.manifest
new file mode 100644
index 0000000000..c40cb60b50
--- /dev/null
+++ b/cluster/saltbase/salt/cluster-autoscaler/cluster-autoscaler.manifest
@@ -0,0 +1,58 @@
+{% set params = pillar['autoscaler_mig_config'] -%}
+{
+  "kind": "Pod",
+  "apiVersion": "v1",
+  "metadata": {
+    "name": "cluster-autoscaler",
+    "namespace": "kube-system",
+    "labels": {
+      "tier": "cluster-management",
+      "component": "cluster-autoscaler"
+    }
+  },
+  "spec": {
+    "hostNetwork": true,
+    "containers": [
+      {
+        "name": "cluster-autoscaler",
+        "image": "gcr.io/mwielgus-proj/cluster-autoscaler:v0.0.1-alpha2-4",
+        "command": [
+          "./cluster-autoscaler",
+          "--kubernetes=http://127.0.0.1:8080?inClusterConfig=f",
+          {% for param in params.split(" ") %}
+          "{{param}}",
+          {% endfor %}
+          "-v=4"
+        ],
+        "resources": {
+          "limits": {
+            "cpu": "100m",
+            "memory": "300Mi"
+          },
+          "requests": {
+            "cpu": "50m",
+            "memory": "300Mi"
+          }
+        },
+        "volumeMounts": [
+          {
+            "name": "ssl-certs",
+            "readOnly": true,
+            "mountPath": "/etc/ssl/certs"
+          }
+        ],
+        "terminationMessagePath": "/dev/termination-log",
+        "imagePullPolicy": "IfNotPresent"
+      }
+    ],
+    "volumes": [
+      {
+        "name": "ssl-certs",
+        "hostPath": {
+          "path": "/etc/ssl/certs"
+        }
+      }
+    ],
+    "restartPolicy": "Always"
+  }
+}
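The Jinja block in the manifest above splits the autoscaler_mig_config pillar on spaces and emits one JSON array element per token. A rough bash preview of that expansion, mimicking the template rather than running Salt, with a placeholder pillar value:

```bash
#!/usr/bin/env bash
# Preview how the manifest's {% for param in params.split(" ") %} loop turns
# the pillar string into entries of the container "command" array.
autoscaler_mig_config='--nodes=1:4:https://www.googleapis.com/compute/v1/projects/my-project/zones/us-central1-b/instanceGroups/kubernetes-minion-group-1 --nodes=2:5:https://www.googleapis.com/compute/v1/projects/my-project/zones/us-central1-b/instanceGroups/kubernetes-minion-group'
for param in ${autoscaler_mig_config}; do   # default IFS splitting on spaces
  printf '          "%s",\n' "${param}"
done
# Each printed line corresponds to one argument placed between
# "--kubernetes=..." and "-v=4" in the rendered pod manifest.
```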
diff --git a/cluster/saltbase/salt/cluster-autoscaler/init.sls b/cluster/saltbase/salt/cluster-autoscaler/init.sls
new file mode 100644
index 0000000000..17774195bb
--- /dev/null
+++ b/cluster/saltbase/salt/cluster-autoscaler/init.sls
@@ -0,0 +1,19 @@
+# Copy the cluster-autoscaler manifest to the manifests folder on the master.
+# The ordering of the salt states for the docker service, the kubelet and the
+# master addons below is very important to avoid a race between salt
+# restarting docker or the kubelet and the kubelet starting the master
+# components. Please see http://issue.k8s.io/10122#issuecomment-114566063
+# for a detailed explanation of this very issue.
+
+/etc/kubernetes/manifests/cluster-autoscaler.manifest:
+  file.managed:
+    - source: salt://cluster-autoscaler/cluster-autoscaler.manifest
+    - template: jinja
+    - user: root
+    - group: root
+    - mode: 644
+    - makedirs: true
+    - dir_mode: 755
+    - require:
+      - service: docker
+      - service: kubelet
diff --git a/cluster/saltbase/salt/top.sls b/cluster/saltbase/salt/top.sls
index 4b84cefccf..f997b6a111 100644
--- a/cluster/saltbase/salt/top.sls
+++ b/cluster/saltbase/salt/top.sls
@@ -79,3 +79,6 @@ base:
 {% if pillar.get('network_provider', '').lower() == 'opencontrail' %}
   - opencontrail-networking-master
 {% endif %}
+{% if pillar.get('enable_node_autoscaler', '').lower() == 'true' %}
+  - cluster-autoscaler
+{% endif %}
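For completeness, a sketch of how this configuration would be exercised from a client checkout; the KUBE_* variable names are assumed to follow the existing GCE config conventions and should be verified against cluster/gce/config-default.sh before use:

```bash
#!/usr/bin/env bash
# Hypothetical kube-up invocation that enables the node autoscaler, so that
# build-kube-env writes ENABLE_NODE_AUTOSCALER/AUTOSCALER_MIG_CONFIG into
# kube-env, configure-vm.sh turns them into Salt pillars, and top.sls pulls
# the cluster-autoscaler state onto the master.
export KUBE_ENABLE_NODE_AUTOSCALER=true   # assumed knob for ENABLE_NODE_AUTOSCALER
export KUBE_AUTOSCALER_MIN_NODES=1        # assumed knob for AUTOSCALER_MIN_NODES
export KUBE_AUTOSCALER_MAX_NODES=10       # assumed knob for AUTOSCALER_MAX_NODES
export NUM_NODES=3
cluster/kube-up.sh

# On the master, the rendered static pod manifest can then be inspected with:
#   cat /etc/kubernetes/manifests/cluster-autoscaler.manifest
```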