Salt configuration for the new Cluster Autoscaler for GCE
parent a7be41d4fe
commit ac4b380453
@@ -641,6 +641,12 @@ KUBERNETES_CONTAINER_RUNTIME: $(yaml-quote ${CONTAINER_RUNTIME:-docker})
 RKT_VERSION: $(yaml-quote ${RKT_VERSION:-})
 RKT_PATH: $(yaml-quote ${RKT_PATH:-})
 KUBERNETES_CONFIGURE_CBR0: $(yaml-quote ${KUBERNETES_CONFIGURE_CBR0:-true})
+EOF
+  fi
+  if [[ "${ENABLE_NODE_AUTOSCALER}" == "true" ]]; then
+    cat >>$file <<EOF
+ENABLE_NODE_AUTOSCALER: $(yaml-quote ${ENABLE_NODE_AUTOSCALER})
+AUTOSCALER_MIG_CONFIG: $(yaml-quote ${AUTOSCALER_MIG_CONFIG})
 EOF
   fi
 }
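Note: a minimal sketch of what the two new kube-env entries expand to. The yaml-quote stub below only mirrors how that helper behaves elsewhere in these scripts (single-quote the value, double any embedded quotes); the autoscaler values are made-up placeholders, not output from a real cluster.

    yaml-quote() { echo "'$(echo "${@}" | sed -e "s/'/''/g")'"; }
    ENABLE_NODE_AUTOSCALER=true
    AUTOSCALER_MIG_CONFIG="--nodes=1:10:https://www.googleapis.com/compute/v1/projects/my-project/zones/us-central1-b/instanceGroups/kubernetes-minion-group"
    echo "ENABLE_NODE_AUTOSCALER: $(yaml-quote ${ENABLE_NODE_AUTOSCALER})"
    echo "AUTOSCALER_MIG_CONFIG: $(yaml-quote ${AUTOSCALER_MIG_CONFIG})"
    # ENABLE_NODE_AUTOSCALER: 'true'
    # AUTOSCALER_MIG_CONFIG: '--nodes=1:10:https://www.googleapis.com/compute/v1/projects/my-project/zones/us-central1-b/instanceGroups/kubernetes-minion-group'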
@@ -109,8 +109,6 @@ ENABLE_NODE_AUTOSCALER="${KUBE_ENABLE_NODE_AUTOSCALER:-false}"
 if [[ "${ENABLE_NODE_AUTOSCALER}" == "true" ]]; then
   AUTOSCALER_MIN_NODES="${KUBE_AUTOSCALER_MIN_NODES:-1}"
   AUTOSCALER_MAX_NODES="${KUBE_AUTOSCALER_MAX_NODES:-${NUM_NODES}}"
-  TARGET_NODE_UTILIZATION="${KUBE_TARGET_NODE_UTILIZATION:-0.7}"
-  ENABLE_CLUSTER_MONITORING=googleinfluxdb
 fi

 # Admission Controllers to invoke prior to persisting objects in cluster
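Note: the autoscaler stays off by default; the usual way to turn it on is through the corresponding KUBE_* variables before bringing the cluster up. The values below are only illustrative, and cluster/kube-up.sh is assumed as the standard entry point.

    export KUBE_ENABLE_NODE_AUTOSCALER=true
    export KUBE_AUTOSCALER_MIN_NODES=1
    export KUBE_AUTOSCALER_MAX_NODES=10
    cluster/kube-up.sh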
@@ -535,6 +535,13 @@ EOF
 node_labels: '$(echo "${NODE_LABELS}" | sed -e "s/'/''/g")'
 EOF
   fi
+  if [[ "${ENABLE_NODE_AUTOSCALER:-false}" == "true" ]]; then
+    cat <<EOF >>/srv/salt-overlay/pillar/cluster-params.sls
+enable_node_autoscaler: '$(echo "${ENABLE_NODE_AUTOSCALER}" | sed -e "s/'/''/g")'
+autoscaler_mig_config: '$(echo "${AUTOSCALER_MIG_CONFIG}" | sed -e "s/'/''/g")'
+EOF
+  fi
+
 }

 # The job of this function is simple, but the basic regular expression syntax makes
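Note: the sed expression doubles single quotes so the value can sit safely inside the single-quoted scalars written to cluster-params.sls. A quick check of that escaping, with a made-up value:

    echo "--nodes=1:10:it's-a-group" | sed -e "s/'/''/g"
    # --nodes=1:10:it''s-a-group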
@@ -562,16 +562,15 @@ function kube-up {
   if [[ ${KUBE_USE_EXISTING_MASTER:-} == "true" ]]; then
     parse-master-env
     create-nodes
-    create-autoscaler
   else
     check-existing
     create-network
     write-cluster-name
+    create-autoscaler-config
     create-master
     create-nodes-firewall
     create-nodes-template
     create-nodes
-    create-autoscaler
     check-cluster
   fi
 }
@@ -733,37 +732,82 @@ function set_num_migs() {
 function create-nodes() {
   local template_name="${NODE_INSTANCE_PREFIX}-template"

-  local instances_per_mig=$(((${NUM_NODES} + ${NUM_MIGS} - 1) / ${NUM_MIGS}))
-  local last_mig_size=$((${NUM_NODES} - (${NUM_MIGS} - 1) * ${instances_per_mig}))
+  local instances_left=${NUM_NODES}

   #TODO: parallelize this loop to speed up the process
-  for ((i=1; i<${NUM_MIGS}; i++)); do
+  for ((i=1; i<=${NUM_MIGS}; i++)); do
+    local group_name="${NODE_INSTANCE_PREFIX}-group-$i"
+    if [[ $i == ${NUM_MIGS} ]]; then
+      # TODO: We don't add a suffix for the last group to keep backward compatibility when there's only one MIG.
+      # We should change it at some point, but note #18545 when changing this.
+      group_name="${NODE_INSTANCE_PREFIX}-group"
+    fi
+    # Spread the remaining number of nodes evenly
+    this_mig_size=$((${instances_left} / (${NUM_MIGS}-${i}+1)))
+    instances_left=$((instances_left-${this_mig_size}))
+
     gcloud compute instance-groups managed \
-        create "${NODE_INSTANCE_PREFIX}-group-$i" \
+        create "${group_name}" \
         --project "${PROJECT}" \
         --zone "${ZONE}" \
         --base-instance-name "${NODE_INSTANCE_PREFIX}" \
-        --size "${instances_per_mig}" \
+        --size "${this_mig_size}" \
         --template "$template_name" || true;
     gcloud compute instance-groups managed wait-until-stable \
-        "${NODE_INSTANCE_PREFIX}-group-$i" \
+        "${group_name}" \
         --zone "${ZONE}" \
         --project "${PROJECT}" || true;
   done
+}

-  # TODO: We don't add a suffix for the last group to keep backward compatibility when there's only one MIG.
-  # We should change it at some point, but note #18545 when changing this.
-  gcloud compute instance-groups managed \
-      create "${NODE_INSTANCE_PREFIX}-group" \
-      --project "${PROJECT}" \
-      --zone "${ZONE}" \
-      --base-instance-name "${NODE_INSTANCE_PREFIX}" \
-      --size "${last_mig_size}" \
-      --template "$template_name" || true;
-  gcloud compute instance-groups managed wait-until-stable \
-      "${NODE_INSTANCE_PREFIX}-group" \
-      --zone "${ZONE}" \
-      --project "${PROJECT}" || true;
+# Assumes:
+# - NUM_MIGS
+# - NODE_INSTANCE_PREFIX
+# - PROJECT
+# - ZONE
+# - AUTOSCALER_MAX_NODES
+# - AUTOSCALER_MIN_NODES
+# Exports
+# - AUTOSCALER_MIG_CONFIG
+function create-cluster-autoscaler-mig-config() {
+
+  # Each MIG must have at least one node, so the min number of nodes
+  # must be greater or equal to the number of migs.
+  if [[ ${AUTOSCALER_MIN_NODES} < ${NUM_MIGS} ]]; then
+    echo "AUTOSCALER_MIN_NODES must be greater or equal ${NUM_MIGS}"
+    exit 2
+  fi
+
+  # Each MIG must have at least one node, so the min number of nodes
+  # must be greater or equal to the number of migs.
+  if [[ ${AUTOSCALER_MAX_NODES} < ${NUM_MIGS} ]]; then
+    echo "AUTOSCALER_MAX_NODES must be greater or equal ${NUM_MIGS}"
+    exit 2
+  fi
+
+  # The code assumes that the migs were created with create-nodes
+  # function which tries to evenly spread nodes across the migs.
+  AUTOSCALER_MIG_CONFIG=""
+
+  local left_min=${AUTOSCALER_MIN_NODES}
+  local left_max=${AUTOSCALER_MAX_NODES}
+
+  for ((i=1; i<=${NUM_MIGS}; i++)); do
+    local group_name="${NODE_INSTANCE_PREFIX}-group-$i"
+    if [[ $i == ${NUM_MIGS} ]]; then
+      # TODO: We don't add a suffix for the last group to keep backward compatibility when there's only one MIG.
+      # We should change it at some point, but note #18545 when changing this.
+      group_name="${NODE_INSTANCE_PREFIX}-group"
+    fi
+
+    this_mig_min=$((${left_min}/(${NUM_MIGS}-${i}+1)))
+    this_mig_max=$((${left_max}/(${NUM_MIGS}-${i}+1)))
+    left_min=$((left_min-$this_mig_min))
+    left_max=$((left_max-$this_mig_max))
+
+    local mig_url="https://www.googleapis.com/compute/v1/projects/${PROJECT}/zones/${ZONE}/instanceGroups/${group_name}"
+    AUTOSCALER_MIG_CONFIG="${AUTOSCALER_MIG_CONFIG} --nodes=${this_mig_min}:${this_mig_max}:${mig_url}"
+  done
+}

 # Assumes:
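Note: the loop above spreads nodes by giving each remaining group an equal share of whatever is left, so earlier groups get the rounded-down share and the last group absorbs the remainder. A standalone sketch of the same arithmetic with illustrative values:

    NUM_NODES=10
    NUM_MIGS=3
    instances_left=${NUM_NODES}
    for ((i=1; i<=NUM_MIGS; i++)); do
      this_mig_size=$((instances_left / (NUM_MIGS - i + 1)))
      instances_left=$((instances_left - this_mig_size))
      echo "group $i gets ${this_mig_size} nodes"
    done
    # group 1 gets 3 nodes
    # group 2 gets 3 nodes
    # group 3 gets 4 nodes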
@@ -772,38 +816,13 @@ function create-nodes()
 # - PROJECT
 # - ZONE
 # - ENABLE_NODE_AUTOSCALER
-# - TARGET_NODE_UTILIZATION\
 # - AUTOSCALER_MAX_NODES
 # - AUTOSCALER_MIN_NODES
-function create-autoscaler() {
-  # Create autoscaler for nodes if requested
+function create-autoscaler-config() {
+  # Create autoscaler for nodes configuration if requested
   if [[ "${ENABLE_NODE_AUTOSCALER}" == "true" ]]; then
-    local metrics=""
-    # Current usage
-    metrics+="--custom-metric-utilization metric=custom.cloudmonitoring.googleapis.com/kubernetes.io/cpu/node_utilization,"
-    metrics+="utilization-target=${TARGET_NODE_UTILIZATION},utilization-target-type=GAUGE "
-    metrics+="--custom-metric-utilization metric=custom.cloudmonitoring.googleapis.com/kubernetes.io/memory/node_utilization,"
-    metrics+="utilization-target=${TARGET_NODE_UTILIZATION},utilization-target-type=GAUGE "
-
-    # Reservation
-    metrics+="--custom-metric-utilization metric=custom.cloudmonitoring.googleapis.com/kubernetes.io/cpu/node_reservation,"
-    metrics+="utilization-target=${TARGET_NODE_UTILIZATION},utilization-target-type=GAUGE "
-    metrics+="--custom-metric-utilization metric=custom.cloudmonitoring.googleapis.com/kubernetes.io/memory/node_reservation,"
-    metrics+="utilization-target=${TARGET_NODE_UTILIZATION},utilization-target-type=GAUGE "
-
-    echo "Creating node autoscalers."
-
-    local max_instances_per_mig=$(((${AUTOSCALER_MAX_NODES} + ${NUM_MIGS} - 1) / ${NUM_MIGS}))
-    local last_max_instances=$((${AUTOSCALER_MAX_NODES} - (${NUM_MIGS} - 1) * ${max_instances_per_mig}))
-    local min_instances_per_mig=$(((${AUTOSCALER_MIN_NODES} + ${NUM_MIGS} - 1) / ${NUM_MIGS}))
-    local last_min_instances=$((${AUTOSCALER_MIN_NODES} - (${NUM_MIGS} - 1) * ${min_instances_per_mig}))
-
-    for ((i=1; i<${NUM_MIGS}; i++)); do
-      gcloud compute instance-groups managed set-autoscaling "${NODE_INSTANCE_PREFIX}-group-$i" --zone "${ZONE}" --project "${PROJECT}" \
-          --min-num-replicas "${min_instances_per_mig}" --max-num-replicas "${max_instances_per_mig}" ${metrics} || true
-    done
-    gcloud compute instance-groups managed set-autoscaling "${NODE_INSTANCE_PREFIX}-group" --zone "${ZONE}" --project "${PROJECT}" \
-        --min-num-replicas "${last_min_instances}" --max-num-replicas "${last_max_instances}" ${metrics} || true
+    create-cluster-autoscaler-mig-config
+    echo "Using autoscaler config: ${AUTOSCALER_MIG_CONFIG}"
   fi
 }

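Note: create-autoscaler-config runs before create-master in kube-up (see the earlier hunk), so the assembled flags reach the master through kube-env and the Salt pillar instead of through gcloud set-autoscaling calls. For example, with NUM_MIGS=2, AUTOSCALER_MIN_NODES=1 and AUTOSCALER_MAX_NODES=10, the loop would leave AUTOSCALER_MIG_CONFIG looking roughly like the following (project, zone and the kubernetes-minion prefix are placeholders):

    --nodes=0:5:https://www.googleapis.com/compute/v1/projects/my-project/zones/us-central1-b/instanceGroups/kubernetes-minion-group-1 --nodes=1:5:https://www.googleapis.com/compute/v1/projects/my-project/zones/us-central1-b/instanceGroups/kubernetes-minion-group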
@@ -0,0 +1,58 @@
+{% set params = pillar['autoscaler_mig_config'] -%}
+{
+  "kind": "Pod",
+  "apiVersion": "v1",
+  "metadata": {
+    "name": "cluster-autoscaler",
+    "namespace": "kube-system",
+    "labels": {
+      "tier": "cluster-management",
+      "component": "cluster-autoscaler"
+    }
+  },
+  "spec": {
+    "hostNetwork": true,
+    "containers": [
+      {
+        "name": "cluster-autoscaler",
+        "image": "gcr.io/mwielgus-proj/cluster-autoscaler:v0.0.1-alpha2-4",
+        "command": [
+          "./cluster-autoscaler",
+          "--kubernetes=http://127.0.0.1:8080?inClusterConfig=f",
+          {% for param in params.split(" ") %}
+          "{{param}}",
+          {% endfor %}
+          "-v=4"
+        ],
+        "resources": {
+          "limits": {
+            "cpu": "100m",
+            "memory": "300Mi"
+          },
+          "requests": {
+            "cpu": "50m",
+            "memory": "300Mi"
+          }
+        },
+        "volumeMounts": [
+          {
+            "name": "ssl-certs",
+            "readOnly": true,
+            "mountPath": "/etc/ssl/certs"
+          }
+        ],
+        "terminationMessagePath": "/dev/termination-log",
+        "imagePullPolicy": "IfNotPresent"
+      }
+    ],
+    "volumes": [
+      {
+        "name": "ssl-certs",
+        "hostPath": {
+          "path": "/etc/ssl/certs"
+        }
+      }
+    ],
+    "restartPolicy": "Always"
+  }
+}
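Note: the pillar value is one space-separated string, so the Jinja for-loop splits it into one JSON array element per --nodes flag between the fixed arguments. The same split can be sanity-checked in shell; the value here is a shortened stand-in for the real pillar:

    params='--nodes=0:5:https://example/group-1 --nodes=1:5:https://example/group'
    for param in ${params}; do printf '"%s",\n' "${param}"; done
    # "--nodes=0:5:https://example/group-1",
    # "--nodes=1:5:https://example/group",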
@@ -0,0 +1,19 @@
+# Copy autoscaler manifest to manifests folder for master.
+# The ordering of salt states for service docker, kubelet and
+# master-addon below is very important to avoid the race between
+# salt restart docker or kubelet and kubelet start master components.
+# Please see http://issue.k8s.io/10122#issuecomment-114566063
+# for detail explanation on this very issue.
+
+/etc/kubernetes/manifests/cluster-autoscaler.manifest:
+  file.managed:
+    - source: salt://cluster-autoscaler/cluster-autoscaler.manifest
+    - template: jinja
+    - user: root
+    - group: root
+    - mode: 644
+    - makedirs: true
+    - dir_mode: 755
+    - require:
+      - service: docker
+      - service: kubelet
@@ -79,3 +79,6 @@ base:
 {% if pillar.get('network_provider', '').lower() == 'opencontrail' %}
     - opencontrail-networking-master
 {% endif %}
+{% if pillar.get('enable_node_autoscaler', '').lower() == 'true' %}
+    - cluster-autoscaler
+{% endif %}
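Note: with the pillar flag set, Salt drops the manifest into /etc/kubernetes/manifests on the master and the kubelet runs it as a static pod. A quick way to confirm it is up from a workstation with cluster credentials (the pod name carries a master-specific suffix):

    kubectl get pods --namespace=kube-system | grep cluster-autoscaler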