diff --git a/build/common.sh b/build/common.sh
index 32707393a9..a143dfa2c2 100755
--- a/build/common.sh
+++ b/build/common.sh
@@ -944,6 +944,7 @@ function kube::release::package_kube_manifests_tarball() {
   cp "${salt_dir}/kube-controller-manager/kube-controller-manager.manifest" "${dst_dir}"
   cp "${salt_dir}/kube-addons/kube-addon-manager.yaml" "${dst_dir}"
   cp "${salt_dir}/l7-gcp/glbc.manifest" "${dst_dir}"
+  cp "${salt_dir}/rescheduler/rescheduler.manifest" "${dst_dir}/"
   cp "${KUBE_ROOT}/cluster/gce/trusty/configure-helper.sh" "${dst_dir}/trusty-configure-helper.sh"
   cp "${KUBE_ROOT}/cluster/gce/gci/configure-helper.sh" "${dst_dir}/gci-configure-helper.sh"
   cp "${KUBE_ROOT}/cluster/gce/gci/health-monitor.sh" "${dst_dir}/health-monitor.sh"
diff --git a/cluster/common.sh b/cluster/common.sh
index e8895fc45e..3f5828a07b 100755
--- a/cluster/common.sh
+++ b/cluster/common.sh
@@ -550,6 +550,7 @@ ENABLE_CLUSTER_LOGGING: $(yaml-quote ${ENABLE_CLUSTER_LOGGING:-false})
 ENABLE_CLUSTER_UI: $(yaml-quote ${ENABLE_CLUSTER_UI:-false})
 ENABLE_NODE_PROBLEM_DETECTOR: $(yaml-quote ${ENABLE_NODE_PROBLEM_DETECTOR:-false})
 ENABLE_NODE_LOGGING: $(yaml-quote ${ENABLE_NODE_LOGGING:-false})
+ENABLE_RESCHEDULER: $(yaml-quote ${ENABLE_RESCHEDULER:-false})
 LOGGING_DESTINATION: $(yaml-quote ${LOGGING_DESTINATION:-})
 ELASTICSEARCH_LOGGING_REPLICAS: $(yaml-quote ${ELASTICSEARCH_LOGGING_REPLICAS:-})
 ENABLE_CLUSTER_DNS: $(yaml-quote ${ENABLE_CLUSTER_DNS:-false})
diff --git a/cluster/gce/config-default.sh b/cluster/gce/config-default.sh
index 093515cc16..3e2dec051a 100755
--- a/cluster/gce/config-default.sh
+++ b/cluster/gce/config-default.sh
@@ -128,6 +128,9 @@ if [[ "${ENABLE_CLUSTER_AUTOSCALER}" == "true" ]]; then
   AUTOSCALER_ENABLE_SCALE_DOWN="${KUBE_AUTOSCALER_ENABLE_SCALE_DOWN:-true}"
 fi
 
+# Optional: Enable Rescheduler
+ENABLE_RESCHEDULER="${KUBE_ENABLE_RESCHEDULER:-false}"
+
 # Admission Controllers to invoke prior to persisting objects in cluster
 # If we included ResourceQuota, we should keep it at the end of the list to prevent incremeting quota usage prematurely.
 ADMISSION_CONTROL=NamespaceLifecycle,LimitRanger,ServiceAccount,PersistentVolumeLabel,ResourceQuota
diff --git a/cluster/gce/config-test.sh b/cluster/gce/config-test.sh
index 6ef55fc987..8944660e4f 100755
--- a/cluster/gce/config-test.sh
+++ b/cluster/gce/config-test.sh
@@ -148,6 +148,9 @@ if [[ "${ENABLE_CLUSTER_AUTOSCALER}" == "true" ]]; then
   AUTOSCALER_ENABLE_SCALE_DOWN="${KUBE_AUTOSCALER_ENABLE_SCALE_DOWN:-false}"
 fi
 
+# Optional: Enable Rescheduler
+ENABLE_RESCHEDULER="${KUBE_ENABLE_RESCHEDULER:-false}"
+
 # If we included ResourceQuota, we should keep it at the end of the list to prevent incremeting quota usage prematurely.
 ADMISSION_CONTROL="${KUBE_ADMISSION_CONTROL:-NamespaceLifecycle,LimitRanger,ServiceAccount,PersistentVolumeLabel,ResourceQuota}"
 
diff --git a/cluster/gce/configure-vm.sh b/cluster/gce/configure-vm.sh
index d7192649e6..c99d2fa45c 100755
--- a/cluster/gce/configure-vm.sh
+++ b/cluster/gce/configure-vm.sh
@@ -434,6 +434,7 @@ enable_cluster_ui: '$(echo "$ENABLE_CLUSTER_UI" | sed -e "s/'/''/g")'
 enable_node_problem_detector: '$(echo "$ENABLE_NODE_PROBLEM_DETECTOR" | sed -e "s/'/''/g")'
 enable_l7_loadbalancing: '$(echo "$ENABLE_L7_LOADBALANCING" | sed -e "s/'/''/g")'
 enable_node_logging: '$(echo "$ENABLE_NODE_LOGGING" | sed -e "s/'/''/g")'
+enable_rescheduler: '$(echo "$ENABLE_RESCHEDULER" | sed -e "s/'/''/g")'
 logging_destination: '$(echo "$LOGGING_DESTINATION" | sed -e "s/'/''/g")'
 elasticsearch_replicas: '$(echo "$ELASTICSEARCH_LOGGING_REPLICAS" | sed -e "s/'/''/g")'
 enable_cluster_dns: '$(echo "$ENABLE_CLUSTER_DNS" | sed -e "s/'/''/g")'
diff --git a/cluster/gce/gci/configure-helper.sh b/cluster/gce/gci/configure-helper.sh
index 99fe866fea..32171d75f4 100644
--- a/cluster/gce/gci/configure-helper.sh
+++ b/cluster/gce/gci/configure-helper.sh
@@ -978,6 +978,16 @@ function start-lb-controller {
   fi
 }
 
+# Starts rescheduler.
+function start-rescheduler {
+  if [[ "${ENABLE_RESCHEDULER:-}" == "true" ]]; then
+    echo "Starting Rescheduler"
+    prepare-log-file /var/log/rescheduler.log
+    cp "${KUBE_HOME}/kube-manifests/kubernetes/gci-trusty/rescheduler.manifest" \
+      /etc/kubernetes/manifests/
+  fi
+}
+
 function reset-motd {
   # kubelet is installed both on the master and nodes, and the version is easy to parse (unlike kubectl)
   local -r version="$(/usr/bin/kubelet --version=true | cut -f2 -d " ")"
@@ -1052,6 +1062,7 @@ if [[ "${KUBERNETES_MASTER:-}" == "true" ]]; then
   start-kube-addons
   start-cluster-autoscaler
   start-lb-controller
+  start-rescheduler
 else
   start-kube-proxy
   # Kube-registry-proxy.
diff --git a/cluster/gce/trusty/configure-helper.sh b/cluster/gce/trusty/configure-helper.sh
index f2115727c8..2724fadf0a 100644
--- a/cluster/gce/trusty/configure-helper.sh
+++ b/cluster/gce/trusty/configure-helper.sh
@@ -701,6 +701,17 @@ start_cluster_autoscaler() {
   fi
 }
 
+# Starts rescheduler.
+# When ENABLE_RESCHEDULER is "true", creates /var/log/rescheduler.log (the
+# manifest mounts that file via hostPath) and installs the static pod manifest.
+start_rescheduler() {
+  if [ "${ENABLE_RESCHEDULER:-}" = "true" ]; then
+    touch /var/log/rescheduler.log
+    cp "${KUBE_HOME}/kube-manifests/kubernetes/gci-trusty/rescheduler.manifest" \
+      /etc/kubernetes/manifests/
+  fi
+}
+
 # Starts a fluentd static pod for logging.
 start_fluentd() {
   if [ "${ENABLE_NODE_LOGGING:-}" = "true" ]; then
diff --git a/cluster/gce/trusty/master.yaml b/cluster/gce/trusty/master.yaml
index ae6fb973d8..826c46b616 100644
--- a/cluster/gce/trusty/master.yaml
+++ b/cluster/gce/trusty/master.yaml
@@ -192,6 +192,7 @@ script
 		start_kube_scheduler
 		start_kube_addons
 		start_cluster_autoscaler
+		start_rescheduler
 		reset_motd
 	} 2>&1 | logger --priority daemon.info -t ${UPSTART_JOB}
 end script
diff --git a/cluster/saltbase/salt/rescheduler/init.sls b/cluster/saltbase/salt/rescheduler/init.sls
new file mode 100644
index 0000000000..a32d085575
--- /dev/null
+++ b/cluster/saltbase/salt/rescheduler/init.sls
@@ -0,0 +1,15 @@
+/etc/kubernetes/manifests/rescheduler.manifest:
+  file.managed:
+    - source: salt://rescheduler/rescheduler.manifest
+    - template: jinja
+    - user: root
+    - group: root
+    - mode: 644
+    - makedirs: true
+    - dir_mode: 755
+
+/var/log/rescheduler.log:
+  file.managed:
+    - user: root
+    - group: root
+    - mode: 644
diff --git a/cluster/saltbase/salt/rescheduler/rescheduler.manifest b/cluster/saltbase/salt/rescheduler/rescheduler.manifest
new file mode 100644
index 0000000000..657bd775a6
--- /dev/null
+++ b/cluster/saltbase/salt/rescheduler/rescheduler.manifest
@@ -0,0 +1,35 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  name: rescheduler-v0.1.0
+  namespace: kube-system
+  labels:
+    k8s-app: rescheduler
+    version: v0.1.0
+    kubernetes.io/cluster-service: "true"
+    kubernetes.io/name: "Rescheduler"
+spec:
+  hostNetwork: true
+  containers:
+  - image: gcr.io/google_containers/rescheduler:v0.1.0
+    name: rescheduler
+    volumeMounts:
+    - mountPath: /var/log/rescheduler.log
+      name: logfile
+      readOnly: false
+    resources:
+      limits:
+        cpu: 100m
+        memory: 300Mi
+      requests:
+        cpu: 10m
+        memory: 100Mi
+    command:
+    # TODO: split this out into args when we no longer need to pipe stdout to a file #6428
+    - sh
+    - -c
+    - '/rescheduler --running-in-cluster=false 1>>/var/log/rescheduler.log 2>&1'
+  volumes:
+  - hostPath:
+      path: /var/log/rescheduler.log
+    name: logfile
diff --git a/cluster/saltbase/salt/top.sls b/cluster/saltbase/salt/top.sls
index 2ae5ed8f5f..324a5bf44e 100644
--- a/cluster/saltbase/salt/top.sls
+++ b/cluster/saltbase/salt/top.sls
@@ -101,6 +101,9 @@ base:
 {% if pillar.get('enable_cluster_autoscaler', '').lower() == 'true' %}
     - cluster-autoscaler
 {% endif %}
+{% if pillar.get('enable_rescheduler', '').lower() == 'true' %}
+    - rescheduler
+{% endif %}
 {% if pillar.get('network_policy_provider', '').lower() == 'calico' %}
     - calico.master
 {% endif %}