From 13ba177668675bc4e398dae6ce727945b4f3472b Mon Sep 17 00:00:00 2001
From: Marek Biskup
Date: Tue, 2 Jun 2015 11:41:13 +0200
Subject: [PATCH] kube-addon-update.sh

---
 cluster/addons/README.md                           |  45 ++
 .../influxdb/influxdb-service.yaml                 |   2 +
 cluster/saltbase/salt/kube-addons/init.sls         |  30 ++
 cluster/saltbase/salt/kube-addons/initd            |   7 +-
 .../salt/kube-addons/kube-addon-update.sh          | 443 ++++++++++++++++++
 .../saltbase/salt/kube-addons/kube-addons.sh       |  28 +-
 .../saltbase/salt/kube-master-addons/init.sls      |   8 +
 .../saltbase/salt/kube-master-addons/initd         |   7 +-
 .../kube-master-addons/kube-master-addons.sh       |   2 +
 cluster/saltbase/salt/logrotate/init.sls           |   2 +-
 cluster/saltbase/salt/monit/init.sls               |   8 +
 cluster/saltbase/salt/monit/kube-addons            |   5 +
 test/e2e/monitoring.go                             |   2 +-
 13 files changed, 557 insertions(+), 32 deletions(-)
 create mode 100644 cluster/addons/README.md
 create mode 100755 cluster/saltbase/salt/kube-addons/kube-addon-update.sh
 create mode 100644 cluster/saltbase/salt/monit/kube-addons

diff --git a/cluster/addons/README.md b/cluster/addons/README.md
new file mode 100644
index 0000000000..b374d89a20
--- /dev/null
+++ b/cluster/addons/README.md
@@ -0,0 +1,45 @@
+# Cluster add-ons
+
+Cluster add-ons are Services and Replication Controllers (with pods) that are
+shipped with the Kubernetes binaries and whose update policy is consistent
+with the update of the Kubernetes cluster itself.
+
+On the cluster, the add-ons are kept in ```/etc/kubernetes/addons``` on the
+master node, in yaml files (json is not supported at the moment).
+Each add-on must specify the following label: ```kubernetes.io/cluster-service: true```.
+Yaml files that do not define this label will be ignored.
+
+The naming convention for Replication Controllers is
+```<basename>-<version>```, where ```<basename>``` is the same in consecutive
+versions and ```<version>``` changes when the component is updated
+(```<version>``` must not contain ```-```). For instance,
+```heapster-controller-v1``` and ```heapster-controller-12``` are the
+same controller with two different versions, while ```heapster-controller-v1```
+and ```heapster-newcontroller-12``` are treated as two different applications.
+For Services the name is just ```<basename>``` (with an empty version number)
+because we do not expect the service name to change in consecutive versions.
+The naming convention is important for add-on updates.
+
+# Add-on update
+
+To update add-ons, just update the contents of the ```/etc/kubernetes/addons```
+directory with the desired definition of add-ons. Then the system will take care
+of:
+
+1. Removing objects from the API server whose manifest was removed.
+  1. This is done for add-ons in the system that do not have a manifest file with
+     the same basename.
+1. Creating objects from new manifests.
+  1. This is done for manifests that do not correspond to existing API objects
+     with the same basename.
+1. Updating objects whose basename is the same but whose version changed.
+  1. The update is currently performed by removing the old object and creating
+     the new one. In the future, rolling update of replication controllers will
+     be implemented to keep the add-on services up and running during updates of
+     add-on pods.
+  1. Note that this cannot happen for Services as their version is always empty.
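+
+For example, given the naming convention above, an updated version of an add-on
+controller might be defined by a manifest like the following (a sketch only;
+the controller name, version suffix, and namespace are illustrative):
+
+```yaml
+apiVersion: v1beta3
+kind: ReplicationController
+metadata:
+  name: heapster-controller-v2
+  namespace: default
+  labels:
+    kubernetes.io/cluster-service: "true"
+```
+
+Placed in ```/etc/kubernetes/addons```, this manifest would be matched to a
+running ```heapster-controller-v1``` by the shared basename
+```heapster-controller```; the old controller would then be deleted and the
+new one created in its place.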
+
+
+
+
+[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/cluster/addons/README.md?pixel)]()
diff --git a/cluster/addons/cluster-monitoring/influxdb/influxdb-service.yaml b/cluster/addons/cluster-monitoring/influxdb/influxdb-service.yaml
index 08d7ed7594..344c087151 100644
--- a/cluster/addons/cluster-monitoring/influxdb/influxdb-service.yaml
+++ b/cluster/addons/cluster-monitoring/influxdb/influxdb-service.yaml
@@ -5,6 +5,8 @@ metadata:
   namespace: default
   labels:
     k8s-app: influxGrafana
+    kubernetes.io/cluster-service: "true"
+    kubernetes.io/name: "influxGrafana"
 spec:
   ports:
     - name: http
diff --git a/cluster/saltbase/salt/kube-addons/init.sls b/cluster/saltbase/salt/kube-addons/init.sls
index a129666b43..878b724445 100644
--- a/cluster/saltbase/salt/kube-addons/init.sls
+++ b/cluster/saltbase/salt/kube-addons/init.sls
@@ -1,3 +1,16 @@
+addon-dir-delete:
+  file.absent:
+    - name: /etc/kubernetes/addons
+
+addon-dir-create:
+  file.directory:
+    - name: /etc/kubernetes/addons
+    - user: root
+    - group: root
+    - mode: 0755
+    - require:
+        - file: addon-dir-delete
+
 {% if pillar.get('enable_cluster_monitoring', '').lower() == 'influxdb' %}
 /etc/kubernetes/addons/cluster-monitoring/influxdb:
   file.recurse:
@@ -58,6 +71,13 @@
     - group: root
     - mode: 755
 
+/etc/kubernetes/kube-addon-update.sh:
+  file.managed:
+    - source: salt://kube-addons/kube-addon-update.sh
+    - user: root
+    - group: root
+    - mode: 755
+
 {% if grains['os_family'] == 'RedHat' %}
 
 /usr/lib/systemd/system/kube-addons.service:
@@ -77,6 +97,16 @@
 
 {% endif %}
 
+# Stop the kube-addons service each time salt is executed, just in case
+# there was a modification of the add-ons.
+# Ideally this would be handled by watching file changes, but
+# somehow it doesn't work.
+service-kube-addon-stop:
+  service.dead:
+    - name: kube-addons
+
 kube-addons:
   service.running:
     - enable: True
+    - require:
+        - service: service-kube-addon-stop
diff --git a/cluster/saltbase/salt/kube-addons/initd b/cluster/saltbase/salt/kube-addons/initd
index 6b06e8c7cd..931877b37b 100644
--- a/cluster/saltbase/salt/kube-addons/initd
+++ b/cluster/saltbase/salt/kube-addons/initd
@@ -67,12 +67,7 @@ case "$1" in
         esac
   ;;
   status)
-        if [ ! -e ${PIDFILE} ]; then
-          exit 1
-        fi
-        pid=$(cat ${PIDFILE})
-        # Checks that ${pid} is running AND is us.
-        ps --no-headers ${pid} | grep ${SCRIPTNAME} > /dev/null || exit $?
+        status_of_proc -p $PIDFILE $KUBE_ADDONS_SH $NAME
   ;;
 
   restart|force-reload)
diff --git a/cluster/saltbase/salt/kube-addons/kube-addon-update.sh b/cluster/saltbase/salt/kube-addons/kube-addon-update.sh
new file mode 100755
index 0000000000..12293f728f
--- /dev/null
+++ b/cluster/saltbase/salt/kube-addons/kube-addon-update.sh
@@ -0,0 +1,443 @@
+#!/bin/bash
+
+# Copyright 2015 The Kubernetes Authors All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# The business logic for whether a given object should be created
+# was already enforced by salt, and /etc/kubernetes/addons is the
+# managed result of that. Start everything below that directory.
+
+# Parameters
+# $1 path to add-ons
+
+
+# LIMITATIONS
+# 1. controllers are not updated unless their name is changed
+# 2. Services will not be updated unless their name is changed,
+#    but for services we actually want updates without a name change.
+# 3. Json files are not handled at all. Currently add-ons must be
+#    in yaml files.
+# 4. exit code is probably not always correct (I haven't checked
+#    carefully whether it works in 100% of cases)
+# 5. There are no unit tests.
+# 6. Will not work if the total length of paths to add-ons is greater than
+#    bash can handle. Probably it is not a problem: ARG_MAX=2097152 on GCE.
+# 7. Performance issue: yaml files are read many times in a single execution.
+
+# cosmetic improvements to be done
+# 1. improve the log function; add timestamp, file name, etc.
+# 2. logging doesn't work from functions that print things out.
+# 3. kubectl prints its output to stderr (the output should be captured and
+#    then logged)
+
+
+
+# global config
+KUBECTL=${TEST_KUBECTL:-/usr/local/bin/kubectl}  # substitute for tests
+NUM_TRIES_FOR_CREATE=${TEST_NUM_TRIES:-100}
+DELAY_AFTER_CREATE_ERROR_SEC=${TEST_DELAY_AFTER_ERROR_SEC:-10}
+NUM_TRIES_FOR_STOP=${TEST_NUM_TRIES:-100}
+DELAY_AFTER_STOP_ERROR_SEC=${TEST_DELAY_AFTER_ERROR_SEC:-10}
+
+if [[ ! -x ${KUBECTL} ]]; then
+    echo "ERROR: kubectl command (${KUBECTL}) not found or is not executable" 1>&2
+    exit 1
+fi
+
+
+# remember that you can't log from functions that print some output (because
+# logs are also printed on stdout)
+# $1 level
+# $2 message
+function log() {
+  # manage log levels manually here
+
+  # add the timestamp if you find it useful
+  case $1 in
+    DB3 )
+#        echo "$1: $2"
+        ;;
+    DB2 )
+#        echo "$1: $2"
+        ;;
+    DBG )
+#        echo "$1: $2"
+        ;;
+    INFO )
+        echo "$1: $2"
+        ;;
+    WRN )
+        echo "$1: $2"
+        ;;
+    ERR )
+        echo "$1: $2"
+        ;;
+    * )
+        echo "INVALID_LOG_LEVEL $1: $2"
+        ;;
+  esac
+}
+
+# $1 yaml file path
+function get-object-kind-from-file() {
+    # prints to stdout, so log cannot be used
+    # WARNING: only yaml is supported
+    cat $1 | python -c '''
+try:
+    import pipes,sys,yaml
+    y = yaml.load(sys.stdin)
+    labels = y["metadata"]["labels"]
+    if ("kubernetes.io/cluster-service", "true") not in labels.iteritems():
+        # all add-ons must have the label "kubernetes.io/cluster-service".
+        # Otherwise we are ignoring them (the update will not work anyway)
+        print "ERROR"
+    else:
+        print y["kind"]
+except Exception, ex:
+    print "ERROR"
+'''
+}
+
+# $1 yaml file path
+function get-object-name-from-file() {
+    # prints to stdout, so log cannot be used
+    # WARNING: only yaml is supported
+    cat $1 | python -c '''
+try:
+    import pipes,sys,yaml
+    y = yaml.load(sys.stdin)
+    labels = y["metadata"]["labels"]
+    if ("kubernetes.io/cluster-service", "true") not in labels.iteritems():
+        # all add-ons must have the label "kubernetes.io/cluster-service".
+        # Otherwise we are ignoring them (the update will not work anyway)
+        print "ERROR"
+    else:
+        print y["metadata"]["name"]
+except Exception, ex:
+    print "ERROR"
+'''
+}
+
+# $1 addon directory path
+# $2 addon type (e.g. ReplicationController)
+# echoes the string with paths to files containing addons of the given type
+# works only for yaml files (!) (ignores json files)
+function get-addons-from-disk() {
+    # prints to stdout, so log cannot be used
+    local -r addon_dir=$1
+    local -r obj_type=$2
+    local kind
+    local file_path
+    for file_path in $(find ${addon_dir} -name \*.yaml); do
+        kind=$(get-object-kind-from-file ${file_path})
+        # WARNING: assumption that the topmost indentation is zero (I'm not sure yaml allows for topmost indentation)
+        if [[ "${kind}" == "${obj_type}" ]]; then
+            echo ${file_path}
+        fi
+    done
+}
+
+# waits for all subprocesses
+# returns 0 if all of them were successful and 1 otherwise
+function wait-for-jobs() {
+    local rv=0
+    local pid
+    for pid in $(jobs -p); do
+        # use a group command, not a subshell, so that rv is set in this shell
+        wait ${pid} || { rv=1; log ERR "error in pid ${pid}"; }
+        log DB2 "pid ${pid} completed, current error code: ${rv}"
+    done
+    return ${rv}
+}
+
+
+function run-until-success() {
+    local -r command=$1
+    local tries=$2
+    local -r delay=$3
+    local -r command_name=$1
+    while [ ${tries} -gt 0 ]; do
+        log DBG "executing: '$command'"
+        # let's give the command as an argument to bash -c, so that we can use
+        # && and || inside the command itself
+        /bin/bash -c "${command}" && \
+            log DB3 "== Successfully executed ${command_name} at $(date -Is) ==" && \
+            return 0
+        let tries=tries-1
+        log INFO "== Failed to execute ${command_name} at $(date -Is). ${tries} tries remaining. =="
+        sleep ${delay}
+    done
+    return 1
+}
+
+# $1 object type
+function get-addons-from-server() {
+    local -r obj_type=$1
+    "${KUBECTL}" get "${obj_type}" -o template -t "{{range.items}}{{.metadata.name}} {{end}}" --api-version=v1beta3 -l kubernetes.io/cluster-service=true
+}
+
+# returns the characters after the last separator (including the separator).
+# If the separator is empty or if it doesn't appear in the string,
+# an empty string is printed,
+# e.g. get-suffix "heapster-controller-v1" "-" prints "-v1"
+# $1 input string
+# $2 separator (must be single character, or empty)
+function get-suffix() {
+    # prints to stdout, so log cannot be used
+    local -r input_string=$1
+    local -r separator=$2
+    local suffix
+
+    if [[ "${separator}" == "" ]]; then
+        echo ""
+        return
+    fi
+
+    if [[ "${input_string}" == *"${separator}"* ]]; then
+        suffix=$(echo "${input_string}" | rev | cut -d "${separator}" -f1 | rev)
+        echo "${separator}${suffix}"
+    else
+        echo ""
+    fi
+}
+
+# returns the characters up to the last separator (without it),
+# e.g. get-basename "heapster-controller-v1" "-" prints "heapster-controller"
+# $1 input string
+# $2 separator
+function get-basename() {
+    # prints to stdout, so log cannot be used
+    local -r input_string=$1
+    local -r separator=$2
+    local suffix
+    suffix="$(get-suffix ${input_string} ${separator})"
+    # this will strip the suffix (if it matches)
+    echo ${input_string%$suffix}
+}
+
+function stop-object() {
+    local -r obj_type=$1
+    local -r obj_name=$2
+    log INFO "Stopping ${obj_type} ${obj_name}"
+    run-until-success "${KUBECTL} stop ${obj_type} ${obj_name}" ${NUM_TRIES_FOR_STOP} ${DELAY_AFTER_STOP_ERROR_SEC}
+}
+
+function create-object() {
+    local -r obj_type=$1
+    local -r file_path=$2
+    log INFO "Creating new ${obj_type} from file ${file_path}"
+    run-until-success "${KUBECTL} create -f ${file_path}" ${NUM_TRIES_FOR_CREATE} ${DELAY_AFTER_CREATE_ERROR_SEC}
+}
+
+function update-object() {
+    local -r obj_type=$1
+    local -r obj_name=$2
+    local -r file_path=$3
+    log INFO "updating the ${obj_type} ${obj_name} with the new definition ${file_path}"
+    stop-object ${obj_type} ${obj_name}
+    create-object ${obj_type} ${file_path}
+}
+
+# deletes the objects from the server
+# $1 object type
+# $2 a list of object names
+function stop-objects() {
+    local -r obj_type=$1
+    local -r obj_names=$2
+    local obj_name
+    for obj_name in ${obj_names}; do
+        stop-object ${obj_type} ${obj_name} &
+    done
+}
+
+# creates objects from the given files
+# $1 object type
+# $2 a list of paths to definition files
+function create-objects() {
+    local -r obj_type=$1
+    local -r file_paths=$2
+    local file_path
+    for file_path in ${file_paths}; do
+        create-object ${obj_type} ${file_path} &
+    done
+}
+
+# updates objects
+# $1 object type
+# $2 a list of update specifications
+# each update specification is a ';' separated pair: <object name>;<file path>
+function update-objects() {
+    local -r obj_type=$1
+    local -r update_spec=$2
+    local objdesc
+    for objdesc in ${update_spec}; do
+        IFS=';' read -a array <<< ${objdesc}
+        update-object ${obj_type} ${array[0]} ${array[1]} &
+    done
+}
+
+# Global variables set by function match-objects.
+for_delete=""   # a list of object names to be deleted
+for_update=""   # a list of pairs <object name>;<file path> for objects that should be updated
+for_ignore=""   # a list of object names that can be ignored
+new_files=""    # a list of file paths that weren't matched by any existing objects (these objects must be created now)
+
+
+# $1 path to files with objects
+# $2 object type in the API (ReplicationController or Service)
+# $3 name separator (single character or empty)
+function match-objects() {
+    local -r addon_dir=$1
+    local -r obj_type=$2
+    local -r separator=$3
+
+    # output variables (globals)
+    for_delete=""
+    for_update=""
+    for_ignore=""
+    new_files=""
+
+    addon_names_on_server=$(get-addons-from-server "${obj_type}")
+    addon_paths_in_files=$(get-addons-from-disk "${addon_dir}" "${obj_type}")
+
+    log DB2 "addon_names_on_server=${addon_names_on_server}"
+    log DB2 "addon_paths_in_files=${addon_paths_in_files}"
+
+    local matched_files=""
+
+    local basename_on_server=""
+    local name_on_server=""
+    local suffix_on_server=""
+    local name_from_file=""
+    local suffix_from_file=""
+    local found=0
+    local addon_path=""
+
+    for name_on_server in ${addon_names_on_server}; do
+        basename_on_server=$(get-basename ${name_on_server} ${separator})
+        suffix_on_server="$(get-suffix ${name_on_server} ${separator})"
+
+        log DB3 "Found existing addon ${name_on_server}, basename=${basename_on_server}"
+
+        # check if the addon is present in the directory and decide
+        # what to do with it
+        # this is not optimal because we're reading the files over and over
+        # again. But for a small number of add-ons it doesn't matter so much.
+        found=0
+        for addon_path in ${addon_paths_in_files}; do
+            name_from_file=$(get-object-name-from-file ${addon_path})
+            if [[ "${name_from_file}" == "ERROR" ]]; then
+                log INFO "Cannot read object name from ${addon_path}. Ignoring"
+                continue
+            else
+                log DB2 "Found object name '${name_from_file}' in file ${addon_path}"
+            fi
+            suffix_from_file="$(get-suffix ${name_from_file} ${separator})"
+
+            log DB3 "matching: ${basename_on_server}${suffix_from_file} == ${name_from_file}"
+            if [[ "${basename_on_server}${suffix_from_file}" == "${name_from_file}" ]]; then
+                log DB3 "matched existing ${obj_type} ${name_on_server} to file ${addon_path}; suffix_on_server=${suffix_on_server}, suffix_from_file=${suffix_from_file}"
+                found=1
+                matched_files="${matched_files} ${addon_path}"
+                if [[ "${suffix_on_server}" == "${suffix_from_file}" ]]; then
+                    for_ignore="${for_ignore} ${name_from_file}"
+                else
+                    for_update="${for_update} ${name_on_server};${addon_path}"
+                fi
+                break
+            fi
+        done
+        if [[ ${found} -eq 0 ]]; then
+            log DB2 "No definition file found for ${obj_type} ${name_on_server}. Scheduling for deletion"
+            for_delete="${for_delete} ${name_on_server}"
+        fi
+    done
+
+    log DB3 "matched_files=${matched_files}"
+
+    for addon_path in ${addon_paths_in_files}; do
+        echo ${matched_files} | grep "${addon_path}" >/dev/null
+        if [[ $? -ne 0 ]]; then
+            new_files="${new_files} ${addon_path}"
+        fi
+    done
+}
+
+
+
+function reconcile-objects() {
+    local -r addon_path=$1
+    local -r obj_type=$2
+    local -r separator=$3    # name separator
+    match-objects ${addon_path} ${obj_type} ${separator}
+
+    log DBG "${obj_type}: for_delete=${for_delete}"
+    log DBG "${obj_type}: for_update=${for_update}"
+    log DBG "${obj_type}: for_ignore=${for_ignore}"
+    log DBG "${obj_type}: new_files=${new_files}"
+
+    stop-objects "${obj_type}" "${for_delete}"
+    # The wait-for-jobs below is a protection against changing the basename
+    # of a replication controller without changing the selector.
+    # If we don't wait, the new rc may be created before the old one is deleted.
+    # In such a case the old one will wait for all its pods to be gone, but the pods
+    # are created by the new replication controller.
+    # Passing --cascade=false could solve the problem, but we want
+    # all orphan pods to be deleted.
+    wait-for-jobs
+    stopResult=$?
+
+    create-objects "${obj_type}" "${new_files}"
+    update-objects "${obj_type}" "${for_update}"
+
+    local obj
+    for obj in ${for_ignore}; do
+        log DB2 "The ${obj_type} ${obj} is already up to date"
+    done
+
+    wait-for-jobs
+    createUpdateResult=$?
+
+    if [[ ${stopResult} -eq 0 ]] && [[ ${createUpdateResult} -eq 0 ]]; then
+        return 0
+    else
+        return 1
+    fi
+}
+
+function update-addons() {
+    local -r addon_path=$1
+    # be careful, reconcile-objects uses global variables
+    reconcile-objects ${addon_path} ReplicationController "-" &
+
+    # We don't expect service names to be versioned, so
+    # we match the entire name, ignoring the version suffix.
+    # That's why we pass an empty string as the version separator.
+    # If the service description differs on disk, the service should be recreated.
+    # This is not implemented in this version.
+    reconcile-objects ${addon_path} Service "" &
+
+    wait-for-jobs
+    if [[ $? -eq 0 ]]; then
+        log INFO "== Kubernetes addon update completed successfully at $(date -Is) =="
+    else
+        log WRN "== Kubernetes addon update completed with errors at $(date -Is) =="
+    fi
+}
+
+if [[ $# -ne 1 ]]; then
+    echo "Illegal number of parameters. Usage: $0 <path to add-ons>" 1>&2
+    exit 1
+fi
+
+addon_path=$1
+update-addons ${addon_path}
diff --git a/cluster/saltbase/salt/kube-addons/kube-addons.sh b/cluster/saltbase/salt/kube-addons/kube-addons.sh
index 11a306fd7e..bdfac45729 100644
--- a/cluster/saltbase/salt/kube-addons/kube-addons.sh
+++ b/cluster/saltbase/salt/kube-addons/kube-addons.sh
@@ -81,7 +81,7 @@ function create-resource-from-string() {
   local -r config_string=$1;
   local tries=$2;
   local -r delay=$3;
-  local -r config_name=$1;
+  local -r config_name=$4;
   while [ ${tries} -gt 0 ]; do
     echo "${config_string}" | ${KUBECTL} create -f - && \
         echo "== Successfully started ${config_name} at $(date -Is)" && \
@@ -104,6 +104,7 @@ echo "== Kubernetes addon manager started at $(date -Is) =="
 # at the same time as the services that use them.
 # NOTE: needs to run as root to read this file.
 # Read each line in the csv file of tokens.
+# Expect errors when the script is started again.
 while read line; do
   # Split each line into the token and username.
   IFS=',' read -a parts <<< "${line}"
@@ -120,22 +121,13 @@ for obj in $(find /etc/kubernetes/admission-controls \( -name \*.yaml -o -name \
   echo "++ obj ${obj} is created ++"
 done
 
-for obj in $(find /etc/kubernetes/addons \( -name \*.yaml -o -name \*.json \)); do
-  start_addon ${obj} 100 10 &
-  echo "++ addon ${obj} starting in pid $! ++"
+# Check if the configuration has changed recently - in case the user
+# created/updated/deleted the files on the master.
+while true; do
+  # kube-addon-update.sh must be deployed in the same directory as this file
+  `dirname $0`/kube-addon-update.sh /etc/kubernetes/addons
+  sleep 600
 done
-noerrors="true"
-for pid in $(jobs -p); do
-  wait ${pid} || noerrors="false"
-  echo "++ pid ${pid} complete ++"
-done
-if [ ${noerrors} == "true" ]; then
-  echo "== Kubernetes addon manager completed successfully at $(date -Is) =="
-else
-  echo "== Kubernetes addon manager completed with errors at $(date -Is) =="
-fi
-# We stay around so that status checks by salt make it look like
-# the service is good. (We could do this is other ways, but this
-# is simple.)
-sleep infinity
+
+
diff --git a/cluster/saltbase/salt/kube-master-addons/init.sls b/cluster/saltbase/salt/kube-master-addons/init.sls
index 0b3bfcf35c..9118605220 100644
--- a/cluster/saltbase/salt/kube-master-addons/init.sls
+++ b/cluster/saltbase/salt/kube-master-addons/init.sls
@@ -25,6 +25,14 @@
 {% endif %}
 
 # Used to restart kube-master-addons service each time salt is run
+# Actually, it doesn't work (the service is not restarted),
+# but the master-addons service always terminates after it does its job,
+# so it is (usually) not running and it will be started when
+# salt is run.
+# This salt state is not removed because there is a risk
+# of introducing a regression in 1.0. Please remove it afterwards.
+# See also the salt config for kube-addons to see how to restart
+# a service on demand.
 master-docker-image-tags:
   file.touch:
     - name: /srv/pillar/docker-images.sls
diff --git a/cluster/saltbase/salt/kube-master-addons/initd b/cluster/saltbase/salt/kube-master-addons/initd
index ac1d87eb28..6fef265a93 100644
--- a/cluster/saltbase/salt/kube-master-addons/initd
+++ b/cluster/saltbase/salt/kube-master-addons/initd
@@ -67,12 +67,7 @@ case "$1" in
         esac
   ;;
   status)
-        if [ ! -e ${PIDFILE} ]; then
-          exit 1
-        fi
-        pid=$(cat ${PIDFILE})
-        # Checks that ${pid} is running AND is us.
-        ps --no-headers ${pid} | grep ${SCRIPTNAME} > /dev/null || exit $?
+        status_of_proc -p $PIDFILE $KUBE_MASTER_ADDONS_SH $NAME
   ;;
 
   restart|force-reload)
diff --git a/cluster/saltbase/salt/kube-master-addons/kube-master-addons.sh b/cluster/saltbase/salt/kube-master-addons/kube-master-addons.sh
index c220053683..98419b7c61 100644
--- a/cluster/saltbase/salt/kube-master-addons/kube-master-addons.sh
+++ b/cluster/saltbase/salt/kube-master-addons/kube-master-addons.sh
@@ -38,3 +38,5 @@ while true; do
 done;
 
+# Now exit. After kube-push, salt will notice that the service is down and
+# will start it again, so that the new docker images get loaded.
diff --git a/cluster/saltbase/salt/logrotate/init.sls b/cluster/saltbase/salt/logrotate/init.sls index eba783e7f3..43a528a58d 100644 --- a/cluster/saltbase/salt/logrotate/init.sls +++ b/cluster/saltbase/salt/logrotate/init.sls @@ -2,7 +2,7 @@ logrotate: pkg: - installed -{% set logrotate_files = ['kube-scheduler', 'kube-proxy', 'kubelet', 'kube-apiserver', 'kube-controller-manager'] %} +{% set logrotate_files = ['kube-scheduler', 'kube-proxy', 'kubelet', 'kube-apiserver', 'kube-controller-manager', 'kube-addons'] %} {% for file in logrotate_files %} /etc/logrotate.d/{{ file }}: file: diff --git a/cluster/saltbase/salt/monit/init.sls b/cluster/saltbase/salt/monit/init.sls index 525d65bcf2..9a189e4e8d 100644 --- a/cluster/saltbase/salt/monit/init.sls +++ b/cluster/saltbase/salt/monit/init.sls @@ -30,6 +30,14 @@ monit: - mode: 644 {% endif %} +/etc/monit/conf.d/kube-addons: + file: + - managed + - source: salt://monit/kube-addons + - user: root + - group: root + - mode: 644 + /etc/monit/monit_watcher.sh: file.managed: - source: salt://monit/monit_watcher.sh diff --git a/cluster/saltbase/salt/monit/kube-addons b/cluster/saltbase/salt/monit/kube-addons new file mode 100644 index 0000000000..0744594ab7 --- /dev/null +++ b/cluster/saltbase/salt/monit/kube-addons @@ -0,0 +1,5 @@ +check process kube-addons with pidfile /var/run/kube-addons.pid +group kube-addons +start program = "/etc/init.d/kube-addons start" +stop program = "/etc/init.d/kube-addons stop" +if does not exist then restart diff --git a/test/e2e/monitoring.go b/test/e2e/monitoring.go index 6fd2880ccb..4561643944 100644 --- a/test/e2e/monitoring.go +++ b/test/e2e/monitoring.go @@ -86,7 +86,7 @@ func verifyExpectedRcsExistAndGetExpectedPods(c *client.Client) ([]string, error return nil, err } if len(rcList.Items) != 1 { - return nil, fmt.Errorf("expected to find one replicat for RC with label %s but got %d", + return nil, fmt.Errorf("expected to find one replica for RC with label %s but got %d", rcLabel, len(rcList.Items)) } for _, rc := range rcList.Items {
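
A note on testing: the updater's kubectl path and retry behavior are
parameterized through the TEST_* variables read at the top of
kube-addon-update.sh, so the script can be exercised end to end against a
stub binary. A minimal smoke-test sketch (the stub executable and the
scratch add-on directory below are hypothetical, not part of this patch):

    # /tmp/fake-kubectl: an assumed executable stub that logs its arguments;
    # /tmp/test-addons: an assumed directory of labeled test manifests.
    TEST_KUBECTL=/tmp/fake-kubectl \
    TEST_NUM_TRIES=1 \
    TEST_DELAY_AFTER_ERROR_SEC=0 \
        /etc/kubernetes/kube-addon-update.sh /tmp/test-addons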