Merge pull request #9128 from marekbiskup/addonUpgrade

Addon upgrade (partial implementation of #8107)
krousey 2015-06-08 15:15:27 -07:00
commit 8a9e0e00bf
13 changed files with 557 additions and 32 deletions

cluster/addons/README.md Normal file
View File

@ -0,0 +1,45 @@
# Cluster add-ons
Cluster add-ons are Services and Replication Controllers (with pods) that are
shipped with the Kubernetes binaries and whose update policy is consistent with
the update of the Kubernetes cluster itself.
The add-ons are kept in ```/etc/kubernetes/addons``` on the master node, in yaml files
(json is not supported at the moment).
Each add-on must specify the following label: ```kubernetes.io/cluster-service: true```.
Yaml files that do not define this label will be ignored.
The naming convention for Replication Controllers is
```<basename>-<version>```, where ```<basename>``` is the same in consecutive
versions and ```<version>``` changes when the component is updated
(```<version>``` must not contain ```-```). For instance,
```heapster-controller-v1``` and ```heapster-controller-12``` are the
same controllers with two different versions, while ```heapster-controller-v1```
and ```heapster-newcontroller-12``` are treated as two different applications.
For services it is just ```<basename>``` (with empty version number)
because we do not expect the service
name to change in consecutive versions. The naming convention is important for add-on updates.
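As a rough illustration of the convention (this is not part of the add-on updater
itself), splitting a versioned name at the last ```-``` can be done in shell like this:

```shell
# Illustration only: split a versioned add-on name at the last '-'.
name="heapster-controller-v1"
version="${name##*-}"    # -> "v1"
basename="${name%-*}"    # -> "heapster-controller"
echo "basename=${basename} version=${version}"
```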
# Add-on update
To update add-ons, just update the contents of the ```/etc/kubernetes/addons```
directory with the desired definitions of the add-ons. The system will then take care
of:
1. Removing the objects from the API server whose manifest was removed.
   1. This is done for add-ons in the system that do not have a manifest file with the
      same basename.
1. Creating objects from new manifests.
   1. This is done for manifests that do not correspond to existing API objects
      with the same basename.
1. Updating objects whose basename is the same but whose version changed.
   1. The update is currently performed by removing the old object and creating the
      new one (a rough manual equivalent is sketched after this list). In the future,
      rolling update of replication controllers will be implemented to keep the add-on
      services up and running during the update of add-on pods.
   1. Note that this cannot happen for Services as their version is always empty.
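For example, replacing a hypothetical ```heapster-controller-v1``` with
```heapster-controller-v2``` currently boils down to something like the following
(a sketch only; the real work is done by ```kube-addon-update.sh```, and the file path
here is made up):

```shell
# Sketch of the delete-and-recreate update performed by the updater
# (illustrative object name and file path, not an exact transcript).
kubectl stop ReplicationController heapster-controller-v1
kubectl create -f /etc/kubernetes/addons/heapster-controller-v2.yaml
```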
[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/cluster/addons/README.md?pixel)]()

View File

@ -5,6 +5,8 @@ metadata:
  namespace: default
  labels:
    k8s-app: influxGrafana
    kubernetes.io/cluster-service: "true"
    kubernetes.io/name: "influxGrafana"
spec:
  ports:
    - name: http

View File

@ -1,3 +1,16 @@
addon-dir-delete:
  file.absent:
    - name: /etc/kubernetes/addons

addon-dir-create:
  file.directory:
    - name: /etc/kubernetes/addons
    - user: root
    - group: root
    - mode: 0755
    - require:
      - file: addon-dir-delete

{% if pillar.get('enable_cluster_monitoring', '').lower() == 'influxdb' %}
/etc/kubernetes/addons/cluster-monitoring/influxdb:
  file.recurse:
@ -58,6 +71,13 @@
    - group: root
    - mode: 755

/etc/kubernetes/kube-addon-update.sh:
  file.managed:
    - source: salt://kube-addons/kube-addon-update.sh
    - user: root
    - group: root
    - mode: 755
{% if grains['os_family'] == 'RedHat' %}
/usr/lib/systemd/system/kube-addons.service:
@ -77,6 +97,16 @@
{% endif %}
# Stop kube-addons service each time salt is executed, just in case
# there was a modification of addons.
# Ideally this would be handled by watching for file changes, but
# that does not currently work.
service-kube-addon-stop:
  service.dead:
    - name: kube-addons

kube-addons:
  service.running:
    - enable: True
    - require:
      - service: service-kube-addon-stop

View File

@ -67,12 +67,7 @@ case "$1" in
esac
;;
status)
if [ ! -e ${PIDFILE} ]; then
exit 1
fi
pid=$(cat ${PIDFILE})
# Checks that ${pid} is running AND is us.
ps --no-headers ${pid} | grep ${SCRIPTNAME} > /dev/null || exit $?
status_of_proc -p $PIDFILE $KUBE_ADDONS_SH $NAME
;;
restart|force-reload)

View File

@ -0,0 +1,443 @@
#!/bin/bash
# Copyright 2015 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# The business logic for whether a given object should be created
# was already enforced by salt, and /etc/kubernetes/addons is the
# managed result of that. Start everything below that directory.
# Parameters
# $1 path to add-ons
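# Example invocation (this is how the addon manager invokes it):
#   kube-addon-update.sh /etc/kubernetes/addons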
# LIMITATIONS
# 1. Controllers are not updated unless their name is changed.
# 2. Services will not be updated unless their name is changed,
#    but for services we actually want updates without a name change.
# 3. Json files are not handled at all. Currently addons must be
#    in yaml files.
# 4. The exit code is probably not always correct (I haven't checked
#    carefully that it works in 100% of cases).
# 5. There are no unittests.
# 6. Will not work if the total length of paths to addons is greater than
#    bash can handle. Probably it is not a problem: ARG_MAX=2097152 on GCE.
# 7. Performance issue: yaml files are read many times in a single execution.
# Cosmetic improvements to be done:
# 1. improve the log function; add timestamp, file name, etc.
# 2. logging doesn't work from functions that print things out.
# 3. kubectl prints the output to stderr (the output should be captured and then
# logged)
# global config
KUBECTL=${TEST_KUBECTL:-/usr/local/bin/kubectl} # substitute for tests
NUM_TRIES_FOR_CREATE=${TEST_NUM_TRIES:-100}
DELAY_AFTER_CREATE_ERROR_SEC=${TEST_DELAY_AFTER_ERROR_SEC:=10}
NUM_TRIES_FOR_STOP=${TEST_NUM_TRIES:-100}
DELAY_AFTER_STOP_ERROR_SEC=${TEST_DELAY_AFTER_ERROR_SEC:=10}
if [[ ! -x ${KUBECTL} ]]; then
echo "ERROR: kubectl command (${KUBECTL}) not found or is not executable" 1>&2
exit 1
fi
# remember that you can't log from functions that print some output (because
# logs are also printed on stdout)
# $1 level
# $2 message
function log() {
# manage log levels manually here
# add the timestamp if you find it useful
case $1 in
DB3 )
# echo "$1: $2"
;;
DB2 )
# echo "$1: $2"
;;
DBG )
# echo "$1: $2"
;;
INFO )
echo "$1: $2"
;;
WRN )
echo "$1: $2"
;;
ERR )
echo "$1: $2"
;;
* )
echo "INVALID_LOG_LEVEL $1: $2"
;;
esac
}
#$1 yaml file path
function get-object-kind-from-file() {
# prints to stdout, so log cannot be used
#WARNING: only yaml is supported
cat $1 | python -c '''
try:
import pipes,sys,yaml
y = yaml.load(sys.stdin)
labels = y["metadata"]["labels"]
if ("kubernetes.io/cluster-service", "true") not in labels.iteritems():
# all add-ons must have the label "kubernetes.io/cluster-service".
# Otherwise we are ignoring them (the update will not work anyway)
print "ERROR"
else:
print y["kind"]
except Exception, ex:
print "ERROR"
'''
}
# $1 yaml file path
function get-object-name-from-file() {
# prints to stdout, so log cannot be used
#WARNING: only yaml is supported
cat $1 | python -c '''
try:
import pipes,sys,yaml
y = yaml.load(sys.stdin)
labels = y["metadata"]["labels"]
if ("kubernetes.io/cluster-service", "true") not in labels.iteritems():
# all add-ons must have the label "kubernetes.io/cluster-service".
# Otherwise we are ignoring them (the update will not work anyway)
print "ERROR"
else:
print y["metadata"]["name"]
except Exception, ex:
print "ERROR"
'''
}
# $1 addon directory path
# $2 addon type (e.g. ReplicationController)
# echoes the string with paths to files containing addon for the given type
# works only for yaml files (!) (ignores json files)
function get-addons-from-disk() {
# prints to stdout, so log cannot be used
local -r addon_dir=$1
local -r obj_type=$2
local kind
local file_path
for file_path in $(find ${addon_dir} -name \*.yaml); do
kind=$(get-object-kind-from-file ${file_path})
# WARNING: assumption that the topmost indentation is zero (I'm not sure yaml allows for topmost indentation)
if [[ "${kind}" == "${obj_type}" ]]; then
echo ${file_path}
fi
done
}
# waits for all subprocesses
# returns 0 if all of them were successful and 1 otherwise
function wait-for-jobs() {
local rv=0
local pid
for pid in $(jobs -p); do
wait ${pid} || { rv=1; log ERR "error in pid ${pid}"; }
log DB2 "pid ${pid} completed, current error code: ${rv}"
done
return ${rv}
}
function run-until-success() {
local -r command=$1
local tries=$2
local -r delay=$3
local -r command_name=$1
while [ ${tries} -gt 0 ]; do
log DBG "executing: '$command'"
# let's give the command as an argument to bash -c, so that we can use
# && and || inside the command itself
/bin/bash -c "${command}" && \
log DB3 "== Successfully executed ${command_name} at $(date -Is) ==" && \
return 0
let tries=tries-1
log INFO "== Failed to execute ${command_name} at $(date -Is). ${tries} tries remaining. =="
sleep ${delay}
done
return 1
}
# $1 object type
function get-addons-from-server() {
local -r obj_type=$1
"${KUBECTL}" get "${obj_type}" -o template -t "{{range.items}}{{.metadata.name}} {{end}}" --api-version=v1beta3 -l kubernetes.io/cluster-service=true
}
# returns the characters after the last separator (including the separator)
# If the separator is empty or if it doesn't appear in the string,
# an empty string is printed
# $1 input string
# $2 separator (must be single character, or empty)
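# Examples: get-suffix "heapster-controller-v1" "-" prints "-v1";
#           get-suffix "heapster-controller-v1" ""  prints "" (empty separator)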
function get-suffix() {
# prints to stdout, so log cannot be used
local -r input_string=$1
local -r separator=$2
local suffix
if [[ "${separator}" == "" ]]; then
echo ""
return
fi
if [[ "${input_string}" == *"${separator}"* ]]; then
suffix=$(echo "${input_string}" | rev | cut -d "${separator}" -f1 | rev)
echo "${separator}${suffix}"
else
echo ""
fi
}
# returns the characters up to the last separator (without the separator)
# $1 input string
# $2 separator
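# e.g. get-basename "heapster-controller-v1" "-" prints "heapster-controller"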
function get-basename() {
# prints to stdout, so log cannot be used
local -r input_string=$1
local -r separator=$2
local suffix
suffix="$(get-suffix ${input_string} ${separator})"
# this will strip the suffix (if matches)
echo ${input_string%$suffix}
}
function stop-object() {
local -r obj_type=$1
local -r obj_name=$2
log INFO "Stopping ${obj_type} ${obj_name}"
run-until-success "${KUBECTL} stop ${obj_type} ${obj_name}" ${NUM_TRIES_FOR_STOP} ${DELAY_AFTER_STOP_ERROR_SEC}
}
function create-object() {
local -r obj_type=$1
local -r file_path=$2
log INFO "Creating new ${obj_type} from file ${file_path}"
run-until-success "${KUBECTL} create -f ${file_path}" ${NUM_TRIES_FOR_CREATE} ${DELAY_AFTER_CREATE_ERROR_SEC}
}
function update-object() {
local -r obj_type=$1
local -r obj_name=$2
local -r file_path=$3
log INFO "updating the ${obj_type} ${obj_name} with the new definition ${file_path}"
stop-object ${obj_type} ${obj_name}
create-object ${obj_type} ${file_path}
}
# deletes the objects from the server
# $1 object type
# $2 a list of object names
function stop-objects() {
local -r obj_type=$1
local -r obj_names=$2
local obj_name
for obj_name in ${obj_names}; do
stop-object ${obj_type} ${obj_name} &
done
}
# creates objects from the given files
# $1 object type
# $2 a list of paths to definition files
function create-objects() {
local -r obj_type=$1
local -r file_paths=$2
local file_path
for file_path in ${file_paths}; do
create-object ${obj_type} ${file_path} &
done
}
# updates objects
# $1 object type
# $2 a list of update specifications
# each update specification is a ';' separated pair: <object name>;<file path>
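# e.g. "heapster-controller-v1;/etc/kubernetes/addons/heapster-controller-v2.yaml"
# (illustrative name and path)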
function update-objects() {
local -r obj_type=$1
local -r update_spec=$2
local objdesc
for objdesc in ${update_spec}; do
IFS=';' read -a array <<< ${objdesc}
update-object ${obj_type} ${array[0]} ${array[1]} &
done
}
# Global variables set by function match-objects.
for_delete="" # a list of object names to be deleted
for_update="" # a list of pairs <obj_name>;<filePath> for objects that should be updated
for_ignore="" # a list of object nanes that can be ignored
new_files="" # a list of file paths that weren't matched by any existing objects (these objects must be created now)
# $1 path to files with objects
# $2 object type in the API (ReplicationController or Service)
# $3 name separator (single character or empty)
function match-objects() {
local -r addon_dir=$1
local -r obj_type=$2
local -r separator=$3
# output variables (globals)
for_delete=""
for_update=""
for_ignore=""
new_files=""
addon_names_on_server=$(get-addons-from-server "${obj_type}")
addon_paths_in_files=$(get-addons-from-disk "${addon_dir}" "${obj_type}")
log DB2 "addon_names_on_server=${addon_names_on_server}"
log DB2 "addon_paths_in_files=${addon_paths_in_files}"
local matched_files=""
local basename_on_server=""
local name_on_server=""
local suffix_on_server=""
local name_from_file=""
local suffix_from_file=""
local found=0
local addon_path=""
for name_on_server in ${addon_names_on_server}; do
basename_on_server=$(get-basename ${name_on_server} ${separator})
suffix_on_server="$(get-suffix ${name_on_server} ${separator})"
log DB3 "Found existing addon ${name_on_server}, basename=${basename_on_server}"
# check if the addon is present in the directory and decide
# what to do with it
# this is not optimal because we're reading the files over and over
# again. But for a small number of addons it doesn't matter so much.
found=0
for addon_path in ${addon_paths_in_files}; do
name_from_file=$(get-object-name-from-file ${addon_path})
if [[ "${name_from_file}" == "ERROR" ]]; then
log INFO "Cannot read object name from ${addon_path}. Ignoring"
continue
else
log DB2 "Found object name '${name_from_file}' in file ${addon_path}"
fi
suffix_from_file="$(get-suffix ${name_from_file} ${separator})"
log DB3 "matching: ${basename_on_server}${suffix_from_file} == ${name_from_file}"
if [[ "${basename_on_server}${suffix_from_file}" == "${name_from_file}" ]]; then
log DB3 "matched existing ${obj_type} ${name_on_server} to file ${addon_path}; suffix_on_server=${suffix_on_server}, suffix_from_file=${suffix_from_file}"
found=1
matched_files="${matched_files} ${addon_path}"
if [[ "${suffix_on_server}" == "${suffix_from_file}" ]]; then
for_ignore="${for_ignore} ${name_from_file}"
else
for_update="${for_update} ${name_on_server};${addon_path}"
fi
break
fi
done
if [[ ${found} -eq 0 ]]; then
log DB2 "No definition file found for replication controller ${name_on_server}. Scheduling for deletion"
for_delete="${for_delete} ${name_on_server}"
fi
done
log DB3 "matched_files=${matched_files}"
for addon_path in ${addon_paths_in_files}; do
echo ${matched_files} | grep "${addon_path}" >/dev/null
if [[ $? -ne 0 ]]; then
new_files="${new_files} ${addon_path}"
fi
done
}
function reconcile-objects() {
local -r addon_path=$1
local -r obj_type=$2
local -r separator=$3 # name separator
match-objects ${addon_path} ${obj_type} ${separator}
log DBG "${obj_type}: for_delete=${for_delete}"
log DBG "${obj_type}: for_update=${for_update}"
log DBG "${obj_type}: for_ignore=${for_ignore}"
log DBG "${obj_type}: new_files=${new_files}"
stop-objects "${obj_type}" "${for_delete}"
# The wait-for-jobs below protects against changing the basename
# of a replication controller without changing the selector.
# If we don't wait, the new rc may be created before the old one is deleted
# In such case the old one will wait for all its pods to be gone, but the pods
# are created by the new replication controller.
# passing --cascade=false could solve the problem, but we want
# all orphan pods to be deleted.
wait-for-jobs
stopResult=$?
create-objects "${obj_type}" "${new_files}"
update-objects "${obj_type}" "${for_update}"
local obj
for obj in ${for_ignore}; do
log DB2 "The ${obj_type} ${obj} is already up to date"
done
wait-for-jobs
createUpdateResult=$?
if [[ ${stopResult} -eq 0 ]] && [[ ${createUpdateResult} -eq 0 ]]; then
return 0
else
return 1
fi
}
function update-addons() {
local -r addon_path=$1
# be careful, reconcile-objects uses global variables
reconcile-objects ${addon_path} ReplicationController "-" &
# We don't expect service names to be versioned, so
# we match the entire name, ignoring the version suffix.
# That's why we pass an empty string as the version separator.
# If the service description differs on disk, the service should be recreated.
# This is not implemented in this version.
reconcile-objects ${addon_path} Service "" &
wait-for-jobs
if [[ $? -eq 0 ]]; then
log INFO "== Kubernetes addon update completed successfully at $(date -Is) =="
else
log WRN "== Kubernetes addon update completed with errors at $(date -Is) =="
fi
}
if [[ $# -ne 1 ]]; then
echo "Illegal number of parameters" 1>&2
exit 1
fi
addon_path=$1
update-addons ${addon_path}
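The TEST_* variables at the top of the script let it be exercised outside a real
master. A minimal sketch, assuming a reachable cluster, a kubectl on the PATH, and a
scratch directory of add-on yaml files:

```shell
# Sketch only: run the updater with short retry settings against a scratch
# add-on directory. TEST_KUBECTL, TEST_NUM_TRIES and TEST_DELAY_AFTER_ERROR_SEC
# are the overrides defined at the top of kube-addon-update.sh.
TEST_KUBECTL=$(which kubectl) \
TEST_NUM_TRIES=3 \
TEST_DELAY_AFTER_ERROR_SEC=1 \
  ./kube-addon-update.sh /tmp/test-addons
```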

View File

@ -81,7 +81,7 @@ function create-resource-from-string() {
local -r config_string=$1;
local tries=$2;
local -r delay=$3;
local -r config_name=$1;
local -r config_name=$4;
while [ ${tries} -gt 0 ]; do
echo "${config_string}" | ${KUBECTL} create -f - && \
echo "== Successfully started ${config_name} at $(date -Is)" && \
@ -104,6 +104,7 @@ echo "== Kubernetes addon manager started at $(date -Is) =="
# at the same time as the services that use them.
# NOTE: needs to run as root to read this file.
# Read each line in the csv file of tokens.
# Expect errors when the script is started again.
while read line; do
# Split each line into the token and username.
IFS=',' read -a parts <<< "${line}"
@ -120,22 +121,13 @@ for obj in $(find /etc/kubernetes/admission-controls \( -name \*.yaml -o -name \
echo "++ obj ${obj} is created ++"
done
for obj in $(find /etc/kubernetes/addons \( -name \*.yaml -o -name \*.json \)); do
start_addon ${obj} 100 10 &
echo "++ addon ${obj} starting in pid $! ++"
# Check if the configuration has changed recently - in case the user
# created/updated/deleted the files on the master.
while true; do
#kube-addon-update.sh must be deployed in the same directory as this file
`dirname $0`/kube-addon-update.sh /etc/kubernetes/addons
sleep 600
done
noerrors="true"
for pid in $(jobs -p); do
wait ${pid} || noerrors="false"
echo "++ pid ${pid} complete ++"
done
if [ ${noerrors} == "true" ]; then
echo "== Kubernetes addon manager completed successfully at $(date -Is) =="
else
echo "== Kubernetes addon manager completed with errors at $(date -Is) =="
fi
# We stay around so that status checks by salt make it look like
# the service is good. (We could do this in other ways, but this
# is simple.)
sleep infinity

View File

@ -25,6 +25,14 @@
{% endif %}
# Used to restart kube-master-addons service each time salt is run
# Actually, it doesn't work (the service is not restarted),
# but the master-addons service always terminates after it does its job,
# so it is (usually) not running and it will be started when
# salt is run.
# This salt state is not removed because there is a risk
# of introducing regression in 1.0. Please remove it afterwards.
# See also the salt config for kube-addons to see how to restart
# a service on demand.
master-docker-image-tags:
  file.touch:
    - name: /srv/pillar/docker-images.sls

View File

@ -67,12 +67,7 @@ case "$1" in
esac
;;
status)
if [ ! -e ${PIDFILE} ]; then
exit 1
fi
pid=$(cat ${PIDFILE})
# Checks that ${pid} is running AND is us.
ps --no-headers ${pid} | grep ${SCRIPTNAME} > /dev/null || exit $?
status_of_proc -p $PIDFILE $KUBE_MASTER_ADDONS_SH $NAME
;;
restart|force-reload)

View File

@ -38,3 +38,5 @@ while true; do
done;
# Now exit. After kube-push, salt will notice that the service is down and it
# will start it and new docker images will be loaded.

View File

@ -2,7 +2,7 @@ logrotate:
pkg:
- installed
{% set logrotate_files = ['kube-scheduler', 'kube-proxy', 'kubelet', 'kube-apiserver', 'kube-controller-manager'] %}
{% set logrotate_files = ['kube-scheduler', 'kube-proxy', 'kubelet', 'kube-apiserver', 'kube-controller-manager', 'kube-addons'] %}
{% for file in logrotate_files %}
/etc/logrotate.d/{{ file }}:
  file:

View File

@ -30,6 +30,14 @@ monit:
- mode: 644
{% endif %}
/etc/monit/conf.d/kube-addons:
  file:
    - managed
    - source: salt://monit/kube-addons
    - user: root
    - group: root
    - mode: 644
/etc/monit/monit_watcher.sh:
  file.managed:
    - source: salt://monit/monit_watcher.sh

View File

@ -0,0 +1,5 @@
check process kube-addons with pidfile /var/run/kube-addons.pid
group kube-addons
start program = "/etc/init.d/kube-addons start"
stop program = "/etc/init.d/kube-addons stop"
if does not exist then restart

View File

@ -86,7 +86,7 @@ func verifyExpectedRcsExistAndGetExpectedPods(c *client.Client) ([]string, error
return nil, err
}
if len(rcList.Items) != 1 {
return nil, fmt.Errorf("expected to find one replicat for RC with label %s but got %d",
return nil, fmt.Errorf("expected to find one replica for RC with label %s but got %d",
rcLabel, len(rcList.Items))
}
for _, rc := range rcList.Items {