diff --git a/contrib/ansible/roles/kubernetes-addons/files/kube-addon-update.sh b/contrib/ansible/roles/kubernetes-addons/files/kube-addon-update.sh index 731c632024..93f0469b40 100755 --- a/contrib/ansible/roles/kubernetes-addons/files/kube-addon-update.sh +++ b/contrib/ansible/roles/kubernetes-addons/files/kube-addon-update.sh @@ -47,16 +47,19 @@ KUBECTL=${TEST_KUBECTL:-} # substitute for tests KUBECTL=${KUBECTL:-${KUBECTL_BIN:-}} KUBECTL=${KUBECTL:-/usr/local/bin/kubectl} -NUM_TRIES_FOR_CREATE=${TEST_NUM_TRIES:-100} -DELAY_AFTER_CREATE_ERROR_SEC=${TEST_DELAY_AFTER_ERROR_SEC:=10} -NUM_TRIES_FOR_STOP=${TEST_NUM_TRIES:-100} -DELAY_AFTER_STOP_ERROR_SEC=${TEST_DELAY_AFTER_ERROR_SEC:=10} - if [[ ! -x ${KUBECTL} ]]; then echo "ERROR: kubectl command (${KUBECTL}) not found or is not executable" 1>&2 exit 1 fi +# If an add-on definition is incorrect, or a definition has just disappeared +# from the local directory, the script will still keep on retrying. +# The script does not end until all retries are done, so +# one invalid manifest may block updates of other add-ons. +# Be careful how you set these parameters +NUM_TRIES=1 # will be updated based on input parameters +DELAY_AFTER_ERROR_SEC=${TEST_DELAY_AFTER_ERROR_SEC:=10} + # remember that you can't log from functions that print some output (because # logs are also printed on stdout) @@ -112,9 +115,11 @@ except Exception, ex: } # $1 yaml file path -function get-object-name-from-file() { +# returns a string of the form <namespace>/<name> (we call it nsnames) +function get-object-nsname-from-file() { # prints to stdout, so log cannot be used #WARNING: only yaml is supported + #addons that do not specify a namespace are assumed to be in "default".
cat $1 | python -c ''' try: import pipes,sys,yaml @@ -125,7 +130,10 @@ try: # Otherwise we are ignoring them (the update will not work anyway) print "ERROR" else: - print y["metadata"]["name"] + try: + print "%s/%s" % (y["metadata"]["namespace"], y["metadata"]["name"]) + except Exception, ex: + print "default/%s" % y["metadata"]["name"] except Exception, ex: print "ERROR" ''' @@ -135,7 +143,7 @@ except Exception, ex: # $2 addon type (e.g. ReplicationController) # echoes the string with paths to files containing addon for the given type # works only for yaml files (!) (ignores json files) -function get-addons-from-disk() { +function get-addon-paths-from-disk() { # prints to stdout, so log cannot be used local -r addon_dir=$1 local -r obj_type=$2 @@ -183,9 +191,10 @@ function run-until-success() { } # $1 object type -function get-addons-from-server() { +# returns a list of <namespace>/<name> pairs (nsnames) +function get-addon-nsnames-from-server() { local -r obj_type=$1 - "${KUBECTL}" get "${obj_type}" -o template -t "{{range.items}}{{.metadata.name}} {{end}}" --api-version=v1 -l kubernetes.io/cluster-service=true + "${KUBECTL}" get "${obj_type}" --all-namespaces -o template -t "{{range.items}}{{.metadata.namespace}}/{{.metadata.name}} {{end}}" --api-version=v1 -l kubernetes.io/cluster-service=true } # returns the characters after the last separator (including) @@ -227,36 +236,52 @@ function get-basename() { function stop-object() { local -r obj_type=$1 - local -r obj_name=$2 - log INFO "Stopping ${obj_type} ${obj_name}" - run-until-success "${KUBECTL} stop ${obj_type} ${obj_name}" ${NUM_TRIES_FOR_STOP} ${DELAY_AFTER_STOP_ERROR_SEC} + local -r namespace=$2 + local -r obj_name=$3 + log INFO "Stopping ${obj_type} ${namespace}/${obj_name}" + + run-until-success "${KUBECTL} stop --namespace=${namespace} ${obj_type} ${obj_name}" ${NUM_TRIES} ${DELAY_AFTER_ERROR_SEC} } function create-object() { local -r obj_type=$1 local -r file_path=$2 - log INFO "Creating new ${obj_type} from file
${file_path}" - run-until-success "${KUBECTL} create -f ${file_path}" ${NUM_TRIES_FOR_CREATE} ${DELAY_AFTER_CREATE_ERROR_SEC} + + local nsname_from_file + nsname_from_file=$(get-object-nsname-from-file ${file_path}) + if [[ "${nsname_from_file}" == "ERROR" ]]; then + log INFO "Cannot read object name from ${file_path}. Ignoring" + return 1 + fi + IFS='/' read namespace obj_name <<< "${nsname_from_file}" + + log INFO "Creating new ${obj_type} from file ${file_path} in namespace ${namespace}, name: ${obj_name}" + # this will keep on failing if the ${file_path} disappeared in the meantime. + # Do not use too many retries. + run-until-success "${KUBECTL} create --namespace=${namespace} -f ${file_path}" ${NUM_TRIES} ${DELAY_AFTER_ERROR_SEC} } function update-object() { local -r obj_type=$1 - local -r obj_name=$2 - local -r file_path=$3 - log INFO "updating the ${obj_type} ${obj_name} with the new definition ${file_path}" - stop-object ${obj_type} ${obj_name} + local -r namespace=$2 + local -r obj_name=$3 + local -r file_path=$4 + log INFO "updating the ${obj_type} ${namespace}/${obj_name} with the new definition ${file_path}" + stop-object ${obj_type} ${namespace} ${obj_name} create-object ${obj_type} ${file_path} } # deletes the objects from the server # $1 object type -# $2 a list of object names +# $2 a list of object nsnames function stop-objects() { local -r obj_type=$1 - local -r obj_names=$2 + local -r obj_nsnames=$2 + local namespace local obj_name - for obj_name in ${obj_names}; do - stop-object ${obj_type} ${obj_names} & + for nsname in ${obj_nsnames}; do + IFS='/' read namespace obj_name <<< "${nsname}" + stop-object ${obj_type} ${namespace} ${obj_name} & done } @@ -268,6 +293,12 @@ function create-objects() { local -r file_paths=$2 local file_path for file_path in ${file_paths}; do + # Remember that the file may have disappeared by now + # But we don't want to check it here because + # such a race condition may always happen after + # we check it.
Let's have the race + # condition happen a bit more often so that + # we see that our tests pass anyway. create-object ${obj_type} ${file_path} & done } @@ -275,22 +306,28 @@ function create-objects() { # updates objects # $1 object type # $2 a list of update specifications -# each update specification is a ';' separated pair: <object name>;<file path> +# each update specification is a ';' separated pair: <nsname>;<file path> function update-objects() { local -r obj_type=$1 # ignored local -r update_spec=$2 local objdesc + local nsname + local obj_name + local namespace + for objdesc in ${update_spec}; do - IFS=';' read -a array <<< ${objdesc} - update-object ${obj_type} ${array[0]} ${array[1]} & + IFS=';' read nsname file_path <<< "${objdesc}" + IFS='/' read namespace obj_name <<< "${nsname}" + + update-object ${obj_type} ${namespace} ${obj_name} ${file_path} & done } # Global variables set by function match-objects. -for_delete="" # a list of object names to be deleted -for_update="" # a list of pairs <obj_name>;<file_path> for objects that should be updated -for_ignore="" # a list of object nanes that can be ignored -new_files="" # a list of file paths that weren't matched by any existing objects (these objects must be created now) +nsnames_for_delete="" # a list of object nsnames to be deleted +for_update="" # a list of pairs <nsname>;<file_path> for objects that should be updated +nsnames_for_ignore="" # a list of object nsnames that will be ignored +new_files="" # a list of file paths that weren't matched by any existing objects (these objects must be created now) # $1 path to files with objects @@ -302,32 +339,36 @@ function match-objects() { local -r separator=$3 # output variables (globals) - for_delete="" + nsnames_for_delete="" for_update="" - for_ignore="" + nsnames_for_ignore="" new_files="" - addon_names_on_server=$(get-addons-from-server "${obj_type}") - addon_paths_in_files=$(get-addons-from-disk "${addon_dir}" "${obj_type}") + addon_nsnames_on_server=$(get-addon-nsnames-from-server "${obj_type}") +
addon_paths_in_files=$(get-addon-paths-from-disk "${addon_dir}" "${obj_type}") - log DB2 "addon_names_on_server=${addon_names_on_server}" + log DB2 "addon_nsnames_on_server=${addon_nsnames_on_server}" log DB2 "addon_paths_in_files=${addon_paths_in_files}" local matched_files="" - local basename_on_server="" - local name_on_server="" + local basensname_on_server="" + local nsname_on_server="" local suffix_on_server="" - local name_from_file="" + local nsname_from_file="" local suffix_from_file="" local found=0 local addon_path="" - for name_on_server in ${addon_names_on_server}; do - basename_on_server=$(get-basename ${name_on_server} ${separator}) - suffix_on_server="$(get-suffix ${name_on_server} ${separator})" + # objects that were moved between namespaces will have different nsname + # because the namespace is included. So they will be treated + # like different objects and not updated but deleted and created again + # (in the current version update is also delete+create, so it does not matter) + for nsname_on_server in ${addon_nsnames_on_server}; do + basensname_on_server=$(get-basename ${nsname_on_server} ${separator}) + suffix_on_server="$(get-suffix ${nsname_on_server} ${separator})" - log DB3 "Found existing addon ${name_on_server}, basename=${basename_on_server}" + log DB3 "Found existing addon ${nsname_on_server}, basename=${basensname_on_server}" # check if the addon is present in the directory and decide # what to do with it @@ -335,36 +376,41 @@ function match-objects() { # again. But for small number of addons it doesn't matter so much. found=0 for addon_path in ${addon_paths_in_files}; do - name_from_file=$(get-object-name-from-file ${addon_path}) - if [[ "${name_from_file}" == "ERROR" ]]; then + nsname_from_file=$(get-object-nsname-from-file ${addon_path}) + if [[ "${nsname_from_file}" == "ERROR" ]]; then log INFO "Cannot read object name from ${addon_path}. 
Ignoring" continue else - log DB2 "Found object name '${name_from_file}' in file ${addon_path}" + log DB2 "Found object name '${nsname_from_file}' in file ${addon_path}" fi - suffix_from_file="$(get-suffix ${name_from_file} ${separator})" + suffix_from_file="$(get-suffix ${nsname_from_file} ${separator})" - log DB3 "matching: ${basename_on_server}${suffix_from_file} == ${name_from_file}" - if [[ "${basename_on_server}${suffix_from_file}" == "${name_from_file}" ]]; then - log DB3 "matched existing ${obj_type} ${name_on_server} to file ${addon_path}; suffix_on_server=${suffix_on_server}, suffix_from_file=${suffix_from_file}" + log DB3 "matching: ${basensname_on_server}${suffix_from_file} == ${nsname_from_file}" + if [[ "${basensname_on_server}${suffix_from_file}" == "${nsname_from_file}" ]]; then + log DB3 "matched existing ${obj_type} ${nsname_on_server} to file ${addon_path}; suffix_on_server=${suffix_on_server}, suffix_from_file=${suffix_from_file}" found=1 matched_files="${matched_files} ${addon_path}" if [[ "${suffix_on_server}" == "${suffix_from_file}" ]]; then - for_ignore="${for_ignore} ${name_from_file}" + nsnames_for_ignore="${nsnames_for_ignore} ${nsname_from_file}" else - for_update="${for_update} ${name_on_server};${addon_path}" + for_update="${for_update} ${nsname_on_server};${addon_path}" fi break fi done if [[ ${found} -eq 0 ]]; then - log DB2 "No definition file found for replication controller ${name_on_server}. Scheduling for deletion" - for_delete="${for_delete} ${name_on_server}" + log DB2 "No definition file found for replication controller ${nsname_on_server}. Scheduling for deletion" + nsnames_for_delete="${nsnames_for_delete} ${nsname_on_server}" fi done log DB3 "matched_files=${matched_files}" + + # note that if the addon file is invalid (or got removed after listing files + # but before we managed to match it) it will not be matched to any + # of the existing objects. So we will treat it as a new file + # and try to create its object. 
for addon_path in ${addon_paths_in_files}; do echo ${matched_files} | grep "${addon_path}" >/dev/null if [[ $? -ne 0 ]]; then @@ -381,12 +427,12 @@ function reconcile-objects() { local -r separator=$3 # name separator match-objects ${addon_path} ${obj_type} ${separator} - log DBG "${obj_type}: for_delete=${for_delete}" + log DBG "${obj_type}: nsnames_for_delete=${nsnames_for_delete}" log DBG "${obj_type}: for_update=${for_update}" - log DBG "${obj_type}: for_ignore=${for_ignore}" + log DBG "${obj_type}: nsnames_for_ignore=${nsnames_for_ignore}" log DBG "${obj_type}: new_files=${new_files}" - stop-objects "${obj_type}" "${for_delete}" + stop-objects "${obj_type}" "${nsnames_for_delete}" # wait for jobs below is a protection against changing the basename # of a replication controllerm without changing the selector. # If we don't wait, the new rc may be created before the old one is deleted @@ -400,9 +446,9 @@ function reconcile-objects() { create-objects "${obj_type}" "${new_files}" update-objects "${obj_type}" "${for_update}" - local obj - for obj in ${for_ignore}; do - log DB2 "The ${obj_type} ${obj} is already up to date" + local nsname + for nsname in ${nsnames_for_ignore}; do + log DB2 "The ${obj_type} ${nsname} is already up to date" done wait-for-jobs @@ -435,11 +481,21 @@ function update-addons() { fi } -if [[ $# -ne 1 ]]; then - echo "Illegal number of parameters" 1>&2 +# input parameters: +# $1 input directory +# $2 retry period in seconds - the script will retry api-server errors for approximately +# this amount of time (it is not very precise), at interval equal $DELAY_AFTER_ERROR_SEC. +# + +if [[ $# -ne 2 ]]; then + echo "Illegal number of parameters.
Usage $0 addon-dir [retry-period]" 1>&2 exit 1 fi +NUM_TRIES=$(($2 / ${DELAY_AFTER_ERROR_SEC})) +if [[ ${NUM_TRIES} -le 0 ]]; then + NUM_TRIES=1 +fi + addon_path=$1 update-addons ${addon_path} - diff --git a/contrib/ansible/roles/kubernetes-addons/files/kube-addons.sh b/contrib/ansible/roles/kubernetes-addons/files/kube-addons.sh index 6781ca73f9..29e6ed9b2e 100644 --- a/contrib/ansible/roles/kubernetes-addons/files/kube-addons.sh +++ b/contrib/ansible/roles/kubernetes-addons/files/kube-addons.sh @@ -21,6 +21,8 @@ KUBECTL=${KUBECTL_BIN:-/usr/local/bin/kubectl} ADDON_CHECK_INTERVAL_SEC=${TEST_ADDON_CHECK_INTERVAL_SEC:-600} +SYSTEM_NAMESPACE=kube-system + token_dir=${TOKEN_DIR:-/srv/kubernetes} function create-kubeconfig-secret() { @@ -49,6 +51,7 @@ contexts: - context: cluster: local user: ${username} + namespace: ${SYSTEM_NAMESPACE} name: service-account-context current-context: service-account-context EOF @@ -69,6 +72,7 @@ contexts: - context: cluster: local user: ${username} + namespace: ${SYSTEM_NAMESPACE} name: service-account-context current-context: service-account-context EOF @@ -84,36 +88,39 @@ metadata: name: token-${safe_username} type: Opaque EOF - create-resource-from-string "${secretyaml}" 100 10 "Secret-for-token-for-user-${username}" & -# TODO: label the secrets with special label so kubectl does not show these? + create-resource-from-string "${secretyaml}" 100 10 "Secret-for-token-for-user-${username}" "${SYSTEM_NAMESPACE}" & } # $1 filename of addon to start. # $2 count of tries to start the addon. # $3 delay in seconds between two consecutive tries +# $4 namespace function start_addon() { local -r addon_filename=$1; local -r tries=$2; local -r delay=$3; + local -r namespace=$4 - create-resource-from-string "$(cat ${addon_filename})" "${tries}" "${delay}" "${addon_filename}" + create-resource-from-string "$(cat ${addon_filename})" "${tries}" "${delay}" "${addon_filename}" "${namespace}" } # $1 string with json or yaml. 
# $2 count of tries to start the addon. # $3 delay in seconds between two consecutive tries -# $3 name of this object to use when logging about it. +# $4 name of this object to use when logging about it. +# $5 namespace for this object function create-resource-from-string() { local -r config_string=$1; local tries=$2; local -r delay=$3; local -r config_name=$4; + local -r namespace=$5; while [ ${tries} -gt 0 ]; do - echo "${config_string}" | ${KUBECTL} create -f - && \ - echo "== Successfully started ${config_name} at $(date -Is)" && \ + echo "${config_string}" | ${KUBECTL} --namespace="${namespace}" create -f - && \ + echo "== Successfully started ${config_name} in namespace ${namespace} at $(date -Is)" && \ return 0; let tries=tries-1; - echo "== Failed to start ${config_name} at $(date -Is). ${tries} tries remaining. ==" + echo "== Failed to start ${config_name} in namespace ${namespace} at $(date -Is). ${tries} tries remaining. ==" sleep ${delay}; done return 1; @@ -122,7 +129,7 @@ function create-resource-from-string() { # The business logic for whether a given object should be created # was already enforced by salt, and /etc/kubernetes/addons is the # managed result is of that. Start everything below that directory. -echo "== Kubernetes addon manager started at $(date -Is) with ADDON_CHECK_INTERVAL_SEC=${ADDON_CHECK_INTERVAL_SEC}==" +echo "== Kubernetes addon manager started at $(date -Is) with ADDON_CHECK_INTERVAL_SEC=${ADDON_CHECK_INTERVAL_SEC} ==" # Load the kube-env, which has all the environment variables we care # about, in a flat yaml format. @@ -136,6 +143,18 @@ for k,v in yaml.load(sys.stdin).iteritems(): ''' < "${kube_env_yaml}") fi +# Create the namespace that will be used to host the cluster-level add-ons. +start_addon /etc/kubernetes/addons/namespace.yaml 100 10 "" & + +# Wait for the default service account to be created in the kube-system namespace. 
+token_found="" +while [ -z "${token_found}" ]; do + sleep .5 + token_found=$(${KUBECTL} get --namespace="${SYSTEM_NAMESPACE}" serviceaccount default -o template -t "{{with index .secrets 0}}{{.name}}{{end}}" || true) +done + +echo "== default service account in the ${SYSTEM_NAMESPACE} namespace has token ${token_found} ==" + # Generate secrets for "internal service accounts". # TODO(etune): move to a completely yaml/object based # workflow so that service accounts can be created @@ -143,6 +162,7 @@ fi # NOTE: needs to run as root to read this file. # Read each line in the csv file of tokens. # Expect errors when the script is started again. +# NOTE: secrets are created asynchronously, in background. while read line; do # Split each line into the token and username. IFS=',' read -a parts <<< "${line}" @@ -154,25 +174,29 @@ while read line; do else # Set the server to https://kubernetes. Pods/components that # do not have DNS available will have to override the server. - create-kubeconfig-secret "${token}" "${username}" "https://kubernetes" + create-kubeconfig-secret "${token}" "${username}" "https://kubernetes.default" fi -done < ${token_dir}/known_tokens.csv +done < "${token_dir}/known_tokens.csv" # Create admission_control objects if defined before any other addon services. If the limits # are defined in a namespace other than default, we should still create the limits for the # default namespace. for obj in $(find /etc/kubernetes/admission-controls \( -name \*.yaml -o -name \*.json \)); do - start_addon ${obj} 100 10 & + start_addon ${obj} 100 10 default & echo "++ obj ${obj} is created ++" done # Check if the configuration has changed recently - in case the user # created/updated/deleted the files on the master. 
while true; do + start_sec=$(date +"%s") #kube-addon-update.sh must be deployed in the same directory as this file - `dirname $0`/kube-addon-update.sh /etc/kubernetes/addons - sleep $ADDON_CHECK_INTERVAL_SEC + `dirname $0`/kube-addon-update.sh /etc/kubernetes/addons ${ADDON_CHECK_INTERVAL_SEC} + end_sec=$(date +"%s") + len_sec=$((${end_sec}-${start_sec})) + # subtract the time passed from the sleep time + if [[ ${len_sec} -lt ${ADDON_CHECK_INTERVAL_SEC} ]]; then + sleep_time=$((${ADDON_CHECK_INTERVAL_SEC}-${len_sec})) + sleep ${sleep_time} + fi done - - - diff --git a/contrib/ansible/roles/kubernetes-addons/tasks/main.yml b/contrib/ansible/roles/kubernetes-addons/tasks/main.yml index 775edd9669..739f87415e 100644 --- a/contrib/ansible/roles/kubernetes-addons/tasks/main.yml +++ b/contrib/ansible/roles/kubernetes-addons/tasks/main.yml @@ -11,6 +11,12 @@ state=directory sudo: no +- name: Make sure the system services namespace exists + get_url: + url=https://raw.githubusercontent.com/GoogleCloudPlatform/kubernetes/master/cluster/saltbase/salt/kube-addons/namespace.yaml + dest="{{ kube_config_dir }}/addons/" + force=yes + - include: dns.yml when: dns_setup