Merge pull request #63377 from Cynerva/gkk/fix-status-messages

Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

juju: Make kubernetes-master status handling more robust

**What this PR does / why we need it**:

This fixes a charm hook failure that can occur after restarting kubernetes-master units in Juju-based Kubernetes deployments.

This should also hopefully make our status handling in kubernetes-master more robust in general, by making sure all of our `blocked`, `waiting`, and `active` status messages are handled in one place, on hook exit.

**Which issue(s) this PR fixes**:

https://github.com/juju-solutions/bundle-canonical-kubernetes/issues/523

**Release note**:

```release-note
NONE
```
pull/8/head
Kubernetes Submit Queue 2018-05-02 23:06:41 -07:00 committed by GitHub
commit 51e97bfa6b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 51 additions and 46 deletions

View File

@ -228,13 +228,6 @@ def migrate_from_pre_snaps():
os.remove(file)
@when('kubernetes-master.upgrade-needed')
@when_not('kubernetes-master.upgrade-specified')
def upgrade_needed_status():
msg = 'Needs manual upgrade, run the upgrade action'
hookenv.status_set('blocked', msg)
@when('kubernetes-master.upgrade-specified')
def do_upgrade():
install_snaps()
@ -378,8 +371,6 @@ def get_keys_from_leader(keys, overwrite_local=False):
contents = leader_get(k)
# Default to logging the warning and wait for leader data to be set
if contents is None:
msg = "Waiting on leaders crypto keys."
hookenv.status_set('waiting', msg)
hookenv.log('Missing content for file {}'.format(k))
return False
# Write out the file and move on to the next item
@ -397,24 +388,61 @@ def set_app_version():
hookenv.application_version_set(version.split(b' v')[-1].rstrip())
@when('cdk-addons.configured', 'kube-api-endpoint.available',
'kube-control.connected')
@when_not('kubernetes-master.upgrade-needed')
def idle_status(kube_api, kube_control):
''' Signal at the end of the run that we are running. '''
if not all_kube_system_pods_running():
hookenv.status_set('waiting', 'Waiting for kube-system pods to start')
elif hookenv.config('service-cidr') != service_cidr():
msg = 'WARN: cannot change service-cidr, still using ' + service_cidr()
hookenv.status_set('active', msg)
else:
@hookenv.atexit
def set_final_status():
''' Set the final status of the charm as we leave hook execution '''
if not is_state('kube-api-endpoint.available'):
hookenv.status_set('blocked', 'Waiting for kube-api-endpoint relation')
return
if not is_state('kube-control.connected'):
hookenv.status_set('blocked', 'Waiting for workers.')
return
upgrade_needed = is_state('kubernetes-master.upgrade-needed')
upgrade_specified = is_state('kubernetes-master.upgrade-specified')
if upgrade_needed and not upgrade_specified:
msg = 'Needs manual upgrade, run the upgrade action'
hookenv.status_set('blocked', msg)
return
if is_state('kubernetes-master.components.started'):
# All services should be up and running at this point. Double-check...
failing_services = master_services_down()
if len(failing_services) == 0:
hookenv.status_set('active', 'Kubernetes master running.')
else:
if len(failing_services) != 0:
msg = 'Stopped services: {}'.format(','.join(failing_services))
hookenv.status_set('blocked', msg)
return
is_leader = is_state('leadership.is_leader')
authentication_setup = is_state('authentication.setup')
if not is_leader and not authentication_setup:
hookenv.status_set('waiting', 'Waiting on leaders crypto keys.')
return
components_started = is_state('kubernetes-master.components.started')
addons_configured = is_state('cdk-addons.configured')
if components_started and not addons_configured:
hookenv.status_set('waiting', 'Waiting to retry addon deployment')
return
if addons_configured and not all_kube_system_pods_running():
hookenv.status_set('waiting', 'Waiting for kube-system pods to start')
return
if hookenv.config('service-cidr') != service_cidr():
msg = 'WARN: cannot change service-cidr, still using ' + service_cidr()
hookenv.status_set('active', msg)
return
gpu_available = is_state('kube-control.gpu.available')
gpu_enabled = is_state('kubernetes-master.gpu.enabled')
if gpu_available and not gpu_enabled:
msg = 'GPUs available. Set allow-privileged="auto" to enable.'
hookenv.status_set('active', msg)
return
hookenv.status_set('active', 'Kubernetes master running.')
def master_services_down():
@ -540,18 +568,6 @@ def create_service_configs(kube_control):
remove_state('authentication.setup')
@when_not('kube-control.connected')
def missing_kube_control():
"""Inform the operator master is waiting for a relation to workers.
If deploying via bundle this won't happen, but if operator is upgrading a
a charm in a deployment that pre-dates the kube-control relation, it'll be
missing.
"""
hookenv.status_set('blocked', 'Waiting for workers.')
@when('kube-api-endpoint.available')
def push_service_data(kube_api):
''' Send configuration to the load balancer, and close access to the
@ -656,7 +672,6 @@ def configure_cdk_addons():
]
check_call(['snap', 'set', 'cdk-addons'] + args)
if not addons_ready():
hookenv.status_set('waiting', 'Waiting to retry addon deployment')
remove_state('cdk-addons.configured')
return
@ -896,10 +911,6 @@ def on_gpu_available(kube_control):
config = hookenv.config()
if (config['allow-privileged'].lower() == "false" and
kube_version < (1, 9)):
hookenv.status_set(
'active',
'GPUs available. Set allow-privileged="auto" to enable.'
)
return
remove_state('kubernetes-master.components.started')
@ -941,24 +952,18 @@ def shutdown():
def restart_apiserver():
prev_state, prev_msg = hookenv.status_get()
hookenv.status_set('maintenance', 'Restarting kube-apiserver')
host.service_restart('snap.kube-apiserver.daemon')
hookenv.status_set(prev_state, prev_msg)
def restart_controller_manager():
prev_state, prev_msg = hookenv.status_get()
hookenv.status_set('maintenance', 'Restarting kube-controller-manager')
host.service_restart('snap.kube-controller-manager.daemon')
hookenv.status_set(prev_state, prev_msg)
def restart_scheduler():
prev_state, prev_msg = hookenv.status_get()
hookenv.status_set('maintenance', 'Restarting kube-scheduler')
host.service_restart('snap.kube-scheduler.daemon')
hookenv.status_set(prev_state, prev_msg)
def arch():