Merge pull request #51534 from deads2k/scheduler-01-dont-panic

Automatic merge from submit-queue (batch tested with PRs 51574, 51534, 49257, 44680, 48836)

update scheduler to return structured errors instead of process exit

The scheduler's Run method now returns an error that is properly handled at higher levels.  Instead of exiting the process, we return the error and handle it in higher-level logic, which allows testing of error conditions and composition of commands.  The changes are relatively minor.
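To make the intent concrete, here is a minimal standalone sketch of the pattern (not the scheduler's actual code; `runScheduler` and the error messages are invented for illustration): the long-running function reports failures to its caller instead of calling glog.Fatalf or panicking, so tests can exercise the error paths and the wrapping command decides how to exit.

```go
package main

import (
	"fmt"
	"os"
)

// runScheduler stands in for the real Run(s *options.SchedulerServer) error.
// It returns an error on failure instead of exiting the process itself.
func runScheduler() error {
	// ... set up informers, start the scheduling loop, etc.
	// When something goes wrong, surface it to the caller:
	return fmt.Errorf("finished without leader elect")
}

func main() {
	// Only the outermost caller decides how errors terminate the process,
	// which keeps runScheduler testable and composable.
	if err := runScheduler(); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}
```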

@sjenning @aveshagarwal
Kubernetes Submit Queue 2017-08-31 23:13:18 -07:00 committed by GitHub
commit 1d0d3db2bc
2 changed files with 25 additions and 17 deletions


@@ -30,6 +30,7 @@ go_library(
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/runtime:go_default_library",
"//vendor/k8s.io/apiserver/pkg/server/healthz:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/client-go/informers:go_default_library",


@@ -28,6 +28,7 @@ import (
"k8s.io/apiserver/pkg/server/healthz"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/client-go/informers"
"k8s.io/client-go/tools/leaderelection"
"k8s.io/client-go/tools/leaderelection/resourcelock"
@@ -105,14 +106,14 @@ func Run(s *options.SchedulerServer) error {
informerFactory.WaitForCacheSync(stop)
controller.WaitForCacheSync("scheduler", stop, podInformer.Informer().HasSynced)
run := func(_ <-chan struct{}) {
run := func(stopCh <-chan struct{}) {
sched.Run()
select {}
<-stopCh
}
if !s.LeaderElection.LeaderElect {
run(nil)
panic("unreachable")
run(stop)
return fmt.Errorf("finished without leader elect")
}
id, err := os.Hostname()
@@ -129,23 +130,29 @@ func Run(s *options.SchedulerServer) error {
EventRecorder: recorder,
})
if err != nil {
glog.Fatalf("error creating lock: %v", err)
return fmt.Errorf("error creating lock: %v", err)
}
leaderelection.RunOrDie(leaderelection.LeaderElectionConfig{
Lock: rl,
LeaseDuration: s.LeaderElection.LeaseDuration.Duration,
RenewDeadline: s.LeaderElection.RenewDeadline.Duration,
RetryPeriod: s.LeaderElection.RetryPeriod.Duration,
Callbacks: leaderelection.LeaderCallbacks{
OnStartedLeading: run,
OnStoppedLeading: func() {
glog.Fatalf("lost master")
leaderElector, err := leaderelection.NewLeaderElector(
leaderelection.LeaderElectionConfig{
Lock: rl,
LeaseDuration: s.LeaderElection.LeaseDuration.Duration,
RenewDeadline: s.LeaderElection.RenewDeadline.Duration,
RetryPeriod: s.LeaderElection.RetryPeriod.Duration,
Callbacks: leaderelection.LeaderCallbacks{
OnStartedLeading: run,
OnStoppedLeading: func() {
utilruntime.HandleError(fmt.Errorf("lost master"))
},
},
},
})
})
if err != nil {
return err
}
panic("unreachable")
leaderElector.Run()
return fmt.Errorf("lost lease")
}
func startHTTP(s *options.SchedulerServer) {
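As an aside on the new run callback above: blocking on the stop channel (`<-stopCh`) lets the callback return once the channel is closed, whereas the old `select {}` blocked forever and the only way out was to exit the process. A minimal standalone sketch of that idiom (names invented for illustration):

```go
package main

import (
	"fmt"
	"time"
)

// doWork mirrors the shape of the new run callback: kick off the work,
// then block until stopCh is closed rather than blocking forever.
func doWork(stopCh <-chan struct{}) {
	fmt.Println("work started")
	<-stopCh // unblocks as soon as stopCh is closed
	fmt.Println("stop signaled, returning to caller")
}

func main() {
	stop := make(chan struct{})
	go func() {
		time.Sleep(100 * time.Millisecond)
		close(stop) // simulate shutdown
	}()
	doWork(stop)
}
```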