mirror of https://github.com/hashicorp/consul
Dan Upton
2 years ago
committed by
GitHub
17 changed files with 498 additions and 54 deletions
@ -0,0 +1,20 @@
|
||||
package controller |
||||
|
||||
import ( |
||||
"github.com/hashicorp/consul/proto-public/pbresource" |
||||
) |
||||
|
||||
// ForType begins building a Controller for the given resource type.
|
||||
func ForType(managedType *pbresource.Type) Controller { |
||||
return Controller{managedType: managedType} |
||||
} |
||||
|
||||
// Controller runs a reconciliation loop to respond to changes in resources and
|
||||
// their dependencies. It is heavily inspired by Kubernetes' controller pattern:
|
||||
// https://kubernetes.io/docs/concepts/architecture/controller/
|
||||
//
|
||||
// Use the builder methods in this package (starting with ForType) to construct
|
||||
// a controller, and then pass it to a Manager to be executed.
|
||||
type Controller struct { |
||||
managedType *pbresource.Type |
||||
} |
@ -0,0 +1,22 @@
|
||||
package controller |
||||
|
||||
import ( |
||||
"context" |
||||
|
||||
"github.com/hashicorp/go-hclog" |
||||
) |
||||
|
||||
// controllerRunner contains the actual implementation of running a controller
|
||||
// including creating watches, calling the reconciler, handling retries, etc.
|
||||
type controllerRunner struct { |
||||
ctrl Controller |
||||
logger hclog.Logger |
||||
} |
||||
|
||||
func (c *controllerRunner) run(ctx context.Context) error { |
||||
c.logger.Debug("controller running") |
||||
defer c.logger.Debug("controller stopping") |
||||
|
||||
<-ctx.Done() |
||||
return ctx.Err() |
||||
} |
@ -0,0 +1,24 @@
|
||||
package controller |
||||
|
||||
// Lease ensures controllers run as singletons, i.e. one leader-elected
// instance per cluster.
//
// For now it is only an abstraction over Raft leadership. In the future, a
// backend-agnostic leasing system will be built into the Resource Service,
// which will allow controllers to be balanced between many servers.
type Lease interface {
	// Held reports whether we are currently the lease-holder.
	Held() bool

	// Changed returns a channel that receives a notification whenever the
	// lease is acquired or lost.
	Changed() <-chan struct{}
}
||||
|
||||
type raftLease struct { |
||||
m *Manager |
||||
ch <-chan struct{} |
||||
} |
||||
|
||||
func (l *raftLease) Held() bool { return l.m.raftLeader.Load() } |
||||
func (l *raftLease) Changed() <-chan struct{} { return l.ch } |
@ -0,0 +1,89 @@
|
||||
package controller |
||||
|
||||
import ( |
||||
"context" |
||||
"sync" |
||||
"sync/atomic" |
||||
|
||||
"github.com/hashicorp/go-hclog" |
||||
|
||||
"github.com/hashicorp/consul/internal/resource" |
||||
) |
||||
|
||||
// Manager is responsible for scheduling the execution of controllers.
|
||||
type Manager struct { |
||||
logger hclog.Logger |
||||
|
||||
raftLeader atomic.Bool |
||||
|
||||
mu sync.Mutex |
||||
running bool |
||||
controllers []Controller |
||||
leaseChans []chan struct{} |
||||
} |
||||
|
||||
// NewManager creates a Manager. logger will be used by the Manager, and as the
|
||||
// base logger for controllers when one is not specified using WithLogger.
|
||||
func NewManager(logger hclog.Logger) *Manager { |
||||
return &Manager{logger: logger} |
||||
} |
||||
|
||||
// Register the given controller to be executed by the Manager. Cannot be called
|
||||
// once the Manager is running.
|
||||
func (m *Manager) Register(ctrl Controller) { |
||||
m.mu.Lock() |
||||
defer m.mu.Unlock() |
||||
|
||||
if m.running { |
||||
panic("cannot register additional controllers after calling Run") |
||||
} |
||||
|
||||
m.controllers = append(m.controllers, ctrl) |
||||
} |
||||
|
||||
// Run the Manager and start executing controllers until the given context is
|
||||
// canceled. Cannot be called more than once.
|
||||
func (m *Manager) Run(ctx context.Context) { |
||||
m.mu.Lock() |
||||
defer m.mu.Unlock() |
||||
|
||||
if m.running { |
||||
panic("cannot call Run more than once") |
||||
} |
||||
m.running = true |
||||
|
||||
for _, desc := range m.controllers { |
||||
runner := &controllerRunner{ |
||||
ctrl: desc, |
||||
logger: m.logger.With("managed_type", resource.ToGVK(desc.managedType)), |
||||
} |
||||
go newSupervisor(runner.run, m.newLeaseLocked()).run(ctx) |
||||
} |
||||
} |
||||
|
||||
// SetRaftLeader notifies the Manager of Raft leadership changes. Controllers
|
||||
// are currently only executed on the Raft leader, so calling this method will
|
||||
// cause the Manager to spin them up/down accordingly.
|
||||
func (m *Manager) SetRaftLeader(leader bool) { |
||||
m.raftLeader.Store(leader) |
||||
|
||||
m.mu.Lock() |
||||
defer m.mu.Unlock() |
||||
|
||||
for _, ch := range m.leaseChans { |
||||
select { |
||||
case ch <- struct{}{}: |
||||
default: |
||||
// Do not block if there's nothing receiving on ch (because the supervisor is
|
||||
// busy doing something else). Note that ch has a buffer of 1, so we'll never
|
||||
// miss the notification that something has changed so we need to re-evaluate
|
||||
// the lease.
|
||||
} |
||||
} |
||||
} |
||||
|
||||
func (m *Manager) newLeaseLocked() Lease { |
||||
ch := make(chan struct{}, 1) |
||||
m.leaseChans = append(m.leaseChans, ch) |
||||
return &raftLease{m: m, ch: ch} |
||||
} |
@ -0,0 +1,140 @@
|
||||
package controller |
||||
|
||||
import ( |
||||
"context" |
||||
"time" |
||||
|
||||
"github.com/hashicorp/consul/lib/retry" |
||||
) |
||||
|
||||
// flapThreshold is how long a task must have been running before it stopped
// for the stop *not* to count towards a crash-loop. A task that stops sooner
// than this after being started is treated as flapping.
const flapThreshold time.Duration = 2 * time.Second
||||
|
||||
// supervisor keeps a task running, restarting it on-error, for as long as the
|
||||
// given lease is held. When the lease is lost, the context given to the task
|
||||
// will be canceled. If the task persistently fails (i.e. the controller is in
|
||||
// a crash-loop) supervisor will use exponential backoff to delay restarts.
|
||||
type supervisor struct { |
||||
task task |
||||
lease Lease |
||||
|
||||
running bool |
||||
startedAt time.Time |
||||
errCh chan error |
||||
cancelTask context.CancelFunc |
||||
|
||||
backoff *retry.Waiter |
||||
backoffUntil time.Time |
||||
backoffTimerCh <-chan time.Time |
||||
} |
||||
|
||||
func newSupervisor(task task, lease Lease) *supervisor { |
||||
return &supervisor{ |
||||
task: task, |
||||
lease: lease, |
||||
errCh: make(chan error), |
||||
backoff: &retry.Waiter{ |
||||
MinFailures: 1, |
||||
MinWait: 500 * time.Millisecond, |
||||
MaxWait: time.Minute, |
||||
Jitter: retry.NewJitter(25), |
||||
}, |
||||
} |
||||
} |
||||
|
||||
// task is a unit of work run by a supervisor. It receives a context that is
// canceled when the task should stop, and its returned error is reported back
// to the supervisor.
type task func(context.Context) error
||||
|
||||
func (s *supervisor) run(ctx context.Context) { |
||||
for { |
||||
if s.shouldStart() { |
||||
s.startTask(ctx) |
||||
} else if s.shouldStop() { |
||||
s.stopTask() |
||||
} |
||||
|
||||
select { |
||||
// Outer context canceled.
|
||||
case <-ctx.Done(): |
||||
if s.cancelTask != nil { |
||||
s.cancelTask() |
||||
} |
||||
return |
||||
|
||||
// Task stopped running.
|
||||
case err := <-s.errCh: |
||||
stopBackoffTimer := s.handleError(err) |
||||
if stopBackoffTimer != nil { |
||||
defer stopBackoffTimer() |
||||
} |
||||
|
||||
// Unblock when the lease is acquired/lost, or the backoff timer fires.
|
||||
case <-s.lease.Changed(): |
||||
case <-s.backoffTimerCh: |
||||
} |
||||
} |
||||
} |
||||
|
||||
func (s *supervisor) shouldStart() bool { |
||||
if s.running { |
||||
return false |
||||
} |
||||
|
||||
if !s.lease.Held() { |
||||
return false |
||||
} |
||||
|
||||
if time.Now().Before(s.backoffUntil) { |
||||
return false |
||||
} |
||||
|
||||
return true |
||||
} |
||||
|
||||
func (s *supervisor) startTask(ctx context.Context) { |
||||
if s.cancelTask != nil { |
||||
s.cancelTask() |
||||
} |
||||
|
||||
taskCtx, cancelTask := context.WithCancel(ctx) |
||||
s.cancelTask = cancelTask |
||||
s.startedAt = time.Now() |
||||
s.running = true |
||||
|
||||
go func() { |
||||
err := s.task(taskCtx) |
||||
|
||||
select { |
||||
case s.errCh <- err: |
||||
case <-ctx.Done(): |
||||
} |
||||
}() |
||||
} |
||||
|
||||
func (s *supervisor) shouldStop() bool { |
||||
return s.running && !s.lease.Held() |
||||
} |
||||
|
||||
func (s *supervisor) stopTask() { |
||||
s.cancelTask() |
||||
s.backoff.Reset() |
||||
s.running = false |
||||
} |
||||
|
||||
func (s *supervisor) handleError(err error) func() bool { |
||||
s.running = false |
||||
|
||||
if time.Since(s.startedAt) > flapThreshold { |
||||
s.backoff.Reset() |
||||
s.backoffUntil = time.Time{} |
||||
} else { |
||||
delay := s.backoff.WaitDuration() |
||||
s.backoffUntil = time.Now().Add(delay) |
||||
|
||||
timer := time.NewTimer(delay) |
||||
s.backoffTimerCh = timer.C |
||||
return timer.Stop |
||||
} |
||||
|
||||
return nil |
||||
} |
@ -0,0 +1,118 @@
|
||||
package controller |
||||
|
||||
import ( |
||||
"context" |
||||
"errors" |
||||
"sync/atomic" |
||||
"testing" |
||||
"time" |
||||
) |
||||
|
||||
func TestSupervise(t *testing.T) { |
||||
t.Parallel() |
||||
|
||||
ctx, cancel := context.WithCancel(context.Background()) |
||||
t.Cleanup(cancel) |
||||
|
||||
runCh := make(chan struct{}) |
||||
stopCh := make(chan struct{}) |
||||
errCh := make(chan error) |
||||
|
||||
task := func(taskCtx context.Context) error { |
||||
runCh <- struct{}{} |
||||
|
||||
select { |
||||
case err := <-errCh: |
||||
return err |
||||
case <-taskCtx.Done(): |
||||
stopCh <- struct{}{} |
||||
return taskCtx.Err() |
||||
} |
||||
} |
||||
|
||||
lease := newTestLease() |
||||
|
||||
go newSupervisor(task, lease).run(ctx) |
||||
|
||||
select { |
||||
case <-runCh: |
||||
t.Fatal("task should not be running before lease is held") |
||||
case <-time.After(500 * time.Millisecond): |
||||
} |
||||
|
||||
lease.acquired() |
||||
|
||||
select { |
||||
case <-runCh: |
||||
case <-time.After(500 * time.Millisecond): |
||||
t.Fatal("task not running after lease is acquired") |
||||
} |
||||
|
||||
select { |
||||
case <-stopCh: |
||||
t.Fatal("task should not have stopped before lease is lost") |
||||
case <-time.After(500 * time.Millisecond): |
||||
} |
||||
|
||||
lease.lost() |
||||
|
||||
select { |
||||
case <-stopCh: |
||||
case <-time.After(500 * time.Millisecond): |
||||
t.Fatal("task still running after lease was lost") |
||||
} |
||||
|
||||
select { |
||||
case <-runCh: |
||||
t.Fatal("task should not be run again before lease is re-acquired") |
||||
case <-time.After(500 * time.Millisecond): |
||||
} |
||||
|
||||
lease.acquired() |
||||
|
||||
select { |
||||
case <-runCh: |
||||
case <-time.After(500 * time.Millisecond): |
||||
t.Fatal("task not running after lease is re-acquired") |
||||
} |
||||
|
||||
errCh <- errors.New("KABOOM") |
||||
|
||||
select { |
||||
case <-runCh: |
||||
case <-time.After(2 * time.Second): |
||||
t.Fatal("task was not restarted") |
||||
} |
||||
|
||||
cancel() |
||||
|
||||
select { |
||||
case <-stopCh: |
||||
case <-time.After(500 * time.Millisecond): |
||||
t.Fatal("task still running after parent context was canceled") |
||||
} |
||||
} |
||||
|
||||
// newTestLease returns a testLease that is not held and has no pending
// notification.
func newTestLease() *testLease {
	lease := new(testLease)
	lease.ch = make(chan struct{}, 1)
	return lease
}

// testLease is a lease whose state is flipped by hand via its acquired and
// lost methods. It provides the Held and Changed methods.
type testLease struct {
	// held is the current lease state.
	held atomic.Bool
	// ch carries change notifications; newTestLease gives it a buffer of 1.
	ch chan struct{}
}

// Held reports whether the lease is currently marked as held.
func (l *testLease) Held() bool {
	return l.held.Load()
}

// Changed returns the channel on which change notifications are delivered.
func (l *testLease) Changed() <-chan struct{} {
	return l.ch
}

// acquired marks the lease as held and signals the change.
func (l *testLease) acquired() {
	l.setHeld(true)
}

// lost marks the lease as not held and signals the change.
func (l *testLease) lost() {
	l.setHeld(false)
}

// setHeld stores the new state and then attempts a non-blocking send on the
// notification channel; if the send cannot proceed immediately the
// notification is dropped.
func (l *testLease) setHeld(held bool) {
	l.held.Store(held)

	select {
	case l.ch <- struct{}{}:
	default:
	}
}
@ -0,0 +1,13 @@
|
||||
package demo |
||||
|
||||
import "github.com/hashicorp/consul/internal/controller" |
||||
|
||||
// RegisterControllers registers controllers for the demo types. Should only be
|
||||
// called in dev mode.
|
||||
func RegisterControllers(mgr *controller.Manager) { |
||||
mgr.Register(artistController()) |
||||
} |
||||
|
||||
func artistController() controller.Controller { |
||||
return controller.ForType(TypeV2Artist) |
||||
} |
Loading…
Reference in new issue