Allow panics and unhandled errors to be reported to external targets

It would be better if Kubernetes could be instrumented to report panics
and handled but unreturned errors in controller loops to a remote
service.  This commit introduces settable handlers for HandleCrash and a new
HandleError that can be overridden to report errors to a remote service
for analysis.  HandleError() in particular is for control loops that do
not return an error, and so there is no ability to report those errors
other than in log files.

ReplicationController is provided as an example.
pull/6/head
Clayton Coleman 2015-01-26 20:23:46 -05:00
parent c65f83f424
commit 04b864a91f
2 changed files with 73 additions and 11 deletions

View File

@ -34,26 +34,53 @@ import (
// ReallyCrash, when set to true, makes HandleCrash return immediately without
// recovering, so the panic keeps propagating. For testing, bypass HandleCrash.
var ReallyCrash bool
// PanicHandlers is a list of functions which will be invoked when a panic happens.
// HandleCrash calls each one in order with the recovered panic value. The default
// list contains only logPanic; replace or extend it to report panics elsewhere.
var PanicHandlers = []func(interface{}){logPanic}
// HandleCrash simply catches a crash and logs an error. Meant to be called via defer.
func HandleCrash() {
if ReallyCrash {
return
}
r := recover()
if r != nil {
callers := ""
for i := 0; true; i++ {
_, file, line, ok := runtime.Caller(i)
if !ok {
break
}
callers = callers + fmt.Sprintf("%v:%v\n", file, line)
if r := recover(); r != nil {
for _, fn := range PanicHandlers {
fn(r)
}
glog.Infof("Recovered from panic: %#v (%v)\n%v", r, r, callers)
}
}
// logPanic logs the recovered panic value r together with a file:line listing
// of the call stack at the point where logPanic is running.
func logPanic(r interface{}) {
	var callers string
	// Walk up the stack one frame at a time until runtime.Caller reports
	// that there is no frame at the requested depth.
	depth := 0
	for {
		_, file, line, ok := runtime.Caller(depth)
		if !ok {
			break
		}
		callers += fmt.Sprintf("%v:%v\n", file, line)
		depth++
	}
	glog.Infof("Recovered from panic: %#v (%v)\n%v", r, r, callers)
}
// ErrorHandlers is a list of functions which will be invoked when an unreturnable
// error occurs. HandleError calls each one in order with the reported error. The
// default list contains only logError.
var ErrorHandlers = []func(error){logError}
// HandleError is a method to invoke when a non-user facing piece of code cannot
// return an error and needs to indicate it has been ignored. Invoking this method
// is preferable to logging the error - the default behavior is to log but the
// errors may be sent to a remote server for analysis.
//
// Every function currently registered in ErrorHandlers is called with err, in
// slice order.
func HandleError(err error) {
	handlers := ErrorHandlers
	for i := range handlers {
		handlers[i](err)
	}
}
// logError prints an error with the call stack of the location it was reported.
// It is the default entry in ErrorHandlers.
func logError(err error) {
// Depth 2 asks glog to attribute the log line to a frame two levels above this
// call; presumably that skips logError and HandleError so the caller of
// HandleError is reported - this only holds when invoked via HandleError.
glog.ErrorDepth(2, err)
}
// Forever loops forever running f every period. Catches any panics, and keeps going.
func Forever(f func(), period time.Duration) {
Until(f, period, nil)

View File

@ -18,6 +18,7 @@ package util
import (
"encoding/json"
"fmt"
"reflect"
"testing"
@ -59,6 +60,40 @@ func TestHandleCrash(t *testing.T) {
}
}
// TestCustomHandleCrash checks that HandleCrash passes the recovered panic
// value to a handler installed in PanicHandlers.
func TestCustomHandleCrash(t *testing.T) {
	// Put the package-level handler list back when the test finishes.
	defer func(saved []func(interface{})) { PanicHandlers = saved }(PanicHandlers)

	var got interface{}
	capture := func(r interface{}) { got = r }
	PanicHandlers = []func(interface{}){capture}

	// Panic inside a nested function so the deferred HandleCrash can recover
	// and control returns here.
	func() {
		defer HandleCrash()
		panic("test")
	}()

	if got != "test" {
		t.Errorf("did not receive custom handler")
	}
}
// TestCustomHandleError checks that HandleError delivers the reported error to
// a handler installed in ErrorHandlers.
func TestCustomHandleError(t *testing.T) {
	// Put the package-level handler list back when the test finishes.
	defer func(saved []func(error)) { ErrorHandlers = saved }(ErrorHandlers)

	var got error
	capture := func(e error) { got = e }
	ErrorHandlers = []func(error){capture}

	want := fmt.Errorf("test")
	HandleError(want)

	if got != want {
		t.Errorf("did not receive custom handler")
	}
}
func TestNewIntOrStringFromInt(t *testing.T) {
i := NewIntOrStringFromInt(93)
if i.Kind != IntstrInt || i.IntVal != 93 {