From 95e294f55390b0a643beb78a91fe729fdaa600fb Mon Sep 17 00:00:00 2001 From: hunterlong Date: Fri, 12 Jun 2020 18:13:28 -0700 Subject: [PATCH] prometheus metrics for better debugging --- go.mod | 1 + go.sum | 6 +++ handlers/middleware.go | 26 ++++++++-- handlers/prometheus.go | 16 +++--- handlers/routes.go | 9 +++- notifiers/mobile.go | 2 +- types/failures/database.go | 9 +++- types/hits/database.go | 5 ++ types/metrics/metrics.go | 104 +++++++++++++++++++++++++++++++++++++ types/services/methods.go | 1 + types/services/routine.go | 2 + utils/metrics.go | 26 +--------- utils/utils.go | 9 ++-- 13 files changed, 169 insertions(+), 47 deletions(-) create mode 100644 types/metrics/metrics.go diff --git a/go.mod b/go.mod index f34d3d4d..93871658 100644 --- a/go.mod +++ b/go.mod @@ -20,6 +20,7 @@ require ( github.com/mitchellh/mapstructure v1.2.2 // indirect github.com/pelletier/go-toml v1.7.0 // indirect github.com/pkg/errors v0.9.1 + github.com/prometheus/client_golang v0.9.3 github.com/russross/blackfriday/v2 v2.0.1 github.com/sirupsen/logrus v1.4.2 github.com/spf13/afero v1.2.2 // indirect diff --git a/go.sum b/go.sum index 4c7af747..8f61dedf 100755 --- a/go.sum +++ b/go.sum @@ -26,6 +26,7 @@ github.com/ararog/timeago v0.0.0-20160328174124-e9969cf18b8d/go.mod h1:EcJ034Spb github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= github.com/aymerick/raymond v2.0.3-0.20180322193309-b565731e1464+incompatible/go.mod h1:osfaiScAUVup+UC9Nfq76eWqDhXlp+4UYaA8uhTBO6g= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= +github.com/beorn7/perks v1.0.0 h1:HWo1m869IqiPhD389kmkxeTalrjNbbJTC8LXupb+sl0= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/cespare/xxhash v1.1.0/go.mod 
h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= @@ -200,6 +201,7 @@ github.com/mattn/go-sqlite3 v2.0.1+incompatible/go.mod h1:FPy6KqzDD04eiIsT53CuJW github.com/mattn/go-sqlite3 v2.0.3+incompatible h1:gXHsfypPkaMZrKbD5209QV9jbUTJKjyR5WD3HYQSd+U= github.com/mattn/go-sqlite3 v2.0.3+incompatible/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc= github.com/mattn/goveralls v0.0.2/go.mod h1:8d1ZMHsd7fW6IRPKQh46F2WRpyib5/X4FOpevwGNQEw= +github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/mediocregopher/mediocre-go-lib v0.0.0-20181029021733-cb65787f37ed/go.mod h1:dSsfyI2zABAdhcbvkXqgxOxrCsbYeHCPgrZkku60dSg= github.com/mediocregopher/radix/v3 v3.3.0/go.mod h1:EmfVyvspXz1uZEyPBMyGK+kjWiKQGvsUt6O3Pj+LDCQ= @@ -243,13 +245,17 @@ github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= +github.com/prometheus/client_golang v0.9.3 h1:9iH4JKXLzFbOAdtqv/a+j8aewx2Y8lAjAydhbaScPF8= github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4 h1:gQz4mCbXsO+nc9n1hCxHcGA3Zx3Eo+UHZoInFGUIXNM= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/common 
v0.0.0-20181113130724-41aa239b4cce/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= +github.com/prometheus/common v0.4.0 h1:7etb9YClo3a6HjLzfl6rIQaU+FDfi0VSX39io3aQ+DM= github.com/prometheus/common v0.4.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= +github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084 h1:sofwID9zm4tzrgykg80hfFph1mryUeLRsUfoocVVmRY= github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= diff --git a/handlers/middleware.go b/handlers/middleware.go index 5430e403..b8d7bfc6 100644 --- a/handlers/middleware.go +++ b/handlers/middleware.go @@ -5,6 +5,9 @@ import ( "crypto/subtle" "encoding/json" "fmt" + "github.com/gorilla/mux" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" "github.com/statping/statping/types/core" "github.com/statping/statping/types/errors" "github.com/statping/statping/utils" @@ -16,8 +19,12 @@ import ( ) var ( - authUser string - authPass string + authUser string + authPass string + httpDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{ + Name: "http_duration_seconds", + Help: "Duration of HTTP requests.", + }, []string{"path"}) ) // Gzip Compression @@ -122,7 +129,7 @@ func authenticated(handler func(w http.ResponseWriter, r *http.Request), redirec } // readOnly is a middleware function to check if user is a User before running original request -func readOnly(handler func(w http.ResponseWriter, r *http.Request), redirect bool) http.Handler { +func readOnly(handler http.Handler, redirect bool) http.Handler { return http.HandlerFunc(func(w 
http.ResponseWriter, r *http.Request) { if !IsReadAuthenticated(r) { if redirect { @@ -132,7 +139,7 @@ func readOnly(handler func(w http.ResponseWriter, r *http.Request), redirect boo } return } - handler(w, r) + handler.ServeHTTP(w, r) }) } @@ -166,6 +173,26 @@ func cached(duration, contentType string, handler func(w http.ResponseWriter, r }) } +// prometheusMiddleware implements mux.MiddlewareFunc. It observes the duration +// of every request in the httpDuration histogram, labelled with the path +// template of the matched route. +func prometheusMiddleware(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // mux.CurrentRoute returns nil when no route is attached to the request, + // so guard the lookup instead of dereferencing a nil *mux.Route. + var path string + if route := mux.CurrentRoute(r); route != nil { + // The error only reports a route without a path template; the label + // stays empty in that case. + path, _ = route.GetPathTemplate() + } + timer := prometheus.NewTimer(httpDuration.WithLabelValues(path)) + // Deferred so the duration is still observed if next panics. + defer timer.ObserveDuration() + next.ServeHTTP(w, r) + }) +} + func DecodeJSON(r *http.Request, obj interface{}) error { decoder := json.NewDecoder(r.Body) err := decoder.Decode(&obj) diff --git a/handlers/prometheus.go b/handlers/prometheus.go index 7c29e130..85002603 100644 --- a/handlers/prometheus.go +++ b/handlers/prometheus.go @@ -2,6 +2,7 @@ package handlers import ( "fmt" + "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/statping/statping/types/failures" "github.com/statping/statping/types/services" "github.com/statping/statping/utils" @@ -38,7 +39,11 @@ func hex2int(hexStr string) uint64 { return uint64(result) } -func prometheusHandler(w http.ResponseWriter, r *http.Request) { +func prometheusHandler() http.Handler { + return promhttp.Handler() +} + +func prometheusOldHandler(w http.ResponseWriter, r *http.Request) { promValues = []string{} prefix = utils.Params.GetString("PREFIX") if prefix != "" { @@ -51,8 +56,6 @@ func prometheusHandler(w http.ResponseWriter, r *http.Request) { var m runtime.MemStats runtime.ReadMemStats(&m) - httpMetrics := utils.GetHttpMetrics() - promValues = append(promValues, "# Statping Prometheus Exporter") PrometheusComment("Statping Totals") @@ -101,13 +104,8 @@ func prometheusHandler(w http.ResponseWriter, r *http.Request) { } } -
PrometheusComment("HTTP Metrics") - PrometheusKeyValue("http_errors", httpMetrics.Errors) - PrometheusKeyValue("http_requests", httpMetrics.Requests) - PrometheusKeyValue("http_bytes", httpMetrics.Bytes) - PrometheusKeyValue("http_request_milliseconds", httpMetrics.Milliseconds) - // https://golang.org/pkg/runtime/#MemStats + PrometheusComment("Golang Metrics") PrometheusKeyValue("go_heap_allocated", m.Alloc) PrometheusKeyValue("go_total_allocated", m.TotalAlloc) diff --git a/handlers/routes.go b/handlers/routes.go index 54ad971c..26f27016 100644 --- a/handlers/routes.go +++ b/handlers/routes.go @@ -4,11 +4,14 @@ import ( "fmt" sentryhttp "github.com/getsentry/sentry-go/http" "github.com/gorilla/mux" + "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/statping/statping/source" "github.com/statping/statping/types/core" "github.com/statping/statping/utils" "net/http" "net/http/pprof" + + _ "github.com/statping/statping/types/metrics" ) var ( @@ -27,6 +30,7 @@ func Router() *mux.Router { CacheStorage = NewStorage() r := mux.NewRouter().StrictSlash(true) + r.Use(prometheusMiddleware) authUser := utils.Params.GetString("AUTH_USERNAME") authPass := utils.Params.GetString("AUTH_PASSWORD") @@ -86,6 +90,7 @@ func Router() *mux.Router { api := r.NewRoute().Subrouter() api.Use(apiMiddleware) + // No api.Use(prometheusMiddleware) here: r.Use above already wraps subrouter routes, a second Use would time them twice. // API Routes r.Handle("/api", scoped(apiIndexHandler)) @@ -113,7 +118,7 @@ func Router() *mux.Router { // API GROUPS Routes api.Handle("/api/groups", scoped(apiAllGroupHandler)).Methods("GET") api.Handle("/api/groups", authenticated(apiCreateGroupHandler, false)).Methods("POST") - api.Handle("/api/groups/{id}", readOnly(apiGroupHandler, false)).Methods("GET") + api.Handle("/api/groups/{id}", readOnly(http.HandlerFunc(apiGroupHandler), false)).Methods("GET") api.Handle("/api/groups/{id}", authenticated(apiGroupUpdateHandler, false)).Methods("POST") api.Handle("/api/groups/{id}", authenticated(apiGroupDeleteHandler,
false)).Methods("DELETE") api.Handle("/api/reorder/groups", authenticated(apiGroupReorderHandler, false)).Methods("POST") @@ -174,7 +179,7 @@ func Router() *mux.Router { r.Handle("/checkin/{api}", http.HandlerFunc(checkinHitHandler)) // API Generic Routes - r.Handle("/metrics", readOnly(prometheusHandler, false)) + r.Handle("/metrics", readOnly(promhttp.Handler(), false)) r.Handle("/health", http.HandlerFunc(healthCheckHandler)) r.Handle("/.well-known/", http.StripPrefix("/.well-known/", http.FileServer(http.Dir(dir+"/.well-known")))) r.NotFoundHandler = http.HandlerFunc(error404Handler) diff --git a/notifiers/mobile.go b/notifiers/mobile.go index 3cbbfa87..05eb5b9d 100644 --- a/notifiers/mobile.go +++ b/notifiers/mobile.go @@ -135,7 +135,7 @@ func pushRequest(msg *pushArray) ([]byte, error) { return nil, err } url := "https://push.statping.com/api/push" - body, _, err = utils.HttpRequest(url, "POST", "application/json", nil, bytes.NewBuffer(body), time.Duration(20*time.Second), true, nil) + body, _, err = utils.HttpRequest(url, "POST", "application/json", nil, bytes.NewBuffer(body), time.Duration(20*time.Second), false, nil) return body, err } diff --git a/types/failures/database.go b/types/failures/database.go index 200cff32..5d1b2a1a 100644 --- a/types/failures/database.go +++ b/types/failures/database.go @@ -1,6 +1,9 @@ package failures -import "github.com/statping/statping/database" +import ( + "github.com/statping/statping/database" + "github.com/statping/statping/types/metrics" +) var db database.Database @@ -18,6 +21,12 @@ func All() []*Failure { return failures } +// AfterCreate increments the service_failures counter for f.Service. +// NOTE(review): presumably run by the database layer once the record is created; confirm. +func (f *Failure) AfterCreate() { + metrics.Inc("failure", f.Service) +} + func (f *Failure) Create() error { q := db.Create(f) return q.Error() diff --git a/types/hits/database.go b/types/hits/database.go index 67f69027..552af148 100644 --- a/types/hits/database.go +++ b/types/hits/database.go @@ -2,6 +2,7 @@ package hits import ( "github.com/statping/statping/database" +
"github.com/statping/statping/types/metrics" "github.com/statping/statping/utils" ) @@ -13,6 +14,12 @@ func SetDB(database database.Database) { db = database.Model(&Hit{}) } +// AfterCreate increments the service_success counter for h.Service. +// NOTE(review): presumably run by the database layer once the record is created; confirm. +func (h *Hit) AfterCreate() { + metrics.Inc("success", h.Service) +} + func (h *Hit) Create() error { q := db.Create(h) return q.Error() diff --git a/types/metrics/metrics.go b/types/metrics/metrics.go new file mode 100644 index 00000000..065feb0b --- /dev/null +++ b/types/metrics/metrics.go @@ -0,0 +1,132 @@ +// Package metrics holds the Prometheus collectors shared across Statping +// packages together with small helpers to record values on them. +package metrics + +import ( + "fmt" + "github.com/prometheus/client_golang/prometheus" +) + +var ( + // serviceOnline is registered in init, but no helper in this file records + // on it. NOTE(review): confirm its intended meaning before using it. + serviceOnline = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "service_online", + Help: "How many times a service was online", + }, + []string{"service"}, + ) + + // serviceFailures is incremented through Inc and Add with method "failure". + serviceFailures = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "service_failures", + Help: "How many failures occur for a service", + }, + []string{"service"}, + ) + + // serviceSuccess is incremented through Inc and Add with method "success". + serviceSuccess = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "service_success", + Help: "How many successful requests for a service", + }, + []string{"service"}, + ) + + // serviceLatencyDuration is observed through Histo with method "latency". + serviceLatencyDuration = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "service_request_duration", + Help: "Duration in seconds of the check request for a service", + }, + []string{"service"}, + ) + + // utilsHttpRequestDur is observed through Histo with method "duration". + // NOTE(review): the "url" label carries whatever URL the caller passes; + // confirm that its cardinality stays bounded. + utilsHttpRequestDur = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "http_requests_duration", + Help: "Duration in seconds of outgoing HTTP requests", + }, + []string{"url", "method"}, + ) + + // utilsHttpRequestBytes is observed through Histo with method "bytes". + // It needs explicit buckets: the default ones are laid out for durations + // in seconds and would put nearly every byte count in the +Inf bucket. + utilsHttpRequestBytes = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "http_response_bytes", + Help: "Size in bytes of HTTP response bodies", + Buckets: prometheus.ExponentialBuckets(256, 4, 8), // 256 B up to 4 MiB + }, + []string{"url", "method"}, + ) +) + +// init registers every collector of this package on the default Prometheus +// registry. MustRegister panics if a registration fails. +func init() { + prometheus.MustRegister( + serviceOnline, + serviceFailures, + serviceSuccess, + serviceLatencyDuration, + utilsHttpRequestDur, + utilsHttpRequestBytes, + ) +} + +// Histo observes value on the histogram selected by method: "latency" +// (labels: service), "duration" or "bytes" (labels: url, method). Any other +// method is ignored. labels are formatted with %v and must match the label +// count of the histogram, otherwise WithLabelValues panics. +func Histo(method string, value float64, labels
...interface{}) { + switch method { + case "latency": + serviceLatencyDuration.WithLabelValues(convert(labels)...).Observe(value) + case "duration": + utilsHttpRequestDur.WithLabelValues(convert(labels)...).Observe(value) + case "bytes": + utilsHttpRequestBytes.WithLabelValues(convert(labels)...).Observe(value) + } +} + +// Inc adds one to the counter selected by method: "failure" or "success" +// (labels: service). Any other method is ignored. A label count mismatch +// makes WithLabelValues panic. +func Inc(method string, labels ...interface{}) { + switch method { + case "failure": + serviceFailures.WithLabelValues(convert(labels)...).Inc() + case "success": + serviceSuccess.WithLabelValues(convert(labels)...).Inc() + } +} + +// Add adds value to the counter selected by method: "failure" or "success" +// (labels: service). Any other method is ignored. A label count mismatch or +// a negative value panics. +func Add(method string, value float64, labels ...interface{}) { + switch method { + case "failure": + serviceFailures.WithLabelValues(convert(labels)...).Add(value) + case "success": + serviceSuccess.WithLabelValues(convert(labels)...).Add(value) + } +} + +// convert formats every label value with %v so it can be passed to +// WithLabelValues. +func convert(vals []interface{}) []string { + out := make([]string, 0, len(vals)) + for _, v := range vals { + out = append(out, fmt.Sprintf("%v", v)) + } + return out +} diff --git a/types/services/methods.go b/types/services/methods.go index 7a37c62b..98842429 100644 --- a/types/services/methods.go +++ b/types/services/methods.go @@ -253,6 +253,7 @@ func SelectAllServices(start bool) (map[int64]*Service, error) { return allServices, nil } for _, s := range all() { + if start { CheckinProcess(s) } diff --git a/types/services/routine.go b/types/services/routine.go index 99c9f0bf..46b58e02 100644 --- a/types/services/routine.go +++ b/types/services/routine.go @@ -4,6 +4,7 @@ import ( "bytes" "crypto/tls" "fmt" + "github.com/statping/statping/types/metrics" "google.golang.org/grpc" "net" "net/http" @@ -272,6 +273,7 @@ func CheckHttp(s *Service, record bool) (*Service, error) { return s, err } s.Latency = utils.Now().Sub(t1).Microseconds() + metrics.Histo("latency", utils.Now().Sub(t1).Seconds(), s.Id) s.LastResponse = string(content) s.LastStatusCode = res.StatusCode diff --git a/utils/metrics.go b/utils/metrics.go index bc067b44..489c2808 100644 --- a/utils/metrics.go +++
b/utils/metrics.go @@ -1,29 +1,5 @@ package utils -import "time" - -func init() { - httpMetric = new(Metrics) -} - var ( - httpMetric *Metrics - StartTime = Now() + StartTime = Now() ) - -type Metrics struct { - Requests int64 - Errors int64 - Bytes int64 - Milliseconds int64 - OnlineTime time.Time -} - -func (h *Metrics) Reset() { - httpMetric = new(Metrics) -} - -func GetHttpMetrics() *Metrics { - defer httpMetric.Reset() - return httpMetric -} diff --git a/utils/utils.go b/utils/utils.go index d028726d..87696d36 100644 --- a/utils/utils.go +++ b/utils/utils.go @@ -5,6 +5,7 @@ import ( "crypto/tls" "errors" "fmt" + "github.com/statping/statping/types/metrics" "io" "io/ioutil" "net" @@ -192,7 +193,6 @@ func HttpRequest(url, method string, content interface{}, headers []string, body var req *http.Request t1 := Now() if req, err = http.NewRequest(method, url, body); err != nil { - httpMetric.Errors++ return nil, nil, err } req.Header.Set("User-Agent", "Statping") @@ -254,7 +254,6 @@ func HttpRequest(url, method string, content interface{}, headers []string, body } if resp, err = client.Do(req); err != nil { - httpMetric.Errors++ return nil, resp, err } defer resp.Body.Close() @@ -262,10 +261,10 @@ func HttpRequest(url, method string, content interface{}, headers []string, body if err != nil { return nil, resp, err } + // record HTTP metrics - httpMetric.Requests++ - httpMetric.Milliseconds += Now().Sub(t1).Milliseconds() / httpMetric.Requests - httpMetric.Bytes += int64(len(contents)) + metrics.Histo("bytes", float64(len(contents)), url, method) + metrics.Histo("duration", Now().Sub(t1).Seconds(), url, method) return contents, resp, err }