prometheus metric updates

pull/668/head
hunterlong 2020-06-16 02:42:21 -07:00
parent c3418e2473
commit d4a557d2f8
7 changed files with 119 additions and 33 deletions

View File

@ -9,6 +9,7 @@ import (
"github.com/statping/statping/source" "github.com/statping/statping/source"
"github.com/statping/statping/types/configs" "github.com/statping/statping/types/configs"
"github.com/statping/statping/types/core" "github.com/statping/statping/types/core"
"github.com/statping/statping/types/metrics"
"github.com/statping/statping/types/services" "github.com/statping/statping/types/services"
"github.com/statping/statping/utils" "github.com/statping/statping/utils"
"os" "os"
@ -174,6 +175,8 @@ func InitApp() error {
if _, err := core.Select(); err != nil { if _, err := core.Select(); err != nil {
return err return err
} }
// init prometheus metrics
metrics.InitMetrics()
// select all services in database and store services in a mapping of Service pointers // select all services in database and store services in a mapping of Service pointers
if _, err := services.SelectAllServices(true); err != nil { if _, err := services.SelectAllServices(true); err != nil {
return err return err

View File

@ -4,6 +4,7 @@ import (
"database/sql" "database/sql"
"fmt" "fmt"
"github.com/jinzhu/gorm" "github.com/jinzhu/gorm"
"github.com/statping/statping/types/metrics"
"github.com/statping/statping/utils" "github.com/statping/statping/utils"
"strings" "strings"
"time" "time"
@ -119,6 +120,21 @@ func (it *Db) ChunkSize() int {
} }
} }
// Routine publishes connection-pool statistics of the package-level database
// handle as Prometheus gauges. Every 5 seconds it reads the pool stats and
// forwards the open, in-use and idle connection counts to metrics.Database
// under the keys "connections", "in_use" and "idle".
//
// The function never returns, so it must be run in its own goroutine; Openw
// starts it with `go Routine()` after the connection has been wrapped.
// NOTE(review): there is no way to stop this loop, and each Openw call starts
// another copy — confirm Openw is only called once per process.
func Routine() {
	const interval = 5 * time.Second

	// The post statement runs after every iteration, including the ones that
	// `continue`, so there is exactly one sleep between samples and the first
	// sample is taken immediately.
	for ; ; time.Sleep(interval) {
		// Fetch the handle once per iteration so the nil check and the
		// Stats call are guaranteed to operate on the same value even if
		// the package-level database is replaced concurrently.
		db := database.DB()
		if db == nil {
			continue
		}

		stats := db.Stats()
		metrics.Database("connections", float64(stats.OpenConnections))
		metrics.Database("in_use", float64(stats.InUse))
		metrics.Database("idle", float64(stats.Idle))
	}
}
func (it *Db) GormDB() *gorm.DB { func (it *Db) GormDB() *gorm.DB {
return it.Database return it.Database
} }
@ -184,6 +200,7 @@ func Openw(dialect string, args ...interface{}) (db Database, err error) {
return nil, err return nil, err
} }
database = Wrap(gormdb) database = Wrap(gormdb)
go Routine()
return database, err return database, err
} }

View File

@ -7,9 +7,9 @@ import (
"fmt" "fmt"
"github.com/gorilla/mux" "github.com/gorilla/mux"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/statping/statping/types/core" "github.com/statping/statping/types/core"
"github.com/statping/statping/types/errors" "github.com/statping/statping/types/errors"
"github.com/statping/statping/types/metrics"
"github.com/statping/statping/utils" "github.com/statping/statping/utils"
"io" "io"
"net/http" "net/http"
@ -21,10 +21,6 @@ import (
var ( var (
authUser string authUser string
authPass string authPass string
httpDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
Name: "http_duration_seconds",
Help: "Duration of HTTP requests.",
}, []string{"path"})
) )
// Gzip Compression // Gzip Compression
@ -178,7 +174,7 @@ func prometheusMiddleware(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
route := mux.CurrentRoute(r) route := mux.CurrentRoute(r)
path, _ := route.GetPathTemplate() path, _ := route.GetPathTemplate()
timer := prometheus.NewTimer(httpDuration.WithLabelValues(path)) timer := prometheus.NewTimer(metrics.Timer(path))
next.ServeHTTP(w, r) next.ServeHTTP(w, r)
timer.ObserveDuration() timer.ObserveDuration()
}) })

39
types/metrics/database.go Normal file
View File

@ -0,0 +1,39 @@
package metrics
import "github.com/prometheus/client_golang/prometheus"
// Gauges describing the database connection pool. They are declared without
// label names, so each vector holds a single series that is addressed with
// WithLabelValues() and no arguments.
//
// NOTE(review): InitMetrics, as shown in this change, passes only
// databaseConnections to prometheus.MustRegister. databaseInUse and
// databaseIdle appear to be unregistered and would therefore not be exported
// — confirm and add them to the MustRegister call.
var (
	// total number of open connections to the database
	databaseConnections = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: "statping",
			Name:      "database_connections",
			Help:      "Number of open connections to the database",
		}, nil,
	)

	// connections that are currently in use
	databaseInUse = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: "statping",
			Name:      "database_connections_in_use",
			Help:      "Number of database connections currently in use",
		}, nil,
	)

	// connections that are currently idle
	databaseIdle = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: "statping",
			Name:      "database_connections_idle",
			Help:      "Number of idle database connections",
		}, nil,
	)
)
func Database(method string, value float64) {
switch method {
case "connections":
databaseConnections.WithLabelValues().Set(value)
case "in_use":
databaseInUse.WithLabelValues().Set(value)
case "idle":
databaseIdle.WithLabelValues().Set(value)
}
}

View File

@ -8,37 +8,46 @@ import (
var ( var (
utilsHttpRequestDur = prometheus.NewHistogramVec( utilsHttpRequestDur = prometheus.NewHistogramVec(
prometheus.HistogramOpts{ prometheus.HistogramOpts{
Namespace: "statping",
Name: "http_requests_duration", Name: "http_requests_duration",
Help: "How many successful requests for a service", Help: "Duration for h",
}, },
[]string{"url", "method"}, []string{"url", "method"},
) )
utilsHttpRequestBytes = prometheus.NewHistogramVec( utilsHttpRequestBytes = prometheus.NewHistogramVec(
prometheus.HistogramOpts{ prometheus.HistogramOpts{
Namespace: "statping",
Name: "http_response_bytes", Name: "http_response_bytes",
Help: "How many successful requests for a service", Help: "Response in bytes for a HTTP services",
}, },
[]string{"url", "method"}, []string{"url", "method"},
) )
httpDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: "statping",
Name: "http_duration_seconds",
Help: "Duration of HTTP requests from the utils package",
}, []string{"path"})
) )
func init() { func InitMetrics() {
prometheus.MustRegister( prometheus.MustRegister(
serviceOnline, serviceOnline,
serviceFailures, serviceFailures,
serviceSuccess, serviceSuccess,
serviceStatusCode, serviceStatusCode,
serviceLatencyDuration, serviceDuration,
utilsHttpRequestDur, utilsHttpRequestDur,
utilsHttpRequestBytes, utilsHttpRequestBytes,
httpDuration,
databaseConnections,
) )
} }
func Histo(method string, value float64, labels ...interface{}) { func Histo(method string, value float64, labels ...interface{}) {
switch method { switch method {
case "latency":
serviceLatencyDuration.WithLabelValues(convert(labels)...).Observe(value)
case "duration": case "duration":
utilsHttpRequestDur.WithLabelValues(convert(labels)...).Observe(value) utilsHttpRequestDur.WithLabelValues(convert(labels)...).Observe(value)
case "bytes": case "bytes":
@ -46,9 +55,17 @@ func Histo(method string, value float64, labels ...interface{}) {
} }
} }
// Timer returns the observer of the httpDuration histogram
// ("http_duration_seconds") for the given label values. The histogram is
// declared with a single "path" label, so exactly one value is expected;
// the HTTP middleware passes the matched route's path template.
// The labels are turned into strings by convert before the lookup.
func Timer(labels ...interface{}) prometheus.Observer {
	values := convert(labels)
	return httpDuration.WithLabelValues(values...)
}
// ServiceTimer returns the observer of the serviceDuration histogram
// ("service_duration") for the given label values. The histogram is declared
// with a single "service" label, so exactly one value is expected; the
// service check functions pass the service ID.
// The labels are turned into strings by convert before the lookup.
func ServiceTimer(labels ...interface{}) prometheus.Observer {
	values := convert(labels)
	return serviceDuration.WithLabelValues(values...)
}
func Gauge(method string, value float64, labels ...interface{}) { func Gauge(method string, value float64, labels ...interface{}) {
switch method { switch method {
case "service": case "status_code":
serviceStatusCode.WithLabelValues(convert(labels)...).Set(value) serviceStatusCode.WithLabelValues(convert(labels)...).Set(value)
case "online": case "online":
serviceOnline.WithLabelValues(convert(labels)...).Set(value) serviceOnline.WithLabelValues(convert(labels)...).Set(value)

View File

@ -6,15 +6,17 @@ var (
// service is online if set to 1, offline if 0 // service is online if set to 1, offline if 0
serviceOnline = prometheus.NewGaugeVec( serviceOnline = prometheus.NewGaugeVec(
prometheus.GaugeOpts{ prometheus.GaugeOpts{
Namespace: "statping",
Name: "service_online", Name: "service_online",
Help: "If service is online", Help: "If service is online",
}, },
[]string{"service"}, []string{"service", "name", "type"},
) )
// service failures // service failures
serviceFailures = prometheus.NewCounterVec( serviceFailures = prometheus.NewCounterVec(
prometheus.CounterOpts{ prometheus.CounterOpts{
Namespace: "statping",
Name: "service_failures", Name: "service_failures",
Help: "How many failures occur for a service", Help: "How many failures occur for a service",
}, },
@ -24,6 +26,7 @@ var (
// successful hits for a service // successful hits for a service
serviceSuccess = prometheus.NewCounterVec( serviceSuccess = prometheus.NewCounterVec(
prometheus.CounterOpts{ prometheus.CounterOpts{
Namespace: "statping",
Name: "service_success", Name: "service_success",
Help: "How many successful requests for a service", Help: "How many successful requests for a service",
}, },
@ -31,10 +34,11 @@ var (
) )
// service check latency // service check latency
serviceLatencyDuration = prometheus.NewHistogramVec( serviceDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{ prometheus.HistogramOpts{
Name: "service_latency", Namespace: "statping",
Help: "How many successful requests for a service", Name: "service_duration",
Help: "Service request duration for a success response",
}, },
[]string{"service"}, []string{"service"},
) )
@ -42,6 +46,7 @@ var (
// http status code for a service // http status code for a service
serviceStatusCode = prometheus.NewGaugeVec( serviceStatusCode = prometheus.NewGaugeVec(
prometheus.GaugeOpts{ prometheus.GaugeOpts{
Namespace: "statping",
Name: "service_status_code", Name: "service_status_code",
Help: "HTTP Status code for a service", Help: "HTTP Status code for a service",
}, },

View File

@ -4,6 +4,7 @@ import (
"bytes" "bytes"
"crypto/tls" "crypto/tls"
"fmt" "fmt"
"github.com/prometheus/client_golang/prometheus"
"github.com/statping/statping/types/metrics" "github.com/statping/statping/types/metrics"
"google.golang.org/grpc" "google.golang.org/grpc"
"net" "net"
@ -87,6 +88,8 @@ func isIPv6(address string) bool {
// checkIcmp will send a ICMP ping packet to the service // checkIcmp will send a ICMP ping packet to the service
func CheckIcmp(s *Service, record bool) (*Service, error) { func CheckIcmp(s *Service, record bool) (*Service, error) {
defer s.updateLastCheck() defer s.updateLastCheck()
timer := prometheus.NewTimer(metrics.ServiceTimer(s.Id))
defer timer.ObserveDuration()
if err := utils.Ping(s.Domain, s.Timeout); err != nil { if err := utils.Ping(s.Domain, s.Timeout); err != nil {
if record { if record {
@ -102,6 +105,8 @@ func CheckIcmp(s *Service, record bool) (*Service, error) {
// CheckGrpc will check a gRPC service // CheckGrpc will check a gRPC service
func CheckGrpc(s *Service, record bool) (*Service, error) { func CheckGrpc(s *Service, record bool) (*Service, error) {
defer s.updateLastCheck() defer s.updateLastCheck()
timer := prometheus.NewTimer(metrics.ServiceTimer(s.Id))
defer timer.ObserveDuration()
dnsLookup, err := dnsCheck(s) dnsLookup, err := dnsCheck(s)
if err != nil { if err != nil {
@ -147,6 +152,8 @@ func CheckGrpc(s *Service, record bool) (*Service, error) {
// checkTcp will check a TCP service // checkTcp will check a TCP service
func CheckTcp(s *Service, record bool) (*Service, error) { func CheckTcp(s *Service, record bool) (*Service, error) {
defer s.updateLastCheck() defer s.updateLastCheck()
timer := prometheus.NewTimer(metrics.ServiceTimer(s.Id))
defer timer.ObserveDuration()
dnsLookup, err := dnsCheck(s) dnsLookup, err := dnsCheck(s)
if err != nil { if err != nil {
@ -212,6 +219,8 @@ func (s *Service) updateLastCheck() {
// checkHttp will check a HTTP service // checkHttp will check a HTTP service
func CheckHttp(s *Service, record bool) (*Service, error) { func CheckHttp(s *Service, record bool) (*Service, error) {
defer s.updateLastCheck() defer s.updateLastCheck()
timer := prometheus.NewTimer(metrics.ServiceTimer(s.Id))
defer timer.ObserveDuration()
dnsLookup, err := dnsCheck(s) dnsLookup, err := dnsCheck(s)
if err != nil { if err != nil {
@ -273,10 +282,10 @@ func CheckHttp(s *Service, record bool) (*Service, error) {
return s, err return s, err
} }
s.Latency = utils.Now().Sub(t1).Microseconds() s.Latency = utils.Now().Sub(t1).Microseconds()
metrics.Histo("latency", utils.Now().Sub(t1).Seconds(), s.Id)
s.LastResponse = string(content) s.LastResponse = string(content)
s.LastStatusCode = res.StatusCode s.LastStatusCode = res.StatusCode
metrics.Gauge("service", float64(res.StatusCode), s.Id)
metrics.Gauge("status_code", float64(res.StatusCode), s.Id)
if s.Expected.String != "" { if s.Expected.String != "" {
match, err := regexp.MatchString(s.Expected.String, string(content)) match, err := regexp.MatchString(s.Expected.String, string(content))
@ -320,9 +329,9 @@ func recordSuccess(s *Service) {
fmt.Sprintf("Service #%d '%v' Successful Response: %s | Lookup in: %s | Online: %v | Interval: %d seconds", s.Id, s.Name, humanMicro(hit.Latency), humanMicro(hit.PingTime), s.Online, s.Interval)) fmt.Sprintf("Service #%d '%v' Successful Response: %s | Lookup in: %s | Online: %v | Interval: %d seconds", s.Id, s.Name, humanMicro(hit.Latency), humanMicro(hit.PingTime), s.Online, s.Interval))
s.LastLookupTime = hit.PingTime s.LastLookupTime = hit.PingTime
s.LastLatency = hit.Latency s.LastLatency = hit.Latency
metrics.Gauge("online", 1., s.Id, s.Name, s.Type)
sendSuccess(s) sendSuccess(s)
s.SuccessNotified = true s.SuccessNotified = true
metrics.Gauge("online", 1., s.Id)
} }
func AddNotifier(n ServiceNotifier) { func AddNotifier(n ServiceNotifier) {
@ -374,8 +383,8 @@ func recordFailure(s *Service, issue string) {
s.Online = false s.Online = false
s.SuccessNotified = false s.SuccessNotified = false
s.DownText = s.DowntimeText() s.DownText = s.DowntimeText()
metrics.Gauge("online", 0., s.Id, s.Name, s.Type)
sendFailure(s, fail) sendFailure(s, fail)
metrics.Gauge("online", 0., s.Id)
} }
func sendFailure(s *Service, f *failures.Failure) { func sendFailure(s *Service, f *failures.Failure) {