prometheus metric updates

pull/668/head
hunterlong 2020-06-16 02:42:21 -07:00
parent c3418e2473
commit d4a557d2f8
7 changed files with 119 additions and 33 deletions

View File

@ -9,6 +9,7 @@ import (
"github.com/statping/statping/source"
"github.com/statping/statping/types/configs"
"github.com/statping/statping/types/core"
"github.com/statping/statping/types/metrics"
"github.com/statping/statping/types/services"
"github.com/statping/statping/utils"
"os"
@ -174,6 +175,8 @@ func InitApp() error {
if _, err := core.Select(); err != nil {
return err
}
// init prometheus metrics
metrics.InitMetrics()
// select all services in database and store services in a mapping of Service pointers
if _, err := services.SelectAllServices(true); err != nil {
return err

View File

@ -4,6 +4,7 @@ import (
"database/sql"
"fmt"
"github.com/jinzhu/gorm"
"github.com/statping/statping/types/metrics"
"github.com/statping/statping/utils"
"strings"
"time"
@ -119,6 +120,21 @@ func (it *Db) ChunkSize() int {
}
}
// Routine never returns. On every pass it checks database.DB(); when that is
// non-nil it reads the Stats() snapshot and forwards the OpenConnections,
// InUse and Idle values to metrics.Database. It then sleeps five seconds
// before the next pass, whether or not a handle was available.
func Routine() {
	const pollEvery = 5 * time.Second
	// The post statement runs after every pass, including the ones cut
	// short by continue, so each iteration is followed by one sleep.
	for ; ; time.Sleep(pollEvery) {
		if database.DB() == nil {
			continue
		}
		poolStats := database.DB().Stats()
		metrics.Database("connections", float64(poolStats.OpenConnections))
		metrics.Database("in_use", float64(poolStats.InUse))
		metrics.Database("idle", float64(poolStats.Idle))
	}
}
// GormDB returns the value stored in the receiver's Database field.
func (it *Db) GormDB() *gorm.DB {
	handle := it.Database
	return handle
}
@ -184,6 +200,7 @@ func Openw(dialect string, args ...interface{}) (db Database, err error) {
return nil, err
}
database = Wrap(gormdb)
go Routine()
return database, err
}

View File

@ -7,9 +7,9 @@ import (
"fmt"
"github.com/gorilla/mux"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/statping/statping/types/core"
"github.com/statping/statping/types/errors"
"github.com/statping/statping/types/metrics"
"github.com/statping/statping/utils"
"io"
"net/http"
@ -19,12 +19,8 @@ import (
)
var (
authUser string
authPass string
httpDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
Name: "http_duration_seconds",
Help: "Duration of HTTP requests.",
}, []string{"path"})
authUser string
authPass string
)
// Gzip Compression
@ -178,7 +174,7 @@ func prometheusMiddleware(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
route := mux.CurrentRoute(r)
path, _ := route.GetPathTemplate()
timer := prometheus.NewTimer(httpDuration.WithLabelValues(path))
timer := prometheus.NewTimer(metrics.Timer(path))
next.ServeHTTP(w, r)
timer.ObserveDuration()
})

39
types/metrics/database.go Normal file
View File

@ -0,0 +1,39 @@
package metrics
import "github.com/prometheus/client_golang/prometheus"
// Database connection-pool gauges. Each vector is declared without label
// names (nil), so it carries a single series that is addressed with
// WithLabelValues() and no arguments.
var (
	// databaseConnections holds the value reported as "connections".
	databaseConnections = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: "statping",
			Name:      "database_connections",
			Help:      "Number of open connections to the database",
		}, nil,
	)

	// databaseInUse holds the value reported as "in_use".
	databaseInUse = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: "statping",
			Name:      "database_connections_in_use",
			Help:      "Number of database connections currently in use",
		}, nil,
	)

	// databaseIdle holds the value reported as "idle".
	databaseIdle = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: "statping",
			Name:      "database_connections_idle",
			Help:      "Number of idle database connections",
		}, nil,
	)
)
func Database(method string, value float64) {
switch method {
case "connections":
databaseConnections.WithLabelValues().Set(value)
case "in_use":
databaseInUse.WithLabelValues().Set(value)
case "idle":
databaseIdle.WithLabelValues().Set(value)
}
}

View File

@ -8,37 +8,46 @@ import (
var (
utilsHttpRequestDur = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "http_requests_duration",
Help: "How many successful requests for a service",
Namespace: "statping",
Name: "http_requests_duration",
Help: "Duration for h",
},
[]string{"url", "method"},
)
utilsHttpRequestBytes = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "http_response_bytes",
Help: "How many successful requests for a service",
Namespace: "statping",
Name: "http_response_bytes",
Help: "Response in bytes for a HTTP services",
},
[]string{"url", "method"},
)
httpDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: "statping",
Name: "http_duration_seconds",
Help: "Duration of HTTP requests from the utils package",
}, []string{"path"})
)
func init() {
// InitMetrics registers the package's collectors through
// prometheus.MustRegister, which panics if any registration fails.
//
// All three database gauges are registered here. Database() writes to
// databaseInUse and databaseIdle as well as databaseConnections, and a
// collector that is never registered is not exported.
func InitMetrics() {
	prometheus.MustRegister(
		serviceOnline,
		serviceFailures,
		serviceSuccess,
		serviceStatusCode,
		serviceLatencyDuration,
		serviceDuration,
		utilsHttpRequestDur,
		utilsHttpRequestBytes,
		httpDuration,
		databaseConnections,
		databaseInUse,
		databaseIdle,
	)
}
func Histo(method string, value float64, labels ...interface{}) {
switch method {
case "latency":
serviceLatencyDuration.WithLabelValues(convert(labels)...).Observe(value)
case "duration":
utilsHttpRequestDur.WithLabelValues(convert(labels)...).Observe(value)
case "bytes":
@ -46,9 +55,17 @@ func Histo(method string, value float64, labels ...interface{}) {
}
}
// Timer returns the httpDuration observer for the given label values.
// The labels are passed through convert before the lookup.
func Timer(labels ...interface{}) prometheus.Observer {
	values := convert(labels)
	return httpDuration.WithLabelValues(values...)
}
// ServiceTimer returns the serviceDuration observer for the given label
// values. The labels are passed through convert before the lookup.
func ServiceTimer(labels ...interface{}) prometheus.Observer {
	values := convert(labels)
	return serviceDuration.WithLabelValues(values...)
}
func Gauge(method string, value float64, labels ...interface{}) {
switch method {
case "service":
case "status_code":
serviceStatusCode.WithLabelValues(convert(labels)...).Set(value)
case "online":
serviceOnline.WithLabelValues(convert(labels)...).Set(value)

View File

@ -6,17 +6,19 @@ var (
// service is online if set to 1, offline if 0
serviceOnline = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "service_online",
Help: "If service is online",
Namespace: "statping",
Name: "service_online",
Help: "If service is online",
},
[]string{"service"},
[]string{"service", "name", "type"},
)
// service failures
serviceFailures = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "service_failures",
Help: "How many failures occur for a service",
Namespace: "statping",
Name: "service_failures",
Help: "How many failures occur for a service",
},
[]string{"service"},
)
@ -24,17 +26,19 @@ var (
// successful hits for a service
serviceSuccess = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "service_success",
Help: "How many successful requests for a service",
Namespace: "statping",
Name: "service_success",
Help: "How many successful requests for a service",
},
[]string{"service"},
)
// service check latency
serviceLatencyDuration = prometheus.NewHistogramVec(
serviceDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "service_latency",
Help: "How many successful requests for a service",
Namespace: "statping",
Name: "service_duration",
Help: "Service request duration for a success response",
},
[]string{"service"},
)
@ -42,8 +46,9 @@ var (
// http status code for a service
serviceStatusCode = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "service_status_code",
Help: "HTTP Status code for a service",
Namespace: "statping",
Name: "service_status_code",
Help: "HTTP Status code for a service",
},
[]string{"service"},
)

View File

@ -4,6 +4,7 @@ import (
"bytes"
"crypto/tls"
"fmt"
"github.com/prometheus/client_golang/prometheus"
"github.com/statping/statping/types/metrics"
"google.golang.org/grpc"
"net"
@ -87,6 +88,8 @@ func isIPv6(address string) bool {
// CheckIcmp will send an ICMP ping packet to the service
func CheckIcmp(s *Service, record bool) (*Service, error) {
defer s.updateLastCheck()
timer := prometheus.NewTimer(metrics.ServiceTimer(s.Id))
defer timer.ObserveDuration()
if err := utils.Ping(s.Domain, s.Timeout); err != nil {
if record {
@ -102,6 +105,8 @@ func CheckIcmp(s *Service, record bool) (*Service, error) {
// CheckGrpc will check a gRPC service
func CheckGrpc(s *Service, record bool) (*Service, error) {
defer s.updateLastCheck()
timer := prometheus.NewTimer(metrics.ServiceTimer(s.Id))
defer timer.ObserveDuration()
dnsLookup, err := dnsCheck(s)
if err != nil {
@ -147,6 +152,8 @@ func CheckGrpc(s *Service, record bool) (*Service, error) {
// CheckTcp will check a TCP service
func CheckTcp(s *Service, record bool) (*Service, error) {
defer s.updateLastCheck()
timer := prometheus.NewTimer(metrics.ServiceTimer(s.Id))
defer timer.ObserveDuration()
dnsLookup, err := dnsCheck(s)
if err != nil {
@ -212,6 +219,8 @@ func (s *Service) updateLastCheck() {
// CheckHttp will check an HTTP service
func CheckHttp(s *Service, record bool) (*Service, error) {
defer s.updateLastCheck()
timer := prometheus.NewTimer(metrics.ServiceTimer(s.Id))
defer timer.ObserveDuration()
dnsLookup, err := dnsCheck(s)
if err != nil {
@ -273,10 +282,10 @@ func CheckHttp(s *Service, record bool) (*Service, error) {
return s, err
}
s.Latency = utils.Now().Sub(t1).Microseconds()
metrics.Histo("latency", utils.Now().Sub(t1).Seconds(), s.Id)
s.LastResponse = string(content)
s.LastStatusCode = res.StatusCode
metrics.Gauge("service", float64(res.StatusCode), s.Id)
metrics.Gauge("status_code", float64(res.StatusCode), s.Id)
if s.Expected.String != "" {
match, err := regexp.MatchString(s.Expected.String, string(content))
@ -320,9 +329,9 @@ func recordSuccess(s *Service) {
fmt.Sprintf("Service #%d '%v' Successful Response: %s | Lookup in: %s | Online: %v | Interval: %d seconds", s.Id, s.Name, humanMicro(hit.Latency), humanMicro(hit.PingTime), s.Online, s.Interval))
s.LastLookupTime = hit.PingTime
s.LastLatency = hit.Latency
metrics.Gauge("online", 1., s.Id, s.Name, s.Type)
sendSuccess(s)
s.SuccessNotified = true
metrics.Gauge("online", 1., s.Id)
}
func AddNotifier(n ServiceNotifier) {
@ -374,8 +383,8 @@ func recordFailure(s *Service, issue string) {
s.Online = false
s.SuccessNotified = false
s.DownText = s.DowntimeText()
metrics.Gauge("online", 0., s.Id, s.Name, s.Type)
sendFailure(s, fail)
metrics.Gauge("online", 0., s.Id)
}
func sendFailure(s *Service, f *failures.Failure) {