mirror of https://github.com/statping/statping
prometheus metric updates
parent
c3418e2473
commit
d4a557d2f8
|
@ -9,6 +9,7 @@ import (
|
|||
"github.com/statping/statping/source"
|
||||
"github.com/statping/statping/types/configs"
|
||||
"github.com/statping/statping/types/core"
|
||||
"github.com/statping/statping/types/metrics"
|
||||
"github.com/statping/statping/types/services"
|
||||
"github.com/statping/statping/utils"
|
||||
"os"
|
||||
|
@ -174,6 +175,8 @@ func InitApp() error {
|
|||
if _, err := core.Select(); err != nil {
|
||||
return err
|
||||
}
|
||||
// init prometheus metrics
|
||||
metrics.InitMetrics()
|
||||
// select all services in database and store services in a mapping of Service pointers
|
||||
if _, err := services.SelectAllServices(true); err != nil {
|
||||
return err
|
||||
|
|
|
@ -4,6 +4,7 @@ import (
|
|||
"database/sql"
|
||||
"fmt"
|
||||
"github.com/jinzhu/gorm"
|
||||
"github.com/statping/statping/types/metrics"
|
||||
"github.com/statping/statping/utils"
|
||||
"strings"
|
||||
"time"
|
||||
|
@ -119,6 +120,21 @@ func (it *Db) ChunkSize() int {
|
|||
}
|
||||
}
|
||||
|
||||
func Routine() {
|
||||
for {
|
||||
if database.DB() == nil {
|
||||
time.Sleep(5 * time.Second)
|
||||
continue
|
||||
}
|
||||
stats := database.DB().Stats()
|
||||
metrics.Database("connections", float64(stats.OpenConnections))
|
||||
metrics.Database("in_use", float64(stats.InUse))
|
||||
metrics.Database("idle", float64(stats.Idle))
|
||||
|
||||
time.Sleep(5 * time.Second)
|
||||
}
|
||||
}
|
||||
|
||||
func (it *Db) GormDB() *gorm.DB {
|
||||
return it.Database
|
||||
}
|
||||
|
@ -184,6 +200,7 @@ func Openw(dialect string, args ...interface{}) (db Database, err error) {
|
|||
return nil, err
|
||||
}
|
||||
database = Wrap(gormdb)
|
||||
go Routine()
|
||||
return database, err
|
||||
}
|
||||
|
||||
|
|
|
@ -7,9 +7,9 @@ import (
|
|||
"fmt"
|
||||
"github.com/gorilla/mux"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promauto"
|
||||
"github.com/statping/statping/types/core"
|
||||
"github.com/statping/statping/types/errors"
|
||||
"github.com/statping/statping/types/metrics"
|
||||
"github.com/statping/statping/utils"
|
||||
"io"
|
||||
"net/http"
|
||||
|
@ -19,12 +19,8 @@ import (
|
|||
)
|
||||
|
||||
// Package-level credentials.
// NOTE(review): presumably used for HTTP basic auth — confirm against the
// handlers that read them.
var authUser, authPass string
|
||||
|
||||
// Gzip Compression
|
||||
|
@ -178,7 +174,7 @@ func prometheusMiddleware(next http.Handler) http.Handler {
|
|||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
route := mux.CurrentRoute(r)
|
||||
path, _ := route.GetPathTemplate()
|
||||
timer := prometheus.NewTimer(httpDuration.WithLabelValues(path))
|
||||
timer := prometheus.NewTimer(metrics.Timer(path))
|
||||
next.ServeHTTP(w, r)
|
||||
timer.ObserveDuration()
|
||||
})
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
package metrics
|
||||
|
||||
import "github.com/prometheus/client_golang/prometheus"
|
||||
|
||||
var (
|
||||
// service is online if set to 1, offline if 0
|
||||
databaseConnections = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: "statping",
|
||||
Name: "database_connections",
|
||||
Help: "If service is online",
|
||||
}, nil,
|
||||
)
|
||||
databaseInUse = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: "statping",
|
||||
Name: "database_connections_in_use",
|
||||
Help: "If service is online",
|
||||
}, nil,
|
||||
)
|
||||
databaseIdle = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: "statping",
|
||||
Name: "database_connections_idle",
|
||||
Help: "If service is online",
|
||||
}, nil,
|
||||
)
|
||||
)
|
||||
|
||||
func Database(method string, value float64) {
|
||||
switch method {
|
||||
case "connections":
|
||||
databaseConnections.WithLabelValues().Set(value)
|
||||
case "in_use":
|
||||
databaseInUse.WithLabelValues().Set(value)
|
||||
case "idle":
|
||||
databaseIdle.WithLabelValues().Set(value)
|
||||
}
|
||||
}
|
|
@ -8,37 +8,46 @@ import (
|
|||
var (
|
||||
utilsHttpRequestDur = prometheus.NewHistogramVec(
|
||||
prometheus.HistogramOpts{
|
||||
Name: "http_requests_duration",
|
||||
Help: "How many successful requests for a service",
|
||||
Namespace: "statping",
|
||||
Name: "http_requests_duration",
|
||||
Help: "Duration for h",
|
||||
},
|
||||
[]string{"url", "method"},
|
||||
)
|
||||
|
||||
utilsHttpRequestBytes = prometheus.NewHistogramVec(
|
||||
prometheus.HistogramOpts{
|
||||
Name: "http_response_bytes",
|
||||
Help: "How many successful requests for a service",
|
||||
Namespace: "statping",
|
||||
Name: "http_response_bytes",
|
||||
Help: "Response in bytes for a HTTP services",
|
||||
},
|
||||
[]string{"url", "method"},
|
||||
)
|
||||
|
||||
httpDuration = prometheus.NewHistogramVec(
|
||||
prometheus.HistogramOpts{
|
||||
Namespace: "statping",
|
||||
Name: "http_duration_seconds",
|
||||
Help: "Duration of HTTP requests from the utils package",
|
||||
}, []string{"path"})
|
||||
)
|
||||
|
||||
func init() {
|
||||
func InitMetrics() {
|
||||
prometheus.MustRegister(
|
||||
serviceOnline,
|
||||
serviceFailures,
|
||||
serviceSuccess,
|
||||
serviceStatusCode,
|
||||
serviceLatencyDuration,
|
||||
serviceDuration,
|
||||
utilsHttpRequestDur,
|
||||
utilsHttpRequestBytes,
|
||||
httpDuration,
|
||||
databaseConnections,
|
||||
)
|
||||
}
|
||||
|
||||
func Histo(method string, value float64, labels ...interface{}) {
|
||||
switch method {
|
||||
case "latency":
|
||||
serviceLatencyDuration.WithLabelValues(convert(labels)...).Observe(value)
|
||||
case "duration":
|
||||
utilsHttpRequestDur.WithLabelValues(convert(labels)...).Observe(value)
|
||||
case "bytes":
|
||||
|
@ -46,9 +55,17 @@ func Histo(method string, value float64, labels ...interface{}) {
|
|||
}
|
||||
}
|
||||
|
||||
func Timer(labels ...interface{}) prometheus.Observer {
|
||||
return httpDuration.WithLabelValues(convert(labels)...)
|
||||
}
|
||||
|
||||
func ServiceTimer(labels ...interface{}) prometheus.Observer {
|
||||
return serviceDuration.WithLabelValues(convert(labels)...)
|
||||
}
|
||||
|
||||
func Gauge(method string, value float64, labels ...interface{}) {
|
||||
switch method {
|
||||
case "service":
|
||||
case "status_code":
|
||||
serviceStatusCode.WithLabelValues(convert(labels)...).Set(value)
|
||||
case "online":
|
||||
serviceOnline.WithLabelValues(convert(labels)...).Set(value)
|
||||
|
|
|
@ -6,17 +6,19 @@ var (
|
|||
// service is online if set to 1, offline if 0
|
||||
serviceOnline = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "service_online",
|
||||
Help: "If service is online",
|
||||
Namespace: "statping",
|
||||
Name: "service_online",
|
||||
Help: "If service is online",
|
||||
},
|
||||
[]string{"service"},
|
||||
[]string{"service", "name", "type"},
|
||||
)
|
||||
|
||||
// service failures
|
||||
serviceFailures = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "service_failures",
|
||||
Help: "How many failures occur for a service",
|
||||
Namespace: "statping",
|
||||
Name: "service_failures",
|
||||
Help: "How many failures occur for a service",
|
||||
},
|
||||
[]string{"service"},
|
||||
)
|
||||
|
@ -24,17 +26,19 @@ var (
|
|||
// successful hits for a service
|
||||
serviceSuccess = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "service_success",
|
||||
Help: "How many successful requests for a service",
|
||||
Namespace: "statping",
|
||||
Name: "service_success",
|
||||
Help: "How many successful requests for a service",
|
||||
},
|
||||
[]string{"service"},
|
||||
)
|
||||
|
||||
// service check latency
|
||||
serviceLatencyDuration = prometheus.NewHistogramVec(
|
||||
serviceDuration = prometheus.NewHistogramVec(
|
||||
prometheus.HistogramOpts{
|
||||
Name: "service_latency",
|
||||
Help: "How many successful requests for a service",
|
||||
Namespace: "statping",
|
||||
Name: "service_duration",
|
||||
Help: "Service request duration for a success response",
|
||||
},
|
||||
[]string{"service"},
|
||||
)
|
||||
|
@ -42,8 +46,9 @@ var (
|
|||
// http status code for a service
|
||||
serviceStatusCode = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "service_status_code",
|
||||
Help: "HTTP Status code for a service",
|
||||
Namespace: "statping",
|
||||
Name: "service_status_code",
|
||||
Help: "HTTP Status code for a service",
|
||||
},
|
||||
[]string{"service"},
|
||||
)
|
||||
|
|
|
@ -4,6 +4,7 @@ import (
|
|||
"bytes"
|
||||
"crypto/tls"
|
||||
"fmt"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/statping/statping/types/metrics"
|
||||
"google.golang.org/grpc"
|
||||
"net"
|
||||
|
@ -87,6 +88,8 @@ func isIPv6(address string) bool {
|
|||
// CheckIcmp will send an ICMP ping packet to the service
|
||||
func CheckIcmp(s *Service, record bool) (*Service, error) {
|
||||
defer s.updateLastCheck()
|
||||
timer := prometheus.NewTimer(metrics.ServiceTimer(s.Id))
|
||||
defer timer.ObserveDuration()
|
||||
|
||||
if err := utils.Ping(s.Domain, s.Timeout); err != nil {
|
||||
if record {
|
||||
|
@ -102,6 +105,8 @@ func CheckIcmp(s *Service, record bool) (*Service, error) {
|
|||
// CheckGrpc will check a gRPC service
|
||||
func CheckGrpc(s *Service, record bool) (*Service, error) {
|
||||
defer s.updateLastCheck()
|
||||
timer := prometheus.NewTimer(metrics.ServiceTimer(s.Id))
|
||||
defer timer.ObserveDuration()
|
||||
|
||||
dnsLookup, err := dnsCheck(s)
|
||||
if err != nil {
|
||||
|
@ -147,6 +152,8 @@ func CheckGrpc(s *Service, record bool) (*Service, error) {
|
|||
// CheckTcp will check a TCP service
|
||||
func CheckTcp(s *Service, record bool) (*Service, error) {
|
||||
defer s.updateLastCheck()
|
||||
timer := prometheus.NewTimer(metrics.ServiceTimer(s.Id))
|
||||
defer timer.ObserveDuration()
|
||||
|
||||
dnsLookup, err := dnsCheck(s)
|
||||
if err != nil {
|
||||
|
@ -212,6 +219,8 @@ func (s *Service) updateLastCheck() {
|
|||
// CheckHttp will check an HTTP service
|
||||
func CheckHttp(s *Service, record bool) (*Service, error) {
|
||||
defer s.updateLastCheck()
|
||||
timer := prometheus.NewTimer(metrics.ServiceTimer(s.Id))
|
||||
defer timer.ObserveDuration()
|
||||
|
||||
dnsLookup, err := dnsCheck(s)
|
||||
if err != nil {
|
||||
|
@ -273,10 +282,10 @@ func CheckHttp(s *Service, record bool) (*Service, error) {
|
|||
return s, err
|
||||
}
|
||||
s.Latency = utils.Now().Sub(t1).Microseconds()
|
||||
metrics.Histo("latency", utils.Now().Sub(t1).Seconds(), s.Id)
|
||||
s.LastResponse = string(content)
|
||||
s.LastStatusCode = res.StatusCode
|
||||
metrics.Gauge("service", float64(res.StatusCode), s.Id)
|
||||
|
||||
metrics.Gauge("status_code", float64(res.StatusCode), s.Id)
|
||||
|
||||
if s.Expected.String != "" {
|
||||
match, err := regexp.MatchString(s.Expected.String, string(content))
|
||||
|
@ -320,9 +329,9 @@ func recordSuccess(s *Service) {
|
|||
fmt.Sprintf("Service #%d '%v' Successful Response: %s | Lookup in: %s | Online: %v | Interval: %d seconds", s.Id, s.Name, humanMicro(hit.Latency), humanMicro(hit.PingTime), s.Online, s.Interval))
|
||||
s.LastLookupTime = hit.PingTime
|
||||
s.LastLatency = hit.Latency
|
||||
metrics.Gauge("online", 1., s.Id, s.Name, s.Type)
|
||||
sendSuccess(s)
|
||||
s.SuccessNotified = true
|
||||
metrics.Gauge("online", 1., s.Id)
|
||||
}
|
||||
|
||||
func AddNotifier(n ServiceNotifier) {
|
||||
|
@ -374,8 +383,8 @@ func recordFailure(s *Service, issue string) {
|
|||
s.Online = false
|
||||
s.SuccessNotified = false
|
||||
s.DownText = s.DowntimeText()
|
||||
metrics.Gauge("online", 0., s.Id, s.Name, s.Type)
|
||||
sendFailure(s, fail)
|
||||
metrics.Gauge("online", 0., s.Id)
|
||||
}
|
||||
|
||||
func sendFailure(s *Service, f *failures.Failure) {
|
||||
|
|
Loading…
Reference in New Issue