mirror of https://github.com/statping/statping
Second Cut (#10)
parent
708bbf98e5
commit
6592252755
|
@ -3,7 +3,7 @@ admin_password: admin
|
|||
admin_user: superadmin
|
||||
allow_reports: false
|
||||
base_path: ""
|
||||
cleanup_interval: 12h0m0s
|
||||
cleanup_interval: 24h0m0s
|
||||
cmd_file: /bin/bash
|
||||
db_conn: postgres
|
||||
db_database: ${DATABASE_NAME}
|
||||
|
@ -22,7 +22,7 @@ max_life_conn: 180
|
|||
max_open_conn: 25
|
||||
name: Razorpay Status Page
|
||||
postgres_sslmode: require
|
||||
remove_after: 2160h0m0s
|
||||
remove_after: 720h0m0s
|
||||
sample_data: true
|
||||
sass: /usr/local/bin/sass
|
||||
use_assets: true
|
||||
|
|
|
@ -19,6 +19,13 @@ type serviceOrder struct {
|
|||
Order int `json:"order"`
|
||||
}
|
||||
|
||||
// Named zero values, one per field type that apiServiceUpdateHandler
// resets on a service before saving it. Each initializer is spelled out
// so the intended "unset" value is visible at the declaration site.
var (
	// zeroBool is the unset value for boolean fields.
	zeroBool = false
	// zeroInt is the unset value for int fields.
	zeroInt = 0
	// zeroInt64 is the unset value for int64 fields.
	zeroInt64 = int64(0)
	// zeroTime is the unset value for time.Time fields.
	zeroTime = time.Time{}
)
|
||||
|
||||
func findService(r *http.Request) (*services.Service, error) {
|
||||
vars := mux.Vars(r)
|
||||
id := utils.ToInt(vars["id"])
|
||||
|
@ -132,19 +139,31 @@ func apiServicePatchHandler(w http.ResponseWriter, r *http.Request) {
|
|||
|
||||
// apiServiceUpdateHandler handles a service update request. It resolves the
// target service with findService, decodes the JSON request body, persists
// the result with Update, starts CheckService(true) in a goroutine and
// replies through sendJsonAction. An error from the lookup, the decode or
// the Update call is reported with sendErrorJson and ends the request.
//
// NOTE(review): this listing looks like a rendered diff without +/- markers,
// so superseded and replacement statements sit next to each other (both
// DecodeJSON calls, both Update calls, both CheckService goroutines).
// Confirm against the repository which of each pair is current.
func apiServiceUpdateHandler(w http.ResponseWriter, r *http.Request) {
|
||||
service, err := findService(r)
|
||||
|
||||
if err != nil {
|
||||
sendErrorJson(err, w, r)
|
||||
return
|
||||
}
|
||||
if err := DecodeJSON(r, &service); err != nil {
|
||||
|
||||
// Work on a copy (s2) of the service returned by findService.
s2 := *service
|
||||
|
||||
// Start from an empty sub-service map before decoding the body into s2;
// presumably so entries missing from the payload are dropped — TODO confirm.
s2.SubServicesDetails = map[int64]services.SubService{}
|
||||
|
||||
if err := DecodeJSON(r, &s2); err != nil {
|
||||
sendErrorJson(err, w, r)
|
||||
return
|
||||
}
|
||||
if err := service.Update(); err != nil {
|
||||
|
||||
// Reset these fields on s2 to the package-level zero values before saving.
s2.LastProcessingTime = zeroTime
|
||||
s2.Online = zeroBool
|
||||
s2.FailureCounter = zeroInt
|
||||
s2.CurrentDowntime = zeroInt64
|
||||
|
||||
if err := s2.Update(); err != nil {
|
||||
sendErrorJson(err, w, r)
|
||||
return
|
||||
}
|
||||
go service.CheckService(true)
|
||||
go s2.CheckService(true)
|
||||
// The response is built from service, not from the copy s2.
// NOTE(review): verify the reply reflects the decoded changes as intended.
sendJsonAction(service, "update", w, r)
|
||||
}
|
||||
|
||||
|
|
|
@ -80,6 +80,12 @@ func Find(id int64) (*Service, error) {
|
|||
return srv, res.Error()
|
||||
}
|
||||
|
||||
func FindFirstFromDB(id int64) (*Service, error) {
|
||||
var srv = Service{}
|
||||
res := db.First(&srv, id)
|
||||
return &srv, res.Error()
|
||||
}
|
||||
|
||||
func FindOne(id int64) (*Service, error) {
|
||||
srv := allServices[id]
|
||||
if srv == nil {
|
||||
|
|
|
@ -434,36 +434,40 @@ func CheckCollection(s *Service, record bool) (*Service, error) {
|
|||
timer := prometheus.NewTimer(metrics.ServiceTimer(s.Name))
|
||||
defer timer.ObserveDuration()
|
||||
|
||||
hcStartTime := time.Now()
|
||||
|
||||
combinedStatus := STATUS_UP
|
||||
var impactedSubService SubService
|
||||
var latency, pingtime int64
|
||||
downCount := 0
|
||||
|
||||
for id, subServiceDetail := range s.SubServicesDetails {
|
||||
if subService, err := FindOne(id); err != nil {
|
||||
if subService, err := FindFirstFromDB(id); err != nil {
|
||||
log.Errorf("[Ignored]Failed to find Sub Service : %s %s %s %s", s.Id, s.Name, id, subServiceDetail.DisplayName)
|
||||
continue
|
||||
} else {
|
||||
hit := subService.LastHit()
|
||||
failure := subService.LastFailure()
|
||||
pingtime = hit.PingTime
|
||||
if failure.CreatedAt.After(hit.CreatedAt) {
|
||||
pingtime = failure.PingTime
|
||||
if !subService.Online && subService.CurrentDowntime > 0 {
|
||||
downtimeType := STATUS_DOWN
|
||||
if d, de := downtimes.Find(subService.CurrentDowntime); de != nil {
|
||||
log.Errorf("[Ignored]Failed to find Sub Service Downtime : %s %s %s %s", s.Id, s.Name, id, subServiceDetail.DisplayName)
|
||||
continue
|
||||
} else {
|
||||
downtimeType = d.SubStatus
|
||||
}
|
||||
|
||||
if combinedStatus != STATUS_DOWN {
|
||||
switch subServiceDetail.DependencyType {
|
||||
case CRITICAL:
|
||||
combinedStatus = HandleEmptyStatus(failure.Type)
|
||||
combinedStatus = HandleEmptyStatus(downtimeType)
|
||||
impactedSubService = subServiceDetail
|
||||
case DELAYED, PARTIAL:
|
||||
combinedStatus = STATUS_DEGRADED
|
||||
if failure.Type == STATUS_DOWN {
|
||||
if downtimeType == STATUS_DOWN {
|
||||
downCount++
|
||||
}
|
||||
impactedSubService = subServiceDetail
|
||||
}
|
||||
}
|
||||
}
|
||||
latency += pingtime
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -471,8 +475,8 @@ func CheckCollection(s *Service, record bool) (*Service, error) {
|
|||
combinedStatus = STATUS_DOWN
|
||||
}
|
||||
|
||||
s.Latency = latency
|
||||
s.PingTime = latency
|
||||
s.Latency = time.Now().Sub(hcStartTime).Milliseconds()
|
||||
s.PingTime = time.Now().Sub(hcStartTime).Milliseconds()
|
||||
s.LastFailureType = combinedStatus
|
||||
if combinedStatus == STATUS_DOWN || combinedStatus == STATUS_DEGRADED {
|
||||
if record {
|
||||
|
@ -571,7 +575,7 @@ func (s *Service) CheckService(record bool) (err error) {
|
|||
func (s *Service) HandleDowntime(err error, record bool) {
|
||||
if err != nil {
|
||||
s.FailureCounter++
|
||||
if s.FailureCounter >= s.GetFtc() {
|
||||
if s.FailureCounter >= s.GetFtc() || s.Type == "collection" {
|
||||
|
||||
s.Online = false
|
||||
|
||||
|
@ -591,7 +595,15 @@ func (s *Service) HandleDowntime(err error, record bool) {
|
|||
}
|
||||
|
||||
downtime.End = time.Now()
|
||||
downtime.SubStatus = ApplyStatus(downtime.SubStatus, HandleEmptyStatus(s.LastFailureType), STATUS_DEGRADED)
|
||||
newStatus := HandleEmptyStatus(s.LastFailureType)
|
||||
|
||||
if downtime.SubStatus != "" && downtime.SubStatus != newStatus {
|
||||
downtime.Id = 0
|
||||
downtime.Start = time.Now().Add(time.Duration(-s.Interval) * (time.Second))
|
||||
}
|
||||
|
||||
downtime.SubStatus = newStatus
|
||||
|
||||
downtime.Failures = s.FailureCounter
|
||||
|
||||
if downtime.Id > 0 {
|
||||
|
|
|
@ -67,7 +67,7 @@ type Service struct {
|
|||
Incidents []*incidents.Incident `gorm:"foreignkey:service;association_foreignkey:id" json:"incidents,omitempty" yaml:"incidents"`
|
||||
Checkins []*checkins.Checkin `gorm:"foreignkey:service;association_foreignkey:id" json:"checkins,omitempty" yaml:"-" scope:"user,admin"`
|
||||
Failures []*failures.Failure `gorm:"-" json:"failures,omitempty" yaml:"-" scope:"user,admin"`
|
||||
LastProcessingTime time.Time `gorm:"column:last_processing_time" json:"last_processing_time"`
|
||||
LastProcessingTime time.Time `gorm:"column:last_processing_time" json:"-"`
|
||||
|
||||
notifyAfterCount int64 `gorm:"column:notify_after_count" yaml:"-"`
|
||||
prevOnline bool `gorm:"column:prev_online" yaml:"-"`
|
||||
|
|
Loading…
Reference in New Issue