Second Cut (#10)

pull/1062/head
Amrendra Singh 2021-09-06 20:42:40 +05:30 committed by GitHub
parent 708bbf98e5
commit 6592252755
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 57 additions and 20 deletions

View File

@ -3,7 +3,7 @@ admin_password: admin
admin_user: superadmin
allow_reports: false
base_path: ""
cleanup_interval: 12h0m0s
cleanup_interval: 24h0m0s
cmd_file: /bin/bash
db_conn: postgres
db_database: ${DATABASE_NAME}
@ -22,7 +22,7 @@ max_life_conn: 180
max_open_conn: 25
name: Razorpay Status Page
postgres_sslmode: require
remove_after: 2160h0m0s
remove_after: 720h0m0s
sample_data: true
sass: /usr/local/bin/sass
use_assets: true

View File

@ -19,6 +19,13 @@ type serviceOrder struct {
Order int `json:"order"`
}
// Zero values, one per type, that apiServiceUpdateHandler assigns to a
// service's runtime-state fields before saving an update.
var (
	zeroTime  = time.Time{}
	zeroBool  = false
	zeroInt   = 0
	zeroInt64 = int64(0)
)
func findService(r *http.Request) (*services.Service, error) {
vars := mux.Vars(r)
id := utils.ToInt(vars["id"])
@ -132,19 +139,31 @@ func apiServicePatchHandler(w http.ResponseWriter, r *http.Request) {
// apiServiceUpdateHandler handles a full update of a single service: it looks
// the service up with findService, decodes the JSON request body, persists the
// result with Update, starts a CheckService run in a goroutine, and replies
// through sendJsonAction. Any error along the way is reported with
// sendErrorJson and ends the request.
//
// NOTE(review): this span comes from a rendered diff whose +/- markers were
// lost, so superseded and replacement statements appear back to back. The
// pairs are flagged inline below — confirm against the actual commit.
func apiServiceUpdateHandler(w http.ResponseWriter, r *http.Request) {
service, err := findService(r)
if err != nil {
sendErrorJson(err, w, r)
return
}
// NOTE(review): presumably the pre-change line, superseded by the decode into s2 below.
if err := DecodeJSON(r, &service); err != nil {
// Copy the struct so the request body is decoded into s2 rather than into
// the value findService returned.
s2 := *service
// Give the copy its own empty map; presumably so decoded sub-service entries
// replace the existing set instead of being merged into the map still
// referenced by service — TODO confirm DecodeJSON's map handling.
s2.SubServicesDetails = map[int64]services.SubService{}
if err := DecodeJSON(r, &s2); err != nil {
sendErrorJson(err, w, r)
return
}
// NOTE(review): presumably the pre-change line, superseded by s2.Update() below.
if err := service.Update(); err != nil {
// Reset the runtime-state fields of the copy to their zero values before
// saving, overriding whatever the copy or the request body carried.
s2.LastProcessingTime = zeroTime
s2.Online = zeroBool
s2.FailureCounter = zeroInt
s2.CurrentDowntime = zeroInt64
if err := s2.Update(); err != nil {
sendErrorJson(err, w, r)
return
}
// NOTE(review): presumably the pre-change line, superseded by the s2 call below.
go service.CheckService(true)
// Run a check on the updated copy in the background; the handler does not
// wait for it or look at its result.
go s2.CheckService(true)
// NOTE(review): the response is built from the original service, not from the
// updated copy s2 — confirm this is intended.
sendJsonAction(service, "update", w, r)
}

View File

@ -80,6 +80,12 @@ func Find(id int64) (*Service, error) {
return srv, res.Error()
}
// FindFirstFromDB fetches the Service with the given id through db.First.
// Unlike FindOne it does not consult the in-memory allServices map. The
// returned pointer is always non-nil; the query's error is returned
// alongside it, so callers must check the error before using the record.
func FindFirstFromDB(id int64) (*Service, error) {
	record := new(Service)
	query := db.First(record, id)
	return record, query.Error()
}
func FindOne(id int64) (*Service, error) {
srv := allServices[id]
if srv == nil {

View File

@ -434,36 +434,40 @@ func CheckCollection(s *Service, record bool) (*Service, error) {
timer := prometheus.NewTimer(metrics.ServiceTimer(s.Name))
defer timer.ObserveDuration()
hcStartTime := time.Now()
combinedStatus := STATUS_UP
var impactedSubService SubService
var latency, pingtime int64
downCount := 0
for id, subServiceDetail := range s.SubServicesDetails {
if subService, err := FindOne(id); err != nil {
if subService, err := FindFirstFromDB(id); err != nil {
log.Errorf("[Ignored]Failed to find Sub Service : %s %s %s %s", s.Id, s.Name, id, subServiceDetail.DisplayName)
continue
} else {
hit := subService.LastHit()
failure := subService.LastFailure()
pingtime = hit.PingTime
if failure.CreatedAt.After(hit.CreatedAt) {
pingtime = failure.PingTime
if !subService.Online && subService.CurrentDowntime > 0 {
downtimeType := STATUS_DOWN
if d, de := downtimes.Find(subService.CurrentDowntime); de != nil {
log.Errorf("[Ignored]Failed to find Sub Service Downtime : %s %s %s %s", s.Id, s.Name, id, subServiceDetail.DisplayName)
continue
} else {
downtimeType = d.SubStatus
}
if combinedStatus != STATUS_DOWN {
switch subServiceDetail.DependencyType {
case CRITICAL:
combinedStatus = HandleEmptyStatus(failure.Type)
combinedStatus = HandleEmptyStatus(downtimeType)
impactedSubService = subServiceDetail
case DELAYED, PARTIAL:
combinedStatus = STATUS_DEGRADED
if failure.Type == STATUS_DOWN {
if downtimeType == STATUS_DOWN {
downCount++
}
impactedSubService = subServiceDetail
}
}
}
latency += pingtime
}
}
@ -471,8 +475,8 @@ func CheckCollection(s *Service, record bool) (*Service, error) {
combinedStatus = STATUS_DOWN
}
s.Latency = latency
s.PingTime = latency
s.Latency = time.Now().Sub(hcStartTime).Milliseconds()
s.PingTime = time.Now().Sub(hcStartTime).Milliseconds()
s.LastFailureType = combinedStatus
if combinedStatus == STATUS_DOWN || combinedStatus == STATUS_DEGRADED {
if record {
@ -571,7 +575,7 @@ func (s *Service) CheckService(record bool) (err error) {
func (s *Service) HandleDowntime(err error, record bool) {
if err != nil {
s.FailureCounter++
if s.FailureCounter >= s.GetFtc() {
if s.FailureCounter >= s.GetFtc() || s.Type == "collection" {
s.Online = false
@ -591,7 +595,15 @@ func (s *Service) HandleDowntime(err error, record bool) {
}
downtime.End = time.Now()
downtime.SubStatus = ApplyStatus(downtime.SubStatus, HandleEmptyStatus(s.LastFailureType), STATUS_DEGRADED)
newStatus := HandleEmptyStatus(s.LastFailureType)
if downtime.SubStatus != "" && downtime.SubStatus != newStatus {
downtime.Id = 0
downtime.Start = time.Now().Add(time.Duration(-s.Interval) * (time.Second))
}
downtime.SubStatus = newStatus
downtime.Failures = s.FailureCounter
if downtime.Id > 0 {

View File

@ -67,7 +67,7 @@ type Service struct {
Incidents []*incidents.Incident `gorm:"foreignkey:service;association_foreignkey:id" json:"incidents,omitempty" yaml:"incidents"`
Checkins []*checkins.Checkin `gorm:"foreignkey:service;association_foreignkey:id" json:"checkins,omitempty" yaml:"-" scope:"user,admin"`
Failures []*failures.Failure `gorm:"-" json:"failures,omitempty" yaml:"-" scope:"user,admin"`
LastProcessingTime time.Time `gorm:"column:last_processing_time" json:"last_processing_time"`
LastProcessingTime time.Time `gorm:"column:last_processing_time" json:"-"`
notifyAfterCount int64 `gorm:"column:notify_after_count" yaml:"-"`
prevOnline bool `gorm:"column:prev_online" yaml:"-"`