mirror of https://github.com/openspug/spug
U 增加告警信息内容
parent
b801720470
commit
875f86dcbd
|
@ -12,27 +12,30 @@ logging.captureWarnings(True)
|
|||
|
||||
|
||||
def site_check(url):
    """Probe a URL with an HTTP GET and report whether it looks healthy.

    Args:
        url: Address to request.

    Returns:
        A ``(is_ok, message)`` tuple. ``is_ok`` is True when the response
        status code is in the range 200-399; ``message`` then carries the
        status code. If the request raises, ``is_ok`` is False and
        ``message`` carries the exception text.
    """
    try:
        # NOTE(review): certificate verification is disabled here, presumably
        # so that hosts with self-signed certificates can be monitored.
        res = requests.get(url, timeout=10, verify=False)
        return 200 <= res.status_code < 400, f'返回状态码:{res.status_code}'
    except Exception as e:
        # Any failure (DNS, refused connection, timeout, ...) is reported as
        # an unhealthy result instead of propagating to the caller.
        return False, f'异常信息:{e}'
|
||||
|
||||
|
||||
def port_check(addr, port):
    """Check whether a TCP port accepts connections.

    Args:
        addr: Host name or IP address to connect to.
        port: Port number; anything accepted by ``int()`` (e.g. ``80`` or
            ``'80'``).

    Returns:
        ``(True, None)`` when the connection succeeds, otherwise
        ``(False, message)`` where ``message`` carries the exception text
        (connection refused, timeout, invalid port value, ...).
    """
    try:
        # The context manager closes the socket on every path, so repeated
        # probes do not leak file descriptors.
        with socket() as sock:
            sock.settimeout(5)
            sock.connect((addr, int(port)))
        return True, None
    except Exception as e:
        return False, f'异常信息:{e}'
|
||||
|
||||
|
||||
def host_executor(host, pkey, command):
    """Run a command on a remote host over SSH and report the outcome.

    Args:
        host: Object exposing ``hostname``, ``port`` and ``username``.
        pkey: Private key passed through to the ``SSH`` client.
        command: Command string to execute remotely.

    Returns:
        A ``(is_ok, message)`` tuple. ``is_ok`` is True when the command's
        exit code is 0 and ``message`` is the decoded command output. If
        connecting or executing raises, ``is_ok`` is False and ``message``
        carries the exception text.
    """
    try:
        cli = SSH(host.hostname, host.port, host.username, pkey=pkey)
        exit_code, out = cli.exec_command(command)
        # Decode leniently: output that is not valid UTF-8 must not turn a
        # successful command into a reported failure.
        return exit_code == 0, out.decode(errors='replace')
    except Exception as e:
        return False, f'异常信息:{e}'
|
||||
|
||||
|
||||
def dispatch(tp, addr, extra):
|
||||
|
|
|
@ -13,7 +13,8 @@ from apps.monitor.executors import dispatch
|
|||
from apps.monitor.utils import seconds_to_human
|
||||
from apps.notify.models import Notify
|
||||
from django.conf import settings
|
||||
from libs import spug, AttrDict, human_datetime
|
||||
from libs import spug, AttrDict, human_datetime, human_diff_time
|
||||
from datetime import datetime
|
||||
import logging
|
||||
import json
|
||||
import time
|
||||
|
@ -40,22 +41,25 @@ class Scheduler:
|
|||
notify_grp=obj.notify_grp,
|
||||
notify_mode=obj.notify_mode)
|
||||
|
||||
def _do_notify(self, event, obj):
|
||||
grp = json.loads(obj.notify_grp)
|
||||
def _do_notify(self, event, obj, out):
|
||||
obj.out = out
|
||||
obj.grp = json.loads(obj.notify_grp)
|
||||
if event == '2':
|
||||
obj.duration = human_diff_time(datetime.now(), datetime.fromtimestamp(obj.latest_fault_time))
|
||||
for mode in json.loads(obj.notify_mode):
|
||||
if mode == '1':
|
||||
spug.notify_by_wx(event, obj.name, grp)
|
||||
spug.notify_by_wx(event, obj)
|
||||
elif mode == '3':
|
||||
spug.notify_by_dd(event, obj.name, grp)
|
||||
spug.notify_by_dd(event, obj)
|
||||
elif mode == '4':
|
||||
spug.notify_by_email(event, obj.name, grp)
|
||||
spug.notify_by_email(event, obj)
|
||||
|
||||
def _handle_notify(self, obj, old_status):
|
||||
def _handle_notify(self, obj, old_status, out):
|
||||
if obj.latest_status == 0:
|
||||
if old_status == 1:
|
||||
self._record_alarm(obj, '2')
|
||||
logger.info(f'{human_datetime()} recover job_id: {obj.id}')
|
||||
self._do_notify('2', obj)
|
||||
self._do_notify('2', obj, out)
|
||||
else:
|
||||
if obj.fault_times >= obj.threshold:
|
||||
if time.time() - obj.latest_notify_time >= obj.quiet * 60:
|
||||
|
@ -63,7 +67,7 @@ class Scheduler:
|
|||
obj.save()
|
||||
self._record_alarm(obj, '1')
|
||||
logger.info(f'{human_datetime()} notify job_id: {obj.id}')
|
||||
self._do_notify('1', obj)
|
||||
self._do_notify('1', obj, out)
|
||||
|
||||
def _handle_event(self, event):
|
||||
close_old_connections()
|
||||
|
@ -78,11 +82,12 @@ class Scheduler:
|
|||
logger.info(f'EVENT_JOB_ERROR: job_id {event.job_id} exception: {event.exception}')
|
||||
Notify.make_notify('monitor', '1', f'{obj.name} - 执行异常', f'{event.exception}')
|
||||
elif event.code == EVENT_JOB_EXECUTED:
|
||||
is_ok, out = event.retval
|
||||
obj = Detection.objects.filter(pk=event.job_id).first()
|
||||
old_status = obj.latest_status
|
||||
obj.latest_status = 0 if event.retval else 1
|
||||
obj.latest_status = 0 if is_ok else 1
|
||||
obj.latest_run_time = human_datetime(event.scheduled_run_time)
|
||||
if old_status in [0, None] and event.retval is False:
|
||||
if old_status in [0, None] and is_ok is False:
|
||||
obj.latest_fault_time = int(time.time())
|
||||
if obj.latest_status == 0:
|
||||
obj.latest_notify_time = 0
|
||||
|
@ -90,7 +95,7 @@ class Scheduler:
|
|||
else:
|
||||
obj.fault_times += 1
|
||||
obj.save()
|
||||
self._handle_notify(obj, old_status)
|
||||
self._handle_notify(obj, old_status, out)
|
||||
|
||||
def _init(self):
|
||||
self.scheduler.start()
|
||||
|
|
|
@ -18,8 +18,8 @@ def _parse_args(grp):
|
|||
return spug_key, sum([json.loads(x.contacts) for x in Group.objects.filter(id__in=grp)], [])
|
||||
|
||||
|
||||
def notify_by_wx(event, subject, n_grp):
|
||||
spug_key, u_ids = _parse_args(n_grp)
|
||||
def notify_by_wx(event, obj):
|
||||
spug_key, u_ids = _parse_args(obj.grp)
|
||||
if not spug_key:
|
||||
Notify.make_notify(notify_source, '1', '发送报警信息失败', '未配置报警服务调用凭据,请在系统管理/系统设置/报警服务设置中配置。')
|
||||
return
|
||||
|
@ -28,7 +28,9 @@ def notify_by_wx(event, subject, n_grp):
|
|||
data = {
|
||||
'token': spug_key,
|
||||
'event': event,
|
||||
'subject': subject,
|
||||
'subject': obj.name,
|
||||
'desc': obj.out,
|
||||
'remark': f'故障持续{obj.duration}' if event == '2' else None,
|
||||
'users': list(users)
|
||||
}
|
||||
requests.post(f'{spug_server}/apis/notify/wx/', json=data)
|
||||
|
@ -36,21 +38,25 @@ def notify_by_wx(event, subject, n_grp):
|
|||
Notify.make_notify(notify_source, '1', '发送报警信息失败', '未找到可用的通知对象,请确保设置了相关报警联系人的微信Token。')
|
||||
|
||||
|
||||
def notify_by_email(event, subject, grp):
|
||||
spug_key, u_ids = _parse_args(grp)
|
||||
def notify_by_email(event, obj):
|
||||
spug_key, u_ids = _parse_args(obj.grp)
|
||||
users = set(x.email for x in Contact.objects.filter(id__in=u_ids, email__isnull=False))
|
||||
if users:
|
||||
mail_service = json.loads(AppSetting.get_default('mail_service', '{}'))
|
||||
body = ['告警名称:' + obj.name, '告警时间:' + human_datetime(), '告警描述:' + obj.out]
|
||||
if event == '2':
|
||||
body.append('故障持续:' + obj.duration)
|
||||
if mail_service.get('server'):
|
||||
event_map = {'1': '告警', '2': '恢复'}
|
||||
subject = f'{event_map[event]}-{subject}'
|
||||
event_map = {'1': '告警发生', '2': '告警恢复'}
|
||||
subject = f'{event_map[event]}-{obj.name}'
|
||||
mail = Mail(**mail_service)
|
||||
mail.send_text_mail(users, subject, f'{subject}\r\n\r\n自动发送,请勿回复。')
|
||||
mail.send_text_mail(users, subject, '\r\n'.join(body) + '\r\n\r\n自动发送,请勿回复。')
|
||||
elif spug_key:
|
||||
data = {
|
||||
'token': spug_key,
|
||||
'event': event,
|
||||
'subject': subject,
|
||||
'subject': obj.name,
|
||||
'body': '\r\n'.join(body),
|
||||
'users': list(users)
|
||||
}
|
||||
requests.post(f'{spug_server}/apis/notify/mail/', json=data)
|
||||
|
@ -60,22 +66,23 @@ def notify_by_email(event, subject, grp):
|
|||
Notify.make_notify(notify_source, '1', '发送报警信息失败', '未找到可用的通知对象,请确保设置了相关报警联系人的邮件地址。')
|
||||
|
||||
|
||||
def notify_by_dd(event, subject, grp):
|
||||
_, u_ids = _parse_args(grp)
|
||||
def notify_by_dd(event, obj):
|
||||
_, u_ids = _parse_args(obj.grp)
|
||||
users = set(x.ding for x in Contact.objects.filter(id__in=u_ids, ding__isnull=False))
|
||||
if users:
|
||||
texts = [
|
||||
'## %s ## ' % '监控告警通知' if event == '1' else '告警恢复通知',
|
||||
f'**告警名称:** <font color="#{"f90202" if event == "1" else "8ece60"}">{subject}</font> ',
|
||||
'## %s ## ' % ('监控告警通知' if event == '1' else '告警恢复通知'),
|
||||
f'**告警名称:** <font color="#{"f90202" if event == "1" else "8ece60"}">{obj.name}</font> ',
|
||||
f'**告警时间:** {human_datetime()} ',
|
||||
'**告警描述:** %s ' % '请在运维平台监控中心查看详情' if event == '1' else '告警已恢复',
|
||||
'> ###### 来自 Spug运维平台'
|
||||
f'**告警描述:** {obj.out} ',
|
||||
]
|
||||
if event == '2':
|
||||
texts.append(f'**持续时间:** {obj.duration} ')
|
||||
data = {
|
||||
'msgtype': 'markdown',
|
||||
'markdown': {
|
||||
'title': '监控告警通知',
|
||||
'text': '\n\n'.join(texts)
|
||||
'text': '\n\n'.join(texts) + '\n\n> ###### 来自 Spug运维平台'
|
||||
}
|
||||
}
|
||||
for url in users:
|
||||
|
|
Loading…
Reference in New Issue