mirror of https://github.com/openspug/spug
U 增加告警信息内容
parent
b801720470
commit
875f86dcbd
|
@ -12,27 +12,30 @@ logging.captureWarnings(True)
|
||||||
|
|
||||||
|
|
||||||
def site_check(url):
|
def site_check(url):
|
||||||
status_code = -1
|
|
||||||
try:
|
try:
|
||||||
res = requests.get(url, timeout=10, verify=False)
|
res = requests.get(url, timeout=10, verify=False)
|
||||||
status_code = res.status_code
|
return 200 <= res.status_code < 400, f'返回状态码:{res.status_code}'
|
||||||
finally:
|
except Exception as e:
|
||||||
return status_code == 200
|
return False, f'异常信息:{e}'
|
||||||
|
|
||||||
|
|
||||||
def port_check(addr, port):
|
def port_check(addr, port):
|
||||||
sock = socket()
|
try:
|
||||||
sock.settimeout(5)
|
sock = socket()
|
||||||
return sock.connect_ex((addr, int(port))) == 0
|
sock.settimeout(5)
|
||||||
|
sock.connect((addr, int(port)))
|
||||||
|
return True, None
|
||||||
|
except Exception as e:
|
||||||
|
return False, f'异常信息:{e}'
|
||||||
|
|
||||||
|
|
||||||
def host_executor(host, pkey, command):
|
def host_executor(host, pkey, command):
|
||||||
exit_code = -1
|
|
||||||
try:
|
try:
|
||||||
cli = SSH(host.hostname, host.port, host.username, pkey=pkey)
|
cli = SSH(host.hostname, host.port, host.username, pkey=pkey)
|
||||||
exit_code, _ = cli.exec_command(command)
|
exit_code, out = cli.exec_command(command)
|
||||||
finally:
|
return exit_code == 0, out.decode()
|
||||||
return exit_code == 0
|
except Exception as e:
|
||||||
|
return False, f'异常信息:{e}'
|
||||||
|
|
||||||
|
|
||||||
def dispatch(tp, addr, extra):
|
def dispatch(tp, addr, extra):
|
||||||
|
|
|
@ -13,7 +13,8 @@ from apps.monitor.executors import dispatch
|
||||||
from apps.monitor.utils import seconds_to_human
|
from apps.monitor.utils import seconds_to_human
|
||||||
from apps.notify.models import Notify
|
from apps.notify.models import Notify
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from libs import spug, AttrDict, human_datetime
|
from libs import spug, AttrDict, human_datetime, human_diff_time
|
||||||
|
from datetime import datetime
|
||||||
import logging
|
import logging
|
||||||
import json
|
import json
|
||||||
import time
|
import time
|
||||||
|
@ -40,22 +41,25 @@ class Scheduler:
|
||||||
notify_grp=obj.notify_grp,
|
notify_grp=obj.notify_grp,
|
||||||
notify_mode=obj.notify_mode)
|
notify_mode=obj.notify_mode)
|
||||||
|
|
||||||
def _do_notify(self, event, obj):
|
def _do_notify(self, event, obj, out):
|
||||||
grp = json.loads(obj.notify_grp)
|
obj.out = out
|
||||||
|
obj.grp = json.loads(obj.notify_grp)
|
||||||
|
if event == '2':
|
||||||
|
obj.duration = human_diff_time(datetime.now(), datetime.fromtimestamp(obj.latest_fault_time))
|
||||||
for mode in json.loads(obj.notify_mode):
|
for mode in json.loads(obj.notify_mode):
|
||||||
if mode == '1':
|
if mode == '1':
|
||||||
spug.notify_by_wx(event, obj.name, grp)
|
spug.notify_by_wx(event, obj)
|
||||||
elif mode == '3':
|
elif mode == '3':
|
||||||
spug.notify_by_dd(event, obj.name, grp)
|
spug.notify_by_dd(event, obj)
|
||||||
elif mode == '4':
|
elif mode == '4':
|
||||||
spug.notify_by_email(event, obj.name, grp)
|
spug.notify_by_email(event, obj)
|
||||||
|
|
||||||
def _handle_notify(self, obj, old_status):
|
def _handle_notify(self, obj, old_status, out):
|
||||||
if obj.latest_status == 0:
|
if obj.latest_status == 0:
|
||||||
if old_status == 1:
|
if old_status == 1:
|
||||||
self._record_alarm(obj, '2')
|
self._record_alarm(obj, '2')
|
||||||
logger.info(f'{human_datetime()} recover job_id: {obj.id}')
|
logger.info(f'{human_datetime()} recover job_id: {obj.id}')
|
||||||
self._do_notify('2', obj)
|
self._do_notify('2', obj, out)
|
||||||
else:
|
else:
|
||||||
if obj.fault_times >= obj.threshold:
|
if obj.fault_times >= obj.threshold:
|
||||||
if time.time() - obj.latest_notify_time >= obj.quiet * 60:
|
if time.time() - obj.latest_notify_time >= obj.quiet * 60:
|
||||||
|
@ -63,7 +67,7 @@ class Scheduler:
|
||||||
obj.save()
|
obj.save()
|
||||||
self._record_alarm(obj, '1')
|
self._record_alarm(obj, '1')
|
||||||
logger.info(f'{human_datetime()} notify job_id: {obj.id}')
|
logger.info(f'{human_datetime()} notify job_id: {obj.id}')
|
||||||
self._do_notify('1', obj)
|
self._do_notify('1', obj, out)
|
||||||
|
|
||||||
def _handle_event(self, event):
|
def _handle_event(self, event):
|
||||||
close_old_connections()
|
close_old_connections()
|
||||||
|
@ -78,11 +82,12 @@ class Scheduler:
|
||||||
logger.info(f'EVENT_JOB_ERROR: job_id {event.job_id} exception: {event.exception}')
|
logger.info(f'EVENT_JOB_ERROR: job_id {event.job_id} exception: {event.exception}')
|
||||||
Notify.make_notify('monitor', '1', f'{obj.name} - 执行异常', f'{event.exception}')
|
Notify.make_notify('monitor', '1', f'{obj.name} - 执行异常', f'{event.exception}')
|
||||||
elif event.code == EVENT_JOB_EXECUTED:
|
elif event.code == EVENT_JOB_EXECUTED:
|
||||||
|
is_ok, out = event.retval
|
||||||
obj = Detection.objects.filter(pk=event.job_id).first()
|
obj = Detection.objects.filter(pk=event.job_id).first()
|
||||||
old_status = obj.latest_status
|
old_status = obj.latest_status
|
||||||
obj.latest_status = 0 if event.retval else 1
|
obj.latest_status = 0 if is_ok else 1
|
||||||
obj.latest_run_time = human_datetime(event.scheduled_run_time)
|
obj.latest_run_time = human_datetime(event.scheduled_run_time)
|
||||||
if old_status in [0, None] and event.retval is False:
|
if old_status in [0, None] and is_ok is False:
|
||||||
obj.latest_fault_time = int(time.time())
|
obj.latest_fault_time = int(time.time())
|
||||||
if obj.latest_status == 0:
|
if obj.latest_status == 0:
|
||||||
obj.latest_notify_time = 0
|
obj.latest_notify_time = 0
|
||||||
|
@ -90,7 +95,7 @@ class Scheduler:
|
||||||
else:
|
else:
|
||||||
obj.fault_times += 1
|
obj.fault_times += 1
|
||||||
obj.save()
|
obj.save()
|
||||||
self._handle_notify(obj, old_status)
|
self._handle_notify(obj, old_status, out)
|
||||||
|
|
||||||
def _init(self):
|
def _init(self):
|
||||||
self.scheduler.start()
|
self.scheduler.start()
|
||||||
|
|
|
@ -18,8 +18,8 @@ def _parse_args(grp):
|
||||||
return spug_key, sum([json.loads(x.contacts) for x in Group.objects.filter(id__in=grp)], [])
|
return spug_key, sum([json.loads(x.contacts) for x in Group.objects.filter(id__in=grp)], [])
|
||||||
|
|
||||||
|
|
||||||
def notify_by_wx(event, subject, n_grp):
|
def notify_by_wx(event, obj):
|
||||||
spug_key, u_ids = _parse_args(n_grp)
|
spug_key, u_ids = _parse_args(obj.grp)
|
||||||
if not spug_key:
|
if not spug_key:
|
||||||
Notify.make_notify(notify_source, '1', '发送报警信息失败', '未配置报警服务调用凭据,请在系统管理/系统设置/报警服务设置中配置。')
|
Notify.make_notify(notify_source, '1', '发送报警信息失败', '未配置报警服务调用凭据,请在系统管理/系统设置/报警服务设置中配置。')
|
||||||
return
|
return
|
||||||
|
@ -28,7 +28,9 @@ def notify_by_wx(event, subject, n_grp):
|
||||||
data = {
|
data = {
|
||||||
'token': spug_key,
|
'token': spug_key,
|
||||||
'event': event,
|
'event': event,
|
||||||
'subject': subject,
|
'subject': obj.name,
|
||||||
|
'desc': obj.out,
|
||||||
|
'remark': f'故障持续{obj.duration}' if event == '2' else None,
|
||||||
'users': list(users)
|
'users': list(users)
|
||||||
}
|
}
|
||||||
requests.post(f'{spug_server}/apis/notify/wx/', json=data)
|
requests.post(f'{spug_server}/apis/notify/wx/', json=data)
|
||||||
|
@ -36,21 +38,25 @@ def notify_by_wx(event, subject, n_grp):
|
||||||
Notify.make_notify(notify_source, '1', '发送报警信息失败', '未找到可用的通知对象,请确保设置了相关报警联系人的微信Token。')
|
Notify.make_notify(notify_source, '1', '发送报警信息失败', '未找到可用的通知对象,请确保设置了相关报警联系人的微信Token。')
|
||||||
|
|
||||||
|
|
||||||
def notify_by_email(event, subject, grp):
|
def notify_by_email(event, obj):
|
||||||
spug_key, u_ids = _parse_args(grp)
|
spug_key, u_ids = _parse_args(obj.grp)
|
||||||
users = set(x.email for x in Contact.objects.filter(id__in=u_ids, email__isnull=False))
|
users = set(x.email for x in Contact.objects.filter(id__in=u_ids, email__isnull=False))
|
||||||
if users:
|
if users:
|
||||||
mail_service = json.loads(AppSetting.get_default('mail_service', '{}'))
|
mail_service = json.loads(AppSetting.get_default('mail_service', '{}'))
|
||||||
|
body = ['告警名称:' + obj.name, '告警时间:' + human_datetime(), '告警描述:' + obj.out]
|
||||||
|
if event == '2':
|
||||||
|
body.append('故障持续:' + obj.duration)
|
||||||
if mail_service.get('server'):
|
if mail_service.get('server'):
|
||||||
event_map = {'1': '告警', '2': '恢复'}
|
event_map = {'1': '告警发生', '2': '告警恢复'}
|
||||||
subject = f'{event_map[event]}-{subject}'
|
subject = f'{event_map[event]}-{obj.name}'
|
||||||
mail = Mail(**mail_service)
|
mail = Mail(**mail_service)
|
||||||
mail.send_text_mail(users, subject, f'{subject}\r\n\r\n自动发送,请勿回复。')
|
mail.send_text_mail(users, subject, '\r\n'.join(body) + '\r\n\r\n自动发送,请勿回复。')
|
||||||
elif spug_key:
|
elif spug_key:
|
||||||
data = {
|
data = {
|
||||||
'token': spug_key,
|
'token': spug_key,
|
||||||
'event': event,
|
'event': event,
|
||||||
'subject': subject,
|
'subject': obj.name,
|
||||||
|
'body': '\r\n'.join(body),
|
||||||
'users': list(users)
|
'users': list(users)
|
||||||
}
|
}
|
||||||
requests.post(f'{spug_server}/apis/notify/mail/', json=data)
|
requests.post(f'{spug_server}/apis/notify/mail/', json=data)
|
||||||
|
@ -60,22 +66,23 @@ def notify_by_email(event, subject, grp):
|
||||||
Notify.make_notify(notify_source, '1', '发送报警信息失败', '未找到可用的通知对象,请确保设置了相关报警联系人的邮件地址。')
|
Notify.make_notify(notify_source, '1', '发送报警信息失败', '未找到可用的通知对象,请确保设置了相关报警联系人的邮件地址。')
|
||||||
|
|
||||||
|
|
||||||
def notify_by_dd(event, subject, grp):
|
def notify_by_dd(event, obj):
|
||||||
_, u_ids = _parse_args(grp)
|
_, u_ids = _parse_args(obj.grp)
|
||||||
users = set(x.ding for x in Contact.objects.filter(id__in=u_ids, ding__isnull=False))
|
users = set(x.ding for x in Contact.objects.filter(id__in=u_ids, ding__isnull=False))
|
||||||
if users:
|
if users:
|
||||||
texts = [
|
texts = [
|
||||||
'## %s ## ' % '监控告警通知' if event == '1' else '告警恢复通知',
|
'## %s ## ' % ('监控告警通知' if event == '1' else '告警恢复通知'),
|
||||||
f'**告警名称:** <font color="#{"f90202" if event == "1" else "8ece60"}">{subject}</font> ',
|
f'**告警名称:** <font color="#{"f90202" if event == "1" else "8ece60"}">{obj.name}</font> ',
|
||||||
f'**告警时间:** {human_datetime()} ',
|
f'**告警时间:** {human_datetime()} ',
|
||||||
'**告警描述:** %s ' % '请在运维平台监控中心查看详情' if event == '1' else '告警已恢复',
|
f'**告警描述:** {obj.out} ',
|
||||||
'> ###### 来自 Spug运维平台'
|
|
||||||
]
|
]
|
||||||
|
if event == '2':
|
||||||
|
texts.append(f'**持续时间:** {obj.duration} ')
|
||||||
data = {
|
data = {
|
||||||
'msgtype': 'markdown',
|
'msgtype': 'markdown',
|
||||||
'markdown': {
|
'markdown': {
|
||||||
'title': '监控告警通知',
|
'title': '监控告警通知',
|
||||||
'text': '\n\n'.join(texts)
|
'text': '\n\n'.join(texts) + '\n\n> ###### 来自 Spug运维平台'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for url in users:
|
for url in users:
|
||||||
|
|
Loading…
Reference in New Issue