U 增加告警信息内容

pull/103/head
vapao 2020-05-04 13:30:19 +08:00
parent b801720470
commit 875f86dcbd
3 changed files with 54 additions and 39 deletions

View File

@@ -12,27 +12,30 @@ logging.captureWarnings(True)
def site_check(url): def site_check(url):
status_code = -1
try: try:
res = requests.get(url, timeout=10, verify=False) res = requests.get(url, timeout=10, verify=False)
status_code = res.status_code return 200 <= res.status_code < 400, f'返回状态码:{res.status_code}'
finally: except Exception as e:
return status_code == 200 return False, f'异常信息:{e}'
def port_check(addr, port): def port_check(addr, port):
sock = socket() try:
sock.settimeout(5) sock = socket()
return sock.connect_ex((addr, int(port))) == 0 sock.settimeout(5)
sock.connect((addr, int(port)))
return True, None
except Exception as e:
return False, f'异常信息:{e}'
def host_executor(host, pkey, command): def host_executor(host, pkey, command):
exit_code = -1
try: try:
cli = SSH(host.hostname, host.port, host.username, pkey=pkey) cli = SSH(host.hostname, host.port, host.username, pkey=pkey)
exit_code, _ = cli.exec_command(command) exit_code, out = cli.exec_command(command)
finally: return exit_code == 0, out.decode()
return exit_code == 0 except Exception as e:
return False, f'异常信息:{e}'
def dispatch(tp, addr, extra): def dispatch(tp, addr, extra):

View File

@@ -13,7 +13,8 @@ from apps.monitor.executors import dispatch
from apps.monitor.utils import seconds_to_human from apps.monitor.utils import seconds_to_human
from apps.notify.models import Notify from apps.notify.models import Notify
from django.conf import settings from django.conf import settings
from libs import spug, AttrDict, human_datetime from libs import spug, AttrDict, human_datetime, human_diff_time
from datetime import datetime
import logging import logging
import json import json
import time import time
@@ -40,22 +41,25 @@ class Scheduler:
notify_grp=obj.notify_grp, notify_grp=obj.notify_grp,
notify_mode=obj.notify_mode) notify_mode=obj.notify_mode)
def _do_notify(self, event, obj): def _do_notify(self, event, obj, out):
grp = json.loads(obj.notify_grp) obj.out = out
obj.grp = json.loads(obj.notify_grp)
if event == '2':
obj.duration = human_diff_time(datetime.now(), datetime.fromtimestamp(obj.latest_fault_time))
for mode in json.loads(obj.notify_mode): for mode in json.loads(obj.notify_mode):
if mode == '1': if mode == '1':
spug.notify_by_wx(event, obj.name, grp) spug.notify_by_wx(event, obj)
elif mode == '3': elif mode == '3':
spug.notify_by_dd(event, obj.name, grp) spug.notify_by_dd(event, obj)
elif mode == '4': elif mode == '4':
spug.notify_by_email(event, obj.name, grp) spug.notify_by_email(event, obj)
def _handle_notify(self, obj, old_status): def _handle_notify(self, obj, old_status, out):
if obj.latest_status == 0: if obj.latest_status == 0:
if old_status == 1: if old_status == 1:
self._record_alarm(obj, '2') self._record_alarm(obj, '2')
logger.info(f'{human_datetime()} recover job_id: {obj.id}') logger.info(f'{human_datetime()} recover job_id: {obj.id}')
self._do_notify('2', obj) self._do_notify('2', obj, out)
else: else:
if obj.fault_times >= obj.threshold: if obj.fault_times >= obj.threshold:
if time.time() - obj.latest_notify_time >= obj.quiet * 60: if time.time() - obj.latest_notify_time >= obj.quiet * 60:
@@ -63,7 +67,7 @@ class Scheduler:
obj.save() obj.save()
self._record_alarm(obj, '1') self._record_alarm(obj, '1')
logger.info(f'{human_datetime()} notify job_id: {obj.id}') logger.info(f'{human_datetime()} notify job_id: {obj.id}')
self._do_notify('1', obj) self._do_notify('1', obj, out)
def _handle_event(self, event): def _handle_event(self, event):
close_old_connections() close_old_connections()
@@ -78,11 +82,12 @@ class Scheduler:
logger.info(f'EVENT_JOB_ERROR: job_id {event.job_id} exception: {event.exception}') logger.info(f'EVENT_JOB_ERROR: job_id {event.job_id} exception: {event.exception}')
Notify.make_notify('monitor', '1', f'{obj.name} - 执行异常', f'{event.exception}') Notify.make_notify('monitor', '1', f'{obj.name} - 执行异常', f'{event.exception}')
elif event.code == EVENT_JOB_EXECUTED: elif event.code == EVENT_JOB_EXECUTED:
is_ok, out = event.retval
obj = Detection.objects.filter(pk=event.job_id).first() obj = Detection.objects.filter(pk=event.job_id).first()
old_status = obj.latest_status old_status = obj.latest_status
obj.latest_status = 0 if event.retval else 1 obj.latest_status = 0 if is_ok else 1
obj.latest_run_time = human_datetime(event.scheduled_run_time) obj.latest_run_time = human_datetime(event.scheduled_run_time)
if old_status in [0, None] and event.retval is False: if old_status in [0, None] and is_ok is False:
obj.latest_fault_time = int(time.time()) obj.latest_fault_time = int(time.time())
if obj.latest_status == 0: if obj.latest_status == 0:
obj.latest_notify_time = 0 obj.latest_notify_time = 0
@@ -90,7 +95,7 @@ class Scheduler:
else: else:
obj.fault_times += 1 obj.fault_times += 1
obj.save() obj.save()
self._handle_notify(obj, old_status) self._handle_notify(obj, old_status, out)
def _init(self): def _init(self):
self.scheduler.start() self.scheduler.start()

View File

@@ -18,8 +18,8 @@ def _parse_args(grp):
return spug_key, sum([json.loads(x.contacts) for x in Group.objects.filter(id__in=grp)], []) return spug_key, sum([json.loads(x.contacts) for x in Group.objects.filter(id__in=grp)], [])
def notify_by_wx(event, subject, n_grp): def notify_by_wx(event, obj):
spug_key, u_ids = _parse_args(n_grp) spug_key, u_ids = _parse_args(obj.grp)
if not spug_key: if not spug_key:
Notify.make_notify(notify_source, '1', '发送报警信息失败', '未配置报警服务调用凭据,请在系统管理/系统设置/报警服务设置中配置。') Notify.make_notify(notify_source, '1', '发送报警信息失败', '未配置报警服务调用凭据,请在系统管理/系统设置/报警服务设置中配置。')
return return
@@ -28,7 +28,9 @@ def notify_by_wx(event, subject, n_grp):
data = { data = {
'token': spug_key, 'token': spug_key,
'event': event, 'event': event,
'subject': subject, 'subject': obj.name,
'desc': obj.out,
'remark': f'故障持续{obj.duration}' if event == '2' else None,
'users': list(users) 'users': list(users)
} }
requests.post(f'{spug_server}/apis/notify/wx/', json=data) requests.post(f'{spug_server}/apis/notify/wx/', json=data)
@@ -36,21 +38,25 @@ def notify_by_wx(event, subject, n_grp):
Notify.make_notify(notify_source, '1', '发送报警信息失败', '未找到可用的通知对象请确保设置了相关报警联系人的微信Token。') Notify.make_notify(notify_source, '1', '发送报警信息失败', '未找到可用的通知对象请确保设置了相关报警联系人的微信Token。')
def notify_by_email(event, subject, grp): def notify_by_email(event, obj):
spug_key, u_ids = _parse_args(grp) spug_key, u_ids = _parse_args(obj.grp)
users = set(x.email for x in Contact.objects.filter(id__in=u_ids, email__isnull=False)) users = set(x.email for x in Contact.objects.filter(id__in=u_ids, email__isnull=False))
if users: if users:
mail_service = json.loads(AppSetting.get_default('mail_service', '{}')) mail_service = json.loads(AppSetting.get_default('mail_service', '{}'))
body = ['告警名称:' + obj.name, '告警时间:' + human_datetime(), '告警描述:' + obj.out]
if event == '2':
body.append('故障持续:' + obj.duration)
if mail_service.get('server'): if mail_service.get('server'):
event_map = {'1': '告警', '2': '恢复'} event_map = {'1': '告警发生', '2': '告警恢复'}
subject = f'{event_map[event]}-{subject}' subject = f'{event_map[event]}-{obj.name}'
mail = Mail(**mail_service) mail = Mail(**mail_service)
mail.send_text_mail(users, subject, f'{subject}\r\n\r\n自动发送,请勿回复。') mail.send_text_mail(users, subject, '\r\n'.join(body) + '\r\n\r\n自动发送,请勿回复。')
elif spug_key: elif spug_key:
data = { data = {
'token': spug_key, 'token': spug_key,
'event': event, 'event': event,
'subject': subject, 'subject': obj.name,
'body': '\r\n'.join(body),
'users': list(users) 'users': list(users)
} }
requests.post(f'{spug_server}/apis/notify/mail/', json=data) requests.post(f'{spug_server}/apis/notify/mail/', json=data)
@@ -60,22 +66,23 @@ def notify_by_email(event, subject, grp):
Notify.make_notify(notify_source, '1', '发送报警信息失败', '未找到可用的通知对象,请确保设置了相关报警联系人的邮件地址。') Notify.make_notify(notify_source, '1', '发送报警信息失败', '未找到可用的通知对象,请确保设置了相关报警联系人的邮件地址。')
def notify_by_dd(event, subject, grp): def notify_by_dd(event, obj):
_, u_ids = _parse_args(grp) _, u_ids = _parse_args(obj.grp)
users = set(x.ding for x in Contact.objects.filter(id__in=u_ids, ding__isnull=False)) users = set(x.ding for x in Contact.objects.filter(id__in=u_ids, ding__isnull=False))
if users: if users:
texts = [ texts = [
'## %s ## ' % '监控告警通知' if event == '1' else '告警恢复通知', '## %s ## ' % ('监控告警通知' if event == '1' else '告警恢复通知'),
f'**告警名称:** <font color="#{"f90202" if event == "1" else "8ece60"}">{subject}</font> ', f'**告警名称:** <font color="#{"f90202" if event == "1" else "8ece60"}">{obj.name}</font> ',
f'**告警时间:** {human_datetime()} ', f'**告警时间:** {human_datetime()} ',
'**告警描述:** %s ' % '请在运维平台监控中心查看详情' if event == '1' else '告警已恢复', f'**告警描述:** {obj.out} ',
'> ###### 来自 Spug运维平台'
] ]
if event == '2':
texts.append(f'**持续时间:** {obj.duration} ')
data = { data = {
'msgtype': 'markdown', 'msgtype': 'markdown',
'markdown': { 'markdown': {
'title': '监控告警通知', 'title': '监控告警通知',
'text': '\n\n'.join(texts) 'text': '\n\n'.join(texts) + '\n\n> ###### 来自 Spug运维平台'
} }
} }
for url in users: for url in users: