diff --git a/spug_api/apps/monitor/executors.py b/spug_api/apps/monitor/executors.py
index 2df3697..2ae6f72 100644
--- a/spug_api/apps/monitor/executors.py
+++ b/spug_api/apps/monitor/executors.py
@@ -12,27 +12,30 @@ logging.captureWarnings(True)
def site_check(url):
- status_code = -1
+ # Probe `url` with an HTTP GET (10 second timeout, certificate verification
+ # disabled) and return an (is_ok, message) tuple.
try:
res = requests.get(url, timeout=10, verify=False)
- status_code = res.status_code
- finally:
- return status_code == 200
+ # Any status code in [200, 400) counts as healthy; the code itself is
+ # returned as the message.
+ return 200 <= res.status_code < 400, f'返回状态码:{res.status_code}'
+ except Exception as e:
+ # A failed request is reported as a failed check carrying the exception text.
+ return False, f'异常信息:{e}'
def port_check(addr, port):
-    sock = socket()
-    sock.settimeout(5)
-    return sock.connect_ex((addr, int(port))) == 0
+    """Check whether a TCP connection to ``addr:port`` can be established.
+
+    Returns an ``(is_ok, message)`` tuple. ``message`` is always a string
+    (never ``None``) because the notification code concatenates it into the
+    alert text.
+    """
+    try:
+        # The context manager closes the socket on success and on failure,
+        # so periodic checks do not leak file descriptors.
+        with socket() as sock:
+            sock.settimeout(5)
+            sock.connect((addr, int(port)))
+        return True, '端口状态正常'
+    except Exception as e:
+        return False, f'异常信息:{e}'
def host_executor(host, pkey, command):
-    exit_code = -1
+    """Run ``command`` on ``host`` through the SSH helper.
+
+    Returns an ``(is_ok, output)`` tuple: ``is_ok`` is True when the command
+    exits with code 0, ``output`` is the decoded command output. Any
+    exception is reported as a failed check carrying the exception text.
+    """
     try:
         cli = SSH(host.hostname, host.port, host.username, pkey=pkey)
-        exit_code, _ = cli.exec_command(command)
-    finally:
-        return exit_code == 0
+        exit_code, out = cli.exec_command(command)
+        # Assumes `out` is bytes (it is decoded here). Undecodable bytes are
+        # replaced instead of raising: a UnicodeDecodeError would be caught
+        # below and turn a successful command into a reported failure.
+        return exit_code == 0, out.decode(errors='replace')
+    except Exception as e:
+        return False, f'异常信息:{e}'
def dispatch(tp, addr, extra):
diff --git a/spug_api/apps/monitor/scheduler.py b/spug_api/apps/monitor/scheduler.py
index 3572074..f5e9edb 100644
--- a/spug_api/apps/monitor/scheduler.py
+++ b/spug_api/apps/monitor/scheduler.py
@@ -13,7 +13,8 @@ from apps.monitor.executors import dispatch
from apps.monitor.utils import seconds_to_human
from apps.notify.models import Notify
from django.conf import settings
-from libs import spug, AttrDict, human_datetime
+from libs import spug, AttrDict, human_datetime, human_diff_time
+from datetime import datetime
import logging
import json
import time
@@ -40,22 +41,25 @@ class Scheduler:
notify_grp=obj.notify_grp,
notify_mode=obj.notify_mode)
- def _do_notify(self, event, obj):
- grp = json.loads(obj.notify_grp)
+ def _do_notify(self, event, obj, out):
+ # Send a notification for `obj` through every channel listed in
+ # obj.notify_mode. `event` is '1' when called for an alarm and '2' when
+ # called for a recovery; `out` is the message produced by the check.
+ # The notifier functions read obj.out and obj.grp (and obj.duration for
+ # recoveries), so these are attached to the instance before dispatching.
+ obj.out = out
+ obj.grp = json.loads(obj.notify_grp)
+ if event == '2':
+ # Time elapsed between the recorded fault start and now.
+ obj.duration = human_diff_time(datetime.now(), datetime.fromtimestamp(obj.latest_fault_time))
for mode in json.loads(obj.notify_mode):
if mode == '1':
- spug.notify_by_wx(event, obj.name, grp)
+ spug.notify_by_wx(event, obj)
elif mode == '3':
- spug.notify_by_dd(event, obj.name, grp)
+ spug.notify_by_dd(event, obj)
elif mode == '4':
- spug.notify_by_email(event, obj.name, grp)
+ spug.notify_by_email(event, obj)
- def _handle_notify(self, obj, old_status):
+ def _handle_notify(self, obj, old_status, out):
if obj.latest_status == 0:
if old_status == 1:
self._record_alarm(obj, '2')
logger.info(f'{human_datetime()} recover job_id: {obj.id}')
- self._do_notify('2', obj)
+ self._do_notify('2', obj, out)
else:
if obj.fault_times >= obj.threshold:
if time.time() - obj.latest_notify_time >= obj.quiet * 60:
@@ -63,7 +67,7 @@ class Scheduler:
obj.save()
self._record_alarm(obj, '1')
logger.info(f'{human_datetime()} notify job_id: {obj.id}')
- self._do_notify('1', obj)
+ self._do_notify('1', obj, out)
def _handle_event(self, event):
close_old_connections()
@@ -78,11 +82,12 @@ class Scheduler:
logger.info(f'EVENT_JOB_ERROR: job_id {event.job_id} exception: {event.exception}')
Notify.make_notify('monitor', '1', f'{obj.name} - 执行异常', f'{event.exception}')
elif event.code == EVENT_JOB_EXECUTED:
+ is_ok, out = event.retval
obj = Detection.objects.filter(pk=event.job_id).first()
old_status = obj.latest_status
- obj.latest_status = 0 if event.retval else 1
+ obj.latest_status = 0 if is_ok else 1
obj.latest_run_time = human_datetime(event.scheduled_run_time)
- if old_status in [0, None] and event.retval is False:
+ if old_status in [0, None] and is_ok is False:
obj.latest_fault_time = int(time.time())
if obj.latest_status == 0:
obj.latest_notify_time = 0
@@ -90,7 +95,7 @@ class Scheduler:
else:
obj.fault_times += 1
obj.save()
- self._handle_notify(obj, old_status)
+ self._handle_notify(obj, old_status, out)
def _init(self):
self.scheduler.start()
diff --git a/spug_api/libs/spug.py b/spug_api/libs/spug.py
index 515a9d7..04437e2 100644
--- a/spug_api/libs/spug.py
+++ b/spug_api/libs/spug.py
@@ -18,8 +18,8 @@ def _parse_args(grp):
return spug_key, sum([json.loads(x.contacts) for x in Group.objects.filter(id__in=grp)], [])
-def notify_by_wx(event, subject, n_grp):
- spug_key, u_ids = _parse_args(n_grp)
+def notify_by_wx(event, obj):
+ spug_key, u_ids = _parse_args(obj.grp)
if not spug_key:
Notify.make_notify(notify_source, '1', '发送报警信息失败', '未配置报警服务调用凭据,请在系统管理/系统设置/报警服务设置中配置。')
return
@@ -28,7 +28,9 @@ def notify_by_wx(event, subject, n_grp):
data = {
'token': spug_key,
'event': event,
- 'subject': subject,
+ 'subject': obj.name,
+ 'desc': obj.out,
+ 'remark': f'故障持续{obj.duration}' if event == '2' else None,
'users': list(users)
}
requests.post(f'{spug_server}/apis/notify/wx/', json=data)
@@ -36,21 +38,25 @@ def notify_by_wx(event, subject, n_grp):
Notify.make_notify(notify_source, '1', '发送报警信息失败', '未找到可用的通知对象,请确保设置了相关报警联系人的微信Token。')
-def notify_by_email(event, subject, grp):
- spug_key, u_ids = _parse_args(grp)
+def notify_by_email(event, obj):
+ spug_key, u_ids = _parse_args(obj.grp)
users = set(x.email for x in Contact.objects.filter(id__in=u_ids, email__isnull=False))
if users:
mail_service = json.loads(AppSetting.get_default('mail_service', '{}'))
+ body = ['告警名称:' + obj.name, '告警时间:' + human_datetime(), '告警描述:' + obj.out]
+ if event == '2':
+ body.append('故障持续:' + obj.duration)
if mail_service.get('server'):
- event_map = {'1': '告警', '2': '恢复'}
- subject = f'{event_map[event]}-{subject}'
+ event_map = {'1': '告警发生', '2': '告警恢复'}
+ subject = f'{event_map[event]}-{obj.name}'
mail = Mail(**mail_service)
- mail.send_text_mail(users, subject, f'{subject}\r\n\r\n自动发送,请勿回复。')
+ mail.send_text_mail(users, subject, '\r\n'.join(body) + '\r\n\r\n自动发送,请勿回复。')
elif spug_key:
data = {
'token': spug_key,
'event': event,
- 'subject': subject,
+ 'subject': obj.name,
+ 'body': '\r\n'.join(body),
'users': list(users)
}
requests.post(f'{spug_server}/apis/notify/mail/', json=data)
@@ -60,22 +66,23 @@ def notify_by_email(event, subject, grp):
Notify.make_notify(notify_source, '1', '发送报警信息失败', '未找到可用的通知对象,请确保设置了相关报警联系人的邮件地址。')
-def notify_by_dd(event, subject, grp):
- _, u_ids = _parse_args(grp)
+def notify_by_dd(event, obj):
+ _, u_ids = _parse_args(obj.grp)
users = set(x.ding for x in Contact.objects.filter(id__in=u_ids, ding__isnull=False))
if users:
texts = [
- '## %s ## ' % '监控告警通知' if event == '1' else '告警恢复通知',
- f'**告警名称:** {subject} ',
+ '## %s ## ' % ('监控告警通知' if event == '1' else '告警恢复通知'),
+ f'**告警名称:** {obj.name} ',
f'**告警时间:** {human_datetime()} ',
- '**告警描述:** %s ' % '请在运维平台监控中心查看详情' if event == '1' else '告警已恢复',
- '> ###### 来自 Spug运维平台'
+ f'**告警描述:** {obj.out} ',
]
+ if event == '2':
+ texts.append(f'**持续时间:** {obj.duration} ')
data = {
'msgtype': 'markdown',
'markdown': {
'title': '监控告警通知',
- 'text': '\n\n'.join(texts)
+ 'text': '\n\n'.join(texts) + '\n\n> ###### 来自 Spug运维平台'
}
}
for url in users: