mirror of https://github.com/jumpserver/jumpserver
feat: 服务性能告警指标包含:Core服务和各组件状态;指标包括:cpu/disk/memory/is_alive (#6564)
* feat: 服务性能告警指标包含:Core服务和各组件状态;指标包括:cpu/disk/memory/is_alive
* feat: 服务性能告警指标包含:Core服务和各组件状态;指标包括:cpu/disk/memory/is_alive 2
Co-authored-by: Bai <bugatti_it@163.com>
pull/6569/head
parent
67f6b1080e
commit
66b0173e20
|
@ -249,6 +249,18 @@ def get_disk_usage():
|
||||||
return usages
|
return usages
|
||||||
|
|
||||||
|
|
||||||
|
def get_cpu_load():
    """Return the 1-minute load average per logical CPU.

    The system-wide 1-minute load reported by ``psutil.getloadavg()`` is
    divided by the CPU count and rounded to two decimal places.

    Returns:
        float: per-CPU 1-minute load average.
    """
    # Only the 1-minute figure is used; the 5- and 15-minute values are ignored.
    load_1m = psutil.getloadavg()[0]
    # psutil.cpu_count() returns None when the count cannot be determined;
    # fall back to 1 so the raw load is reported instead of raising TypeError.
    cpu_count = psutil.cpu_count() or 1
    return round(load_1m / cpu_count, 2)
|
||||||
|
|
||||||
|
|
||||||
|
def get_memory_used():
    """Return the ``percent`` field of ``psutil.virtual_memory()`` for this host."""
    memory = psutil.virtual_memory()
    return memory.percent
|
||||||
|
|
||||||
|
|
||||||
class Time:
|
class Time:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self._timestamps = []
|
self._timestamps = []
|
||||||
|
|
|
@ -92,8 +92,9 @@ class Message(metaclass=MessageType):
|
||||||
|
|
||||||
def get_email_msg(self) -> dict:
    """Build the e-mail payload for this message.

    The body is the full text from ``get_common_msg()``. The subject is that
    same text, shortened to its first 20 characters followed by ``' ...'``
    only when the text is actually longer than 20 characters.

    Returns:
        dict: ``{'subject': <str>, 'message': <str>}``.
    """
    subject_limit = 20
    msg = self.get_common_msg()
    # Strictly greater-than: a message of exactly `subject_limit` characters
    # is not truncated, so it must not get a misleading ellipsis.
    if len(msg) > subject_limit:
        subject = f'{msg[:subject_limit]} ...'
    else:
        subject = msg
    return {
        'subject': subject,
        'message': msg
    }
|
||||||
|
|
||||||
|
|
|
@ -1,11 +1,14 @@
|
||||||
from django.utils.translation import gettext_lazy as _
|
from django.utils.translation import gettext_lazy as _
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
from notifications.notifications import SystemMessage
|
from notifications.notifications import SystemMessage
|
||||||
from notifications.models import SystemMsgSubscription
|
from notifications.models import SystemMsgSubscription
|
||||||
from users.models import User
|
from users.models import User
|
||||||
from notifications.backends import BACKEND
|
from notifications.backends import BACKEND
|
||||||
|
from common.utils import get_disk_usage, get_cpu_load, get_memory_used
|
||||||
|
from terminal.models import Status, Terminal
|
||||||
|
|
||||||
__all__ = ('ServerPerformanceMessage',)
|
__all__ = ('ServerPerformanceMessage', 'ServerPerformanceCheckUtil')
|
||||||
|
|
||||||
|
|
||||||
class ServerPerformanceMessage(SystemMessage):
|
class ServerPerformanceMessage(SystemMessage):
|
||||||
|
@ -13,13 +16,11 @@ class ServerPerformanceMessage(SystemMessage):
|
||||||
category_label = _('Operations')
|
category_label = _('Operations')
|
||||||
message_type_label = _('Server performance')
|
message_type_label = _('Server performance')
|
||||||
|
|
||||||
def __init__(self, msg):
    """Store the already-formatted alarm text.

    Args:
        msg: text later returned unchanged by ``get_common_msg()``.
    """
    self._msg = msg
|
|
||||||
|
|
||||||
def get_common_msg(self):
    """Return the alarm text that was stored on the instance as ``_msg``."""
    stored = self._msg
    return stored
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def post_insert_to_db(cls, subscription: SystemMsgSubscription):
|
def post_insert_to_db(cls, subscription: SystemMsgSubscription):
|
||||||
|
@ -27,3 +28,117 @@ class ServerPerformanceMessage(SystemMessage):
|
||||||
subscription.users.add(*admins)
|
subscription.users.add(*admins)
|
||||||
subscription.receive_backends = [BACKEND.EMAIL]
|
subscription.receive_backends = [BACKEND.EMAIL]
|
||||||
subscription.save()
|
subscription.save()
|
||||||
|
|
||||||
|
|
||||||
|
class ServerPerformanceCheckUtil(object):
    """Collect resource alarms for the local host and terminals, then publish them.

    ``check()`` appends human-readable alarm strings to ``alarm_messages``;
    ``publish()`` joins them with ``<br>`` and sends a single
    ``ServerPerformanceMessage``. While a terminal is being inspected it is
    held in ``_terminal``; ``_terminal is None`` means the local host, which
    is reported under the name ``'Core'``.
    """

    def __init__(self, disk_usage_threshold=80, cpu_load_threshold=5,
                 memory_usage_threshold=85):
        """Create a checker with an empty alarm list.

        The defaults are the values the original inline comments gave as the
        intended limits (80 / 5 / 85); the previously hard-coded 20 / 1 / 20
        looked like leftover debugging values.

        Args:
            disk_usage_threshold: disk usage percentage above which an alarm is raised.
            cpu_load_threshold: CPU load above which an alarm is raised.
            memory_usage_threshold: memory usage percentage above which an alarm is raised.
        """
        self.alarm_messages = []
        self.disk_usage_threshold = disk_usage_threshold
        self.cpu_load_threshold = cpu_load_threshold
        self.memory_usage_threshold = memory_usage_threshold
        # Terminal currently being checked; None means the local host.
        self._terminal = None

    def check_and_publish(self):
        """Run every check, then publish the collected alarms (if any)."""
        self.check()
        self.publish()

    def publish(self):
        """Send all collected alarms as one message; do nothing when there are none."""
        if not self.alarm_messages:
            return
        msg = '<br>'.join(self.alarm_messages)
        ServerPerformanceMessage(msg).publish()

    def check(self):
        """Check the local host (when enabled in settings) and then every terminal."""
        local_items = ['disk_usage', 'cpu_load', 'memory_usage']
        terminal_items = local_items + ['is_alive']

        # Check local. Reset first so a terminal left over from an earlier
        # run cannot be mistaken for the local host.
        self._terminal = None
        if settings.DISK_CHECK_ENABLED:
            self.check_items(local_items)

        # Check terminal
        try:
            for terminal in self.get_terminals():
                self._terminal = terminal
                self.check_items(terminal_items)
        finally:
            # Never leave a stale terminal behind, even if a check raised.
            self._terminal = None

    @staticmethod
    def get_terminals():
        """Return accepted, non-deleted, active terminals.

        Each returned terminal has ``status`` set to the result of
        ``Status.get_terminal_latest_stat(terminal)``.
        """
        terminals = []
        for terminal in Terminal.objects.filter(is_accepted=True, is_deleted=False):
            if not terminal.is_active:
                continue
            terminal.status = Status.get_terminal_latest_stat(terminal)
            terminals.append(terminal)
        return terminals

    def check_items(self, items):
        """Run ``check_<item>`` for each item and collect the returned alarms.

        Items without a matching ``check_<item>`` method are skipped, and a
        checker returning ``None`` contributes nothing.
        """
        for item in items:
            checker = getattr(self, f'check_{item}', None)
            if checker is None:
                continue
            self.alarm_messages.extend(checker() or [])

    def check_is_alive(self):
        """Return an offline alarm when the current terminal is not alive.

        Returns an empty list for the local host (no terminal set).
        """
        terminal = self._terminal
        if terminal is None or terminal.is_alive:
            return []
        return [_('The terminal is offline: {}').format(terminal.name)]

    def check_disk_usage(self):
        """Return one alarm per disk whose usage exceeds ``disk_usage_threshold``."""
        if self._terminal:
            name = self._terminal.name
            disk_used = getattr(self._terminal.status, 'disk_used', None)
            # A terminal reports a single figure, shown under '/'.
            disks_used = [['/', disk_used]] if disk_used else []
        else:
            name = 'Core'
            disks_used = self._get_local_disk_usage()

        return [
            _("Disk used more than {}%: {} => {} ({})").format(
                self.disk_usage_threshold, disk, used, name
            )
            for disk, used in disks_used
            if used > self.disk_usage_threshold
        ]

    @staticmethod
    def _is_unchecked_path(path):
        """Return True when ``path`` starts with a prefix excluded from disk checks."""
        return path.startswith(('/etc', '/boot'))

    @staticmethod
    def _get_local_disk_usage():
        """Return ``[path, percent]`` pairs for local paths that should be checked."""
        return [
            [path, usage.percent]
            for path, usage in get_disk_usage().items()
            if not ServerPerformanceCheckUtil._is_unchecked_path(path)
        ]

    def check_cpu_load(self):
        """Return an alarm when the CPU load exceeds ``cpu_load_threshold``."""
        if self._terminal:
            name = self._terminal.name
            # A missing status or a None value counts as "no load reported".
            cpu_load = getattr(self._terminal.status, 'cpu_load', 0) or 0
        else:
            name = 'Core'
            cpu_load = get_cpu_load()

        if cpu_load <= self.cpu_load_threshold:
            return []
        return [
            _('CPU load more than {}: => {} ({})').format(
                self.cpu_load_threshold, cpu_load, name
            )
        ]

    def check_memory_usage(self):
        """Return an alarm when memory usage exceeds ``memory_usage_threshold``."""
        if self._terminal:
            name = self._terminal.name
            # A missing status or a None value counts as "no usage reported".
            memory_usage = getattr(self._terminal.status, 'memory_usage', 0) or 0
        else:
            name = 'Core'
            memory_usage = get_memory_used()

        if memory_usage <= self.memory_usage_threshold:
            return []
        return [
            _('Memory used more than {}%: => {} ({})').format(
                self.memory_usage_threshold, memory_usage, name
            )
        ]
|
||||||
|
|
|
@ -9,7 +9,7 @@ from celery.exceptions import SoftTimeLimitExceeded
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
from django.utils.translation import ugettext_lazy as _
|
from django.utils.translation import ugettext_lazy as _
|
||||||
|
|
||||||
from common.utils import get_logger, get_object_or_none, get_disk_usage, get_log_keep_day
|
from common.utils import get_logger, get_object_or_none, get_log_keep_day
|
||||||
from orgs.utils import tmp_to_root_org, tmp_to_org
|
from orgs.utils import tmp_to_root_org, tmp_to_org
|
||||||
from .celery.decorator import (
|
from .celery.decorator import (
|
||||||
register_as_period_task, after_app_shutdown_clean_periodic,
|
register_as_period_task, after_app_shutdown_clean_periodic,
|
||||||
|
@ -20,7 +20,7 @@ from .celery.utils import (
|
||||||
disable_celery_periodic_task, delete_celery_periodic_task
|
disable_celery_periodic_task, delete_celery_periodic_task
|
||||||
)
|
)
|
||||||
from .models import Task, CommandExecution, CeleryTask
|
from .models import Task, CommandExecution, CeleryTask
|
||||||
from .notifications import ServerPerformanceMessage
|
from .notifications import ServerPerformanceCheckUtil
|
||||||
|
|
||||||
logger = get_logger(__file__)
|
logger = get_logger(__file__)
|
||||||
|
|
||||||
|
@ -132,18 +132,7 @@ def create_or_update_registered_periodic_tasks():
|
||||||
@shared_task
@register_as_period_task(interval=3600)
def check_server_performance_period():
    """Periodic task (registered with interval=3600) that runs the performance checks.

    All checking and notification is delegated to
    ``ServerPerformanceCheckUtil.check_and_publish()``.
    """
    checker = ServerPerformanceCheckUtil()
    checker.check_and_publish()
|
|
||||||
|
|
||||||
|
|
||||||
@shared_task(queue="ansible")
|
@shared_task(queue="ansible")
|
||||||
|
|
|
@ -184,6 +184,8 @@ class Terminal(StorageMixin, TerminalStatusMixin, models.Model):
|
||||||
status = "Deleted"
|
status = "Deleted"
|
||||||
elif not self.is_active:
|
elif not self.is_active:
|
||||||
status = "Disable"
|
status = "Disable"
|
||||||
|
elif not self.is_alive:
|
||||||
|
status = 'Offline'
|
||||||
return '%s: %s' % (self.name, status)
|
return '%s: %s' % (self.name, status)
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
|
|
Loading…
Reference in New Issue