send exception to light monitor only if the server is available

pull/465/head
JiaoPL 2023-11-02 14:38:03 +08:00
parent 6b2bff421c
commit efa2b618d1
1 changed file with 10 additions and 2 deletions

View File

@@ -155,7 +155,11 @@ class MonitorManager(metaclass=SingletonMeta):
 format_trace = ""
 for line in filtered_trace:
 format_trace += "\n" + line
-if self.send_exception:
+if (
+self.send_exception
+and gpc.config.monitor.alert.get("enable_feishu_alert", False)
+and gpc.config.monitor.alert.get("light_monitor_address", None)
+):
 self.send_exception(format_trace, gpc.get_global_rank())
 send_alert_message(
 address=alert_address,
@@ -169,7 +173,11 @@ class MonitorManager(metaclass=SingletonMeta):
 print("receive frame: ", frame)
 print("receive signal: ", sys_signal)
 message = f"Process received signal {signal} and exited."
-if self.send_exception:
+if (
+self.send_exception
+and gpc.config.monitor.alert.get("enable_feishu_alert", False)
+and gpc.config.monitor.alert.get("light_monitor_address", None)
+):
 self.send_exception(message, gpc.get_global_rank())
 send_alert_message(
 address=alert_address,