init light monitoring on all ranks

pull/462/head
JiaoPL 2023-11-01 22:29:32 +08:00
parent 21624f6f81
commit ad21459ce4
2 changed files with 4 additions and 3 deletions

View File

@@ -532,11 +532,11 @@ def initialize_distributed_env(
# init light monitor client # init light monitor client
if gpc.config.get("monitor") and gpc.config.monitor.get("alert"): if gpc.config.get("monitor") and gpc.config.monitor.get("alert"):
alert_config = gpc.config.monitor.alert alert_config = gpc.config.monitor.alert
if alert_config.enable_feishu_alert and gpc.is_rank_for_log(): if alert_config.enable_feishu_alert:
light_monitor_address = alert_config.light_monitor_address light_monitor_address = alert_config.light_monitor_address
if light_monitor_address: if light_monitor_address:
initialize_light_monitor(light_monitor_address) initialize_light_monitor(light_monitor_address)
else: elif gpc.is_rank_for_log():
logger.warning("monitor address is none, monitor could not be used!") logger.warning("monitor address is none, monitor could not be used!")

View File

@@ -7,6 +7,7 @@ from typing import Dict
import requests import requests
from internlm.core.context import global_context as gpc
from internlm.utils.logger import get_logger from internlm.utils.logger import get_logger
logger = get_logger(__file__) logger = get_logger(__file__)
@@ -29,7 +30,7 @@ def initialize_light_monitor(monitor_address: str = None):
try: try:
from uniscale_monitoring import init_monitor from uniscale_monitoring import init_monitor
init_monitor(monitor_address) init_monitor(monitor_address, is_root_rank=gpc.is_rank_for_log())
except Exception as e: except Exception as e:
logger.warning(f"init monitor meet error: {e}") logger.warning(f"init monitor meet error: {e}")