mirror of https://github.com/InternLM/InternLM
init light monitoring on all ranks (#462)
parent
0218e3131c
commit
0763bf3972
|
@@ -532,11 +532,11 @@ def initialize_distributed_env(
|
||||||
# init light monitor client
|
# init light monitor client
|
||||||
if gpc.config.get("monitor") and gpc.config.monitor.get("alert"):
|
if gpc.config.get("monitor") and gpc.config.monitor.get("alert"):
|
||||||
alert_config = gpc.config.monitor.alert
|
alert_config = gpc.config.monitor.alert
|
||||||
if alert_config.enable_feishu_alert and gpc.is_rank_for_log():
|
if alert_config.enable_feishu_alert:
|
||||||
light_monitor_address = alert_config.light_monitor_address
|
light_monitor_address = alert_config.light_monitor_address
|
||||||
if light_monitor_address:
|
if light_monitor_address:
|
||||||
initialize_light_monitor(light_monitor_address)
|
initialize_light_monitor(light_monitor_address)
|
||||||
else:
|
elif gpc.is_rank_for_log():
|
||||||
logger.warning("monitor address is none, monitor could not be used!")
|
logger.warning("monitor address is none, monitor could not be used!")
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@@ -7,6 +7,7 @@ from typing import Dict
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
from internlm.core.context import global_context as gpc
|
||||||
from internlm.utils.logger import get_logger
|
from internlm.utils.logger import get_logger
|
||||||
|
|
||||||
logger = get_logger(__file__)
|
logger = get_logger(__file__)
|
||||||
|
@@ -29,7 +30,7 @@ def initialize_light_monitor(monitor_address: str = None):
|
||||||
try:
|
try:
|
||||||
from uniscale_monitoring import init_monitor
|
from uniscale_monitoring import init_monitor
|
||||||
|
|
||||||
init_monitor(monitor_address)
|
init_monitor(monitor_address, is_root_rank=gpc.is_rank_for_log())
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"init monitor meet error: {e}")
|
logger.warning(f"init monitor meet error: {e}")
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue