add comments to the monitoring config

pull/285/head
JiaoPL 2023-09-07 15:05:59 +08:00
parent d82f223e53
commit c0289ac2a6
1 changed file with 8 additions and 1 deletion

View File

@@ -253,16 +253,23 @@ def args_sanity_check():
gpc.config.parallel.sequence_parallel is True and gpc.config.model.use_flash_attn is False gpc.config.parallel.sequence_parallel is True and gpc.config.model.use_flash_attn is False
), "sequence parallel does not support use_flash_attn=False" ), "sequence parallel does not support use_flash_attn=False"
# feishu webhook address for alerting # monitoring config
# compatible with old alert config
if "alert_address" not in gpc.config:
gpc.config._add_item("alert_address", None)
# set default values for the new monitoring config
if "monitor" not in gpc.config: if "monitor" not in gpc.config:
gpc.config._add_item("monitor", {}) gpc.config._add_item("monitor", {})
if "alert" not in gpc.config.monitor: if "alert" not in gpc.config.monitor:
gpc.config.monitor._add_item("alert", {}) gpc.config.monitor._add_item("alert", {})
alert = gpc.config.monitor.alert alert = gpc.config.monitor.alert
# the alert switch is set to False by default
if "enable_feishu_alert" not in alert: if "enable_feishu_alert" not in alert:
alert._add_item("enable_feishu_alert", False) alert._add_item("enable_feishu_alert", False)
# the feishu alert address is set to None by default
if "feishu_alert_address" not in alert: if "feishu_alert_address" not in alert:
alert._add_item("feishu_alert_address", None) alert._add_item("feishu_alert_address", None)
# check the monitoring config
if alert.enable_feishu_alert: if alert.enable_feishu_alert:
if not alert.feishu_alert_address and gpc.is_rank_for_log(): if not alert.feishu_alert_address and gpc.is_rank_for_log():
alert.feishu_alert_address = None alert.feishu_alert_address = None