mirror of https://github.com/hpcaitech/ColossalAI
[cli] hotfix launch command for multi-nodes (#4165)
parent
2ac24040eb
commit
1908caad38
|
@ -164,9 +164,7 @@ def get_launch_command(
|
||||||
]
|
]
|
||||||
else:
|
else:
|
||||||
# extra launch args for torch distributed launcher with torch >= 1.9
|
# extra launch args for torch distributed launcher with torch >= 1.9
|
||||||
default_torchrun_rdzv_args = dict(rdzv_backend="c10d",
|
default_torchrun_rdzv_args = dict(master_addr=master_addr, master_port=master_port)
|
||||||
rdzv_endpoint=f"{master_addr}:{master_port}",
|
|
||||||
rdzv_id="colossalai-default-job")
|
|
||||||
|
|
||||||
# update rdzv arguments
|
# update rdzv arguments
|
||||||
for key in default_torchrun_rdzv_args.keys():
|
for key in default_torchrun_rdzv_args.keys():
|
||||||
|
|
Loading…
Reference in New Issue