From c72c827e95bf4f58ed6dd051326453a19b61c317 Mon Sep 17 00:00:00 2001 From: Frank Lee Date: Wed, 11 Jan 2023 13:56:42 +0800 Subject: [PATCH] [cli] provided more details if colossalai run fail (#2442) --- colossalai/cli/launcher/multinode_runner.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/colossalai/cli/launcher/multinode_runner.py b/colossalai/cli/launcher/multinode_runner.py index c45ad5e5a..a51e1e371 100644 --- a/colossalai/cli/launcher/multinode_runner.py +++ b/colossalai/cli/launcher/multinode_runner.py @@ -1,8 +1,10 @@ -import fabric -from .hostinfo import HostInfo, HostInfoList from multiprocessing import Pipe, Process from multiprocessing import connection as mp_connection + import click +import fabric + +from .hostinfo import HostInfo, HostInfoList def run_on_host(hostinfo: HostInfo, workdir: str, recv_conn: mp_connection.Connection, @@ -45,8 +47,10 @@ def run_on_host(hostinfo: HostInfo, workdir: str, recv_conn: mp_connection.Conne # execute on the remote machine fab_conn.run(cmds, hide=False) send_conn.send('success') - except: - click.echo(f"Error: failed to run {cmds} on {hostinfo.hostname}") + except Exception as e: + click.echo( + f"Error: failed to run {cmds} on {hostinfo.hostname}, is localhost: {hostinfo.is_local_host}, exception: {e}" + ) send_conn.send('failure') # shutdown