mirror of https://github.com/hpcaitech/ColossalAI
[cli] provided more details if colossalai run fails (#2442)
parent
c41e59e5ad
commit
c72c827e95
|
@ -1,8 +1,10 @@
|
||||||
import fabric
|
|
||||||
from .hostinfo import HostInfo, HostInfoList
|
|
||||||
from multiprocessing import Pipe, Process
|
from multiprocessing import Pipe, Process
|
||||||
from multiprocessing import connection as mp_connection
|
from multiprocessing import connection as mp_connection
|
||||||
|
|
||||||
import click
|
import click
|
||||||
|
import fabric
|
||||||
|
|
||||||
|
from .hostinfo import HostInfo, HostInfoList
|
||||||
|
|
||||||
|
|
||||||
def run_on_host(hostinfo: HostInfo, workdir: str, recv_conn: mp_connection.Connection,
|
def run_on_host(hostinfo: HostInfo, workdir: str, recv_conn: mp_connection.Connection,
|
||||||
|
@ -45,8 +47,10 @@ def run_on_host(hostinfo: HostInfo, workdir: str, recv_conn: mp_connection.Conne
|
||||||
# execute on the remote machine
|
# execute on the remote machine
|
||||||
fab_conn.run(cmds, hide=False)
|
fab_conn.run(cmds, hide=False)
|
||||||
send_conn.send('success')
|
send_conn.send('success')
|
||||||
except:
|
except Exception as e:
|
||||||
click.echo(f"Error: failed to run {cmds} on {hostinfo.hostname}")
|
click.echo(
|
||||||
|
f"Error: failed to run {cmds} on {hostinfo.hostname}, is localhost: {hostinfo.is_local_host}, exception: {e}"
|
||||||
|
)
|
||||||
send_conn.send('failure')
|
send_conn.send('failure')
|
||||||
|
|
||||||
# shutdown
|
# shutdown
|
||||||
|
|
Loading…
Reference in New Issue