diff --git a/docker/Dockerfile b/docker/Dockerfile
index 2c7bafd96..97399c939 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -5,6 +5,38 @@ LABEL org.opencontainers.image.source = "https://github.com/hpcaitech/ColossalAI
 LABEL org.opencontainers.image.licenses = "Apache License 2.0"
 LABEL org.opencontainers.image.base.name = "docker.io/library/hpcaitech/cuda-conda:11.3"
 
+# Enable passwordless SSH for the build user between containers of this image.
+# NOTE(review): the key pair is generated at build time and stored in an image
+# layer, so every container (and anyone able to pull the image) shares the same
+# private key, and "StrictHostKeyChecking no" disables host-key verification.
+# Presumably meant for trusted multi-node clusters only; confirm before
+# publishing the image or exposing sshd on an untrusted network.
+# mkdir -p tolerates a base image that already ships ~/.ssh; the chmod calls
+# pin owner-only modes instead of relying on the build-time umask.
+RUN mkdir -p ~/.ssh && \
+    chmod 700 ~/.ssh && \
+    printf "Host * \n ForwardAgent yes\nHost *\n StrictHostKeyChecking no" > ~/.ssh/config && \
+    ssh-keygen -t rsa -N "" -f ~/.ssh/id_rsa && \
+    cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys && \
+    chmod 600 ~/.ssh/config ~/.ssh/authorized_keys
+
+# Enable RDMA support: InfiniBand diagnostics, perftest and the userspace
+# verbs / umad / RDMA-CM / netlink libraries.
+# update + install share one layer so the package index is never stale, and the
+# apt lists are deleted in that same layer so they never reach the image.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        ibverbs-providers \
+        infiniband-diags \
+        libibumad3 \
+        libibverbs1 \
+        libnl-3-200 \
+        libnl-route-3-200 \
+        librdmacm1 \
+        perftest && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
 # install torch
 RUN conda install pytorch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1 cudatoolkit=11.3 -c pytorch
 