2022-06-23 06:51:59 +00:00
|
|
|
# Base image. Later steps call `conda` and request cudatoolkit=11.3, so this
# image presumably ships conda and CUDA 11.3 -- confirm when bumping the tag.
FROM hpcaitech/cuda-conda:11.3
|
|
|
|
|
2023-03-14 08:28:06 +00:00
|
|
|
# metainformation (OCI image annotation keys)
# Pairs are written as key="value" with no spaces around "=": with spaces the
# instruction is parsed in the legacy `LABEL key value` form and the stored
# value picks up a stray leading "= ".
LABEL org.opencontainers.image.source="https://github.com/hpcaitech/ColossalAI"
# The licenses annotation is expected to be an SPDX license expression.
LABEL org.opencontainers.image.licenses="Apache-2.0"
# Fully qualified reference of the FROM image. The `library/` namespace is
# reserved for Docker Official Images, so it does not belong in this name.
LABEL org.opencontainers.image.base.name="docker.io/hpcaitech/cuda-conda:11.3"
|
|
|
|
|
2023-07-07 07:31:51 +00:00
|
|
|
# enable passwordless ssh
# Generates an RSA key pair with an empty passphrase for the build user and
# appends its public key to authorized_keys. The ssh client config enables
# agent forwarding and disables strict host key checking for every host.
# NOTE(review): the private key ends up inside an image layer, so everyone who
# can pull the image holds it -- only acceptable on trusted, isolated networks.
# `mkdir -p` keeps the step from failing when the base image already has
# ~/.ssh; the chmod calls make the permissions explicit instead of relying on
# the build-time umask.
RUN mkdir -p ~/.ssh && \
    chmod 700 ~/.ssh && \
    printf "Host * \n ForwardAgent yes\nHost *\n StrictHostKeyChecking no" > ~/.ssh/config && \
    ssh-keygen -t rsa -N "" -f ~/.ssh/id_rsa && \
    cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys && \
    chmod 600 ~/.ssh/config ~/.ssh/authorized_keys
|
|
|
|
|
|
|
|
# enable RDMA support
# Installs the InfiniBand/RDMA user-space libraries together with the
# infiniband-diags and perftest tools. Packages are listed one per line in
# alphabetical order so additions and removals diff cleanly.
# --no-install-recommends keeps optional packages out of the layer, as the
# ninja install step in this file already does; the apt caches are removed in
# the same RUN so they never reach the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ibverbs-providers \
        infiniband-diags \
        libibumad3 \
        libibverbs1 \
        libnl-3-200 \
        libnl-route-3-200 \
        librdmacm1 \
        perftest && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
|
|
|
|
|
2022-06-23 06:51:59 +00:00
|
|
|
# install torch
# The pytorch/torchvision/torchaudio versions are pinned as a matching set and
# cudatoolkit=11.3 lines up with the 11.3 tag of the base image.
# `conda clean` runs in the same RUN so the downloaded package archives and
# index cache are not stored in the layer.
RUN conda install -y pytorch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1 cudatoolkit=11.3 -c pytorch && \
    conda clean -ya
|
2022-06-23 06:51:59 +00:00
|
|
|
|
2023-05-24 02:22:51 +00:00
|
|
|
# install ninja
# Pulls ninja-build from apt without recommended extras and drops the apt
# caches within the same layer.
# NOTE(review): presumably needed by the extension builds further down -- confirm.
RUN apt-get update \
    && apt-get install -y --no-install-recommends ninja-build \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
|
2023-05-24 02:22:51 +00:00
|
|
|
|
2022-06-23 06:51:59 +00:00
|
|
|
# install apex
# Builds NVIDIA apex from source at commit 91fcaa, passing the --cpp_ext,
# --cuda_ext and --fast_layer_norm options to its setup script.
# `packaging` is installed first (presumably required by the apex setup
# script -- confirm); it now uses --no-cache-dir like every other pip call in
# this file so no wheel cache is left in the layer.
RUN git clone https://github.com/NVIDIA/apex && \
    cd apex && \
    git checkout 91fcaa && \
    pip install --no-cache-dir packaging && \
    pip install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" --global-option="--fast_layer_norm" ./
|
2022-01-07 06:54:04 +00:00
|
|
|
|
|
|
|
# install colossalai
# VERSION is the branch or tag handed to `git clone -b`; it defaults to main
# and can be overridden with `--build-arg VERSION=<ref>`.
ARG VERSION=main
# Clone the requested ref and install it from source with CUDA_EXT=1 set in
# the environment of the pip command.
# NOTE(review): CUDA_EXT=1 presumably makes the install build the CUDA kernels
# ahead of time -- confirm against the project's setup script.
RUN git clone -b ${VERSION} https://github.com/hpcaitech/ColossalAI.git && \
    cd ./ColossalAI && \
    CUDA_EXT=1 pip install -v --no-cache-dir .
|
2022-06-23 07:12:15 +00:00
|
|
|
|
|
|
|
# install titans
|
2022-06-23 09:34:59 +00:00
|
|
|
# NOTE(review): titans is installed without a version pin, so the release that
# gets resolved can differ from one build to the next.
RUN pip install --no-cache-dir titans
|
2022-07-21 09:44:00 +00:00
|
|
|
|
|
|
|
# install tensornvme
# Builds TensorNVMe from source. libaio-dev (apt) and cmake (conda) are
# installed first because the build runs later in this same RUN.
# Uses apt-get rather than apt (apt's CLI is meant for interactive use), skips
# recommended packages, and removes the apt and conda caches in the same layer
# so they do not inflate the image. Both pip calls use --no-cache-dir for
# the same reason.
RUN apt-get update && \
    apt-get install -y --no-install-recommends libaio-dev && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* && \
    conda install -y cmake && \
    conda clean -ya && \
    git clone https://github.com/hpcaitech/TensorNVMe.git && \
    cd TensorNVMe && \
    pip install --no-cache-dir -r requirements.txt && \
    pip install -v --no-cache-dir .
|