mirror of https://github.com/hpcaitech/ColossalAI
[devops] fix extension building (#5427)
parent 822241a99c
commit 070df689e6
@@ -1,16 +1,16 @@
 {
     "build": [
         {
-            "torch_command": "pip install torch==1.12.1+cu102 torchvision==0.13.1+cu102 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu102",
-            "cuda_image": "hpcaitech/cuda-conda:10.2"
+            "torch_command": "pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu121",
+            "cuda_image": "hpcaitech/cuda-conda:12.1"
         },
         {
-            "torch_command": "pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu113",
-            "cuda_image": "hpcaitech/cuda-conda:11.3"
+            "torch_command": "pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu118",
+            "cuda_image": "hpcaitech/cuda-conda:11.8"
         },
         {
-            "torch_command": "pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu116",
-            "cuda_image": "hpcaitech/cuda-conda:11.6"
+            "torch_command": "pip install torch==2.0.0 torchvision==0.15.1 torchaudio==2.0.1",
+            "cuda_image": "hpcaitech/cuda-conda:11.7"
         }
     ]
 }
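The hunk above updates a CI compatibility matrix: each entry pairs a torch install command with the CUDA base image it expects. A minimal sketch of how a workflow script might consume such a file (the file name `build_matrix.json` and the iteration logic are assumptions for illustration, not taken from the repo):

```python
import json

# File name is an assumption; the diff only shows the JSON body.
with open("build_matrix.json") as f:
    matrix = json.load(f)

# Each entry pairs a torch install command with its matching CUDA image,
# so a CI job can spawn one container per supported CUDA version.
for entry in matrix["build"]:
    print(f'{entry["cuda_image"]}: {entry["torch_command"]}')
```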
@@ -83,7 +83,7 @@ jobs:
           fi
       - name: Install Colossal-AI
         run: |
-          CUDA_EXT=1 pip install -v .
+          BUILD_EXT=1 pip install -v .
           pip install -r requirements/requirements-test.txt
       - name: Unit Testing
         run: |
@@ -78,7 +78,7 @@ jobs:

       - name: Install Colossal-AI
         run: |
-          CUDA_EXT=1 pip install -v .
+          BUILD_EXT=1 pip install -v .
           pip install -r requirements/requirements-test.txt
       - name: Unit Testing
         run: |
@@ -75,7 +75,7 @@ jobs:

       - name: Install Colossal-AI
         run: |
-          CUDA_EXT=1 pip install -v .
+          BUILD_EXT=1 pip install -v .
           pip install -r requirements/requirements-test.txt

       - name: Unit Testing
@@ -51,4 +51,4 @@ jobs:

       - name: Build
         run: |
-          CUDA_EXT=1 pip install -v .
+          BUILD_EXT=1 pip install -v .
@@ -89,7 +89,7 @@ jobs:
       - name: Install ColossalAI
         run: |
           source activate pytorch
-          CUDA_EXT=1 pip install -v .
+          BUILD_EXT=1 pip install -v .

       - name: Test the Doc
         run: |
@@ -32,7 +32,7 @@ jobs:

       - name: Install ColossalAI
         run: |
-          CUDA_EXT=1 pip install -v .
+          BUILD_EXT=1 pip install -v .

       - name: Install Doc Test Requirements
         run: |
@@ -53,7 +53,7 @@ jobs:
         uses: actions/checkout@v3
       - name: Install Colossal-AI
         run: |
-          CUDA_EXT=1 pip install -v .
+          BUILD_EXT=1 pip install -v .
       - name: Test the example
         run: |
           dir=${{ matrix.directory }}
@@ -88,7 +88,7 @@ jobs:

       - name: Install Colossal-AI
         run: |
-          CUDA_EXT=1 pip install -v .
+          BUILD_EXT=1 pip install -v .

       - name: Test the example
         run: |
@@ -42,7 +42,7 @@ jobs:

       - name: Install Colossal-AI
         run: |
-          CUDA_EXT=1 pip install -v .
+          BUILD_EXT=1 pip install -v .

       - name: Traverse all files
         run: |
@@ -76,7 +76,7 @@ def check_installation():
     click.echo("")
     click.echo(f"Note:")
     click.echo(
-        f"1. AOT (ahead-of-time) compilation of the CUDA kernels occurs during installation when the environment variable CUDA_EXT=1 is set"
+        f"1. AOT (ahead-of-time) compilation of the CUDA kernels occurs during installation when the environment variable BUILD_EXT=1 is set"
     )
     click.echo(f"2. If AOT compilation is not enabled, stay calm as the CUDA kernels can still be built during runtime")
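The note printed above describes a single install-time switch. A self-contained sketch of the AOT/JIT decision it refers to, assuming only that the variable must be the literal `1` to opt in (the helper name here is ours, not ColossalAI's):

```python
import os

def aot_build_requested() -> bool:
    # BUILD_EXT=1 (formerly CUDA_EXT=1) opts into ahead-of-time kernel builds
    # at install time; anything else defers compilation to first use at runtime.
    return os.environ.get("BUILD_EXT", "0") == "1"

if __name__ == "__main__":
    if aot_build_requested():
        print("BUILD_EXT=1: CUDA kernels will be compiled during installation (AOT)")
    else:
        print("BUILD_EXT not set: CUDA kernels will be built at runtime (JIT)")
```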
@@ -25,7 +25,7 @@ conda install -c conda-forge cupy cudnn cutensor nccl cuda-version=11.6

 # install colossalai with PyTorch extensions
 cd <path_to_ColossalAI_repo>
-CUDA_EXT=1 pip install -e .
+BUILD_EXT=1 pip install -e .

 # install other dependencies
 pip install triton==2.0.0.dev20221202
@@ -25,7 +25,7 @@ conda install -c "nvidia/label/cuda-11.6.2" cuda-toolkit
 cd <path_to_ColossalAI_repo>
 pip install -r requirements/requirements.txt
 pip install -r requirements/requirements-test.txt
-CUDA_EXT=1 pip install -e .
+BUILD_EXT=1 pip install -e .

 # install torchserve
 cd <path_to_torch_serve_repo>
@@ -38,7 +38,7 @@ ARG VERSION=main
 RUN git clone -b ${VERSION} https://github.com/hpcaitech/ColossalAI.git && \
     cd ./ColossalAI && \
     git checkout 3e05c07bb8921f2a8f9736b6f6673d4e9f1697d0 && \
-    CUDA_EXT=1 pip install -v --no-cache-dir .
+    BUILD_EXT=1 pip install -v --no-cache-dir .

 # install titans
 RUN pip install --no-cache-dir titans
@@ -78,7 +78,7 @@ class CPUAdam(NVMeOptimizer):
         super(CPUAdam, self).__init__(model_params, default_args, nvme_offload_fraction, nvme_offload_dir)
         self.adamw_mode = adamw_mode
         cpu_adam = CPUAdamLoader().load()
-        # if you find yourself stuck here, make sure that you install colossalai with CUDA_EXT=1 specification
+        # if you find yourself stuck here, make sure that you install colossalai with BUILD_EXT=1 specification
         self.cpu_adam_op = cpu_adam.CPUAdamOptimizer(lr, betas[0], betas[1], eps, weight_decay, adamw_mode)

     def torch_adam_update(
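Since the renamed comment above marks the point where a missing prebuilt kernel first triggers a JIT build, constructing the optimizer once is a quick smoke test. A minimal usage sketch; the import path is assumed from the class's location in the repo:

```python
import torch
from colossalai.nn.optimizer import CPUAdam  # import path assumed

model = torch.nn.Linear(16, 16)
# Constructing CPUAdam calls CPUAdamLoader().load(): with BUILD_EXT=1 the kernel
# was prebuilt at install time; otherwise it is JIT-compiled here (the "stuck" case).
optimizer = CPUAdam(model.parameters(), lr=1e-3, betas=(0.9, 0.999))

loss = model(torch.randn(4, 16)).sum()
loss.backward()
optimizer.step()
```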
@@ -37,7 +37,7 @@ RUN git clone https://github.com/NVIDIA/apex && \
 ARG VERSION=main
 RUN git clone -b ${VERSION} https://github.com/hpcaitech/ColossalAI.git \
     && cd ./ColossalAI \
-    && CUDA_EXT=1 pip install -v --no-cache-dir .
+    && BUILD_EXT=1 pip install -v --no-cache-dir .

 # install titans
 RUN pip install --no-cache-dir titans
@@ -146,25 +146,25 @@ Colossal-AI provides you with a collection of parallel components. Our goal is to make your
 [[HuggingFace model weights]](https://huggingface.co/hpcai-tech/Colossal-LLaMA-2-13b-base)
 [[Modelscope model weights]](https://www.modelscope.cn/models/colossalai/Colossal-LLaMA-2-13b-base/summary)

-| Model | Backbone | Tokens Consumed | MMLU (5-shot) | CMMLU (5-shot)| AGIEval (5-shot) | GAOKAO (0-shot) | CEval (5-shot) |
-| :----------------------------: | :--------: | :-------------: | :------------------: | :-----------: | :--------------: | :-------------: | :-------------: |
-| Baichuan-7B | - | 1.2T | 42.32 (42.30) | 44.53 (44.02) | 38.72 | 36.74 | 42.80 |
-| Baichuan-13B-Base | - | 1.4T | 50.51 (51.60) | 55.73 (55.30) | 47.20 | 51.41 | 53.60 |
-| Baichuan2-7B-Base | - | 2.6T | 46.97 (54.16) | 57.67 (57.07) | 45.76 | 52.60 | 54.00 |
-| Baichuan2-13B-Base | - | 2.6T | 54.84 (59.17) | 62.62 (61.97) | 52.08 | 58.25 | 58.10 |
-| ChatGLM-6B | - | 1.0T | 39.67 (40.63) | 41.17 (-) | 40.10 | 36.53 | 38.90 |
-| ChatGLM2-6B | - | 1.4T | 44.74 (45.46) | 49.40 (-) | 46.36 | 45.49 | 51.70 |
-| InternLM-7B | - | 1.6T | 46.70 (51.00) | 52.00 (-) | 44.77 | 61.64 | 52.80 |
-| Qwen-7B | - | 2.2T | 54.29 (56.70) | 56.03 (58.80) | 52.47 | 56.42 | 59.60 |
-| Llama-2-7B | - | 2.0T | 44.47 (45.30) | 32.97 (-) | 32.60 | 25.46 | - |
-| Linly-AI/Chinese-LLaMA-2-7B-hf | Llama-2-7B | 1.0T | 37.43 | 29.92 | 32.00 | 27.57 | - |
-| wenge-research/yayi-7b-llama2 | Llama-2-7B | - | 38.56 | 31.52 | 30.99 | 25.95 | - |
-| ziqingyang/chinese-llama-2-7b | Llama-2-7B | - | 33.86 | 34.69 | 34.52 | 25.18 | 34.2 |
-| TigerResearch/tigerbot-7b-base | Llama-2-7B | 0.3T | 43.73 | 42.04 | 37.64 | 30.61 | - |
-| LinkSoul/Chinese-Llama-2-7b | Llama-2-7B | - | 48.41 | 38.31 | 38.45 | 27.72 | - |
-| FlagAlpha/Atom-7B | Llama-2-7B | 0.1T | 49.96 | 41.10 | 39.83 | 33.00 | - |
-| IDEA-CCNL/Ziya-LLaMA-13B-v1.1 | Llama-13B | 0.11T | 50.25 | 40.99 | 40.04 | 30.54 | - |
-| **Colossal-LLaMA-2-7b-base** | Llama-2-7B | **0.0085T** | 53.06 | 49.89 | 51.48 | 58.82 | 50.2 |
+| Model | Backbone | Tokens Consumed | MMLU (5-shot) | CMMLU (5-shot) | AGIEval (5-shot) | GAOKAO (0-shot) | CEval (5-shot) |
+|:------------------------------:|:----------:|:---------------:|:-------------:|:--------------:|:----------------:|:---------------:|:--------------:|
+| Baichuan-7B | - | 1.2T | 42.32 (42.30) | 44.53 (44.02) | 38.72 | 36.74 | 42.80 |
+| Baichuan-13B-Base | - | 1.4T | 50.51 (51.60) | 55.73 (55.30) | 47.20 | 51.41 | 53.60 |
+| Baichuan2-7B-Base | - | 2.6T | 46.97 (54.16) | 57.67 (57.07) | 45.76 | 52.60 | 54.00 |
+| Baichuan2-13B-Base | - | 2.6T | 54.84 (59.17) | 62.62 (61.97) | 52.08 | 58.25 | 58.10 |
+| ChatGLM-6B | - | 1.0T | 39.67 (40.63) | 41.17 (-) | 40.10 | 36.53 | 38.90 |
+| ChatGLM2-6B | - | 1.4T | 44.74 (45.46) | 49.40 (-) | 46.36 | 45.49 | 51.70 |
+| InternLM-7B | - | 1.6T | 46.70 (51.00) | 52.00 (-) | 44.77 | 61.64 | 52.80 |
+| Qwen-7B | - | 2.2T | 54.29 (56.70) | 56.03 (58.80) | 52.47 | 56.42 | 59.60 |
+| Llama-2-7B | - | 2.0T | 44.47 (45.30) | 32.97 (-) | 32.60 | 25.46 | - |
+| Linly-AI/Chinese-LLaMA-2-7B-hf | Llama-2-7B | 1.0T | 37.43 | 29.92 | 32.00 | 27.57 | - |
+| wenge-research/yayi-7b-llama2 | Llama-2-7B | - | 38.56 | 31.52 | 30.99 | 25.95 | - |
+| ziqingyang/chinese-llama-2-7b | Llama-2-7B | - | 33.86 | 34.69 | 34.52 | 25.18 | 34.2 |
+| TigerResearch/tigerbot-7b-base | Llama-2-7B | 0.3T | 43.73 | 42.04 | 37.64 | 30.61 | - |
+| LinkSoul/Chinese-Llama-2-7b | Llama-2-7B | - | 48.41 | 38.31 | 38.45 | 27.72 | - |
+| FlagAlpha/Atom-7B | Llama-2-7B | 0.1T | 49.96 | 41.10 | 39.83 | 33.00 | - |
+| IDEA-CCNL/Ziya-LLaMA-13B-v1.1 | Llama-13B | 0.11T | 50.25 | 40.99 | 40.04 | 30.54 | - |
+| **Colossal-LLaMA-2-7b-base** | Llama-2-7B | **0.0085T** | 53.06 | 49.89 | 51.48 | 58.82 | 50.2 |


 ### ColossalChat
@@ -406,10 +406,10 @@ pip install colossalai

 **Note: only Linux is supported at the moment.**

-However, if you want to build the PyTorch extensions at install time, you can set the environment variable `CUDA_EXT=1`.
+However, if you want to build the PyTorch extensions at install time, you can set the environment variable `BUILD_EXT=1`.

 ```bash
-CUDA_EXT=1 pip install colossalai
+BUILD_EXT=1 pip install colossalai
 ```

 **Otherwise, the PyTorch extensions will only be built at runtime, when they are actually needed.**
@@ -438,7 +438,7 @@ pip install .
 By default, we do not install the PyTorch extensions during `pip install`; instead, they are compiled on the fly at runtime. If you want to install these extensions ahead of time (they are needed when using fused optimizers), you can use the following command.

 ```shell
-CUDA_EXT=1 pip install .
+BUILD_EXT=1 pip install .
 ```

 <p align="right">(<a href="#top">Back to top</a>)</p>
@@ -42,7 +42,7 @@ pip install -r requirements/requirements.txt
 BUILD_EXT=1 pip install .
 ```

-If you don't want to install and enable CUDA kernel fusion (compulsory installation when using fused optimizer), just don't specify the `CUDA_EXT`:
+If you don't want to install and enable CUDA kernel fusion (compulsory installation when using fused optimizer), just don't specify the `BUILD_EXT`:

 ```shell
 pip install .
@@ -77,7 +77,7 @@ git clone https://github.com/hpcaitech/ColossalAI.git
 cd ColossalAI

 # install colossalai
-CUDA_EXT=1 pip install .
+BUILD_EXT=1 pip install .
 ```

 #### Step 3: Accelerate with flash attention by xformers (Optional)
@@ -8,7 +8,7 @@ conda activate ldm
 conda install pytorch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1 cudatoolkit=11.3 -c pytorch
 pip install transformers diffusers invisible-watermark

-CUDA_EXT=1 pip install colossalai
+BUILD_EXT=1 pip install colossalai

 wget https://huggingface.co/stabilityai/stable-diffusion-2-base/resolve/main/512-base-ema.ckpt
@@ -53,7 +53,7 @@ We follow the hyperparameter settings from the original LLaMA paper. We use Adam
 Please install the latest ColossalAI from source.

 ```bash
-CUDA_EXT=1 pip install -U git+https://github.com/hpcaitech/ColossalAI
+BUILD_EXT=1 pip install -U git+https://github.com/hpcaitech/ColossalAI
 ```

 Then install other dependencies.
@@ -17,7 +17,7 @@
 Please install the latest ColossalAI from source.

 ```bash
-CUDA_EXT=1 pip install -U git+https://github.com/hpcaitech/ColossalAI
+BUILD_EXT=1 pip install -U git+https://github.com/hpcaitech/ColossalAI
 ```

 Then install dependencies.
@@ -154,7 +154,7 @@ def check_cuda_availability():
 def set_cuda_arch_list(cuda_dir):
     """
     This function sets the PyTorch TORCH_CUDA_ARCH_LIST variable for ahead-of-time extension compilation.
-    Ahead-of-time compilation occurs when CUDA_EXT=1 is set when running 'pip install'.
+    Ahead-of-time compilation occurs when BUILD_EXT=1 is set when running 'pip install'.
     """
     cuda_available = check_cuda_availability()
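The corrected docstring concerns `TORCH_CUDA_ARCH_LIST`, which pins the compute capabilities targeted by an AOT build. A hedged sketch of the usual pattern for setting it; the fallback architectures are illustrative, not the list ColossalAI actually derives from `cuda_dir`:

```python
import os

def ensure_arch_list() -> None:
    # Respect an explicit user choice; otherwise fall back to a broad set of
    # compute capabilities (illustrative values, not ColossalAI's policy).
    os.environ.setdefault("TORCH_CUDA_ARCH_LIST", "7.0;7.5;8.0;8.6")

ensure_arch_list()
print("building for:", os.environ["TORCH_CUDA_ARCH_LIST"])
```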
setup.py (2 changes)
@@ -70,7 +70,7 @@ def get_version() -> str:
     if BUILD_EXT:
         if not TORCH_AVAILABLE:
             raise ModuleNotFoundError(
-                "[extension] PyTorch is not found while CUDA_EXT=1. You need to install PyTorch first in order to build CUDA extensions"
+                "[extension] PyTorch is not found while BUILD_EXT=1. You need to install PyTorch first in order to build CUDA extensions"
             )

     from extensions import ALL_EXTENSIONS
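The guard above fails fast only when the user explicitly requested an AOT build without PyTorch installed. A self-contained sketch of that pattern; how setup.py actually derives `TORCH_AVAILABLE` is an assumption here:

```python
import importlib.util
import os

# Probe for torch without importing it (assumed; the diff does not show
# how TORCH_AVAILABLE is computed in setup.py).
TORCH_AVAILABLE = importlib.util.find_spec("torch") is not None
BUILD_EXT = os.environ.get("BUILD_EXT", "0") == "1"

if BUILD_EXT and not TORCH_AVAILABLE:
    raise ModuleNotFoundError(
        "[extension] PyTorch is not found while BUILD_EXT=1. "
        "You need to install PyTorch first in order to build CUDA extensions"
    )
```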