From d383449fc4300ae3caf9cf481fc87bb4757f00a4 Mon Sep 17 00:00:00 2001
From: Wenxuan Tan
Date: Tue, 27 Aug 2024 10:12:21 +0800
Subject: [PATCH] [CI] Remove triton version for compatibility bug; update req torch >=2.2 (#6018)

* remove triton version

* remove torch 2.2

* remove torch 2.1

* debug

* remove 2.1 build tests

* require torch >=2.2

---------

Co-authored-by: Edenzzzz
---
 .compatibility                                  | 1 -
 .github/workflows/build_on_pr.yml               | 2 +-
 .github/workflows/build_on_schedule.yml         | 2 +-
 .github/workflows/doc_test_on_pr.yml            | 2 +-
 .github/workflows/doc_test_on_schedule.yml      | 2 +-
 .github/workflows/example_check_on_dispatch.yml | 2 +-
 .github/workflows/example_check_on_pr.yml       | 2 +-
 .github/workflows/example_check_on_schedule.yml | 2 +-
 .github/workflows/run_chatgpt_examples.yml      | 2 +-
 .github/workflows/run_chatgpt_unit_tests.yml    | 2 +-
 .github/workflows/run_colossalqa_unit_tests.yml | 2 +-
 README.md                                       | 2 +-
 requirements/requirements.txt                   | 2 +-
 13 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/.compatibility b/.compatibility
index 62d19faff..e1836506a 100644
--- a/.compatibility
+++ b/.compatibility
@@ -1,4 +1,3 @@
-2.1.0-12.1.0
 2.2.2-12.1.0
 2.3.0-12.1.0
 2.4.0-12.4.1
diff --git a/.github/workflows/build_on_pr.yml b/.github/workflows/build_on_pr.yml
index 58cd88268..ceb33c9ac 100644
--- a/.github/workflows/build_on_pr.yml
+++ b/.github/workflows/build_on_pr.yml
@@ -89,7 +89,7 @@ jobs:
     if: needs.detect.outputs.anyLibraryFileChanged == 'true'
     runs-on: [self-hosted, gpu]
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm -v /dev/shm -v /data/scratch:/data/scratch
     timeout-minutes: 90
     defaults:
diff --git a/.github/workflows/build_on_schedule.yml b/.github/workflows/build_on_schedule.yml
index fc688a71b..f8ca07d97 100644
--- a/.github/workflows/build_on_schedule.yml
+++ b/.github/workflows/build_on_schedule.yml
@@ -12,7 +12,7 @@ jobs:
     if: github.repository == 'hpcaitech/ColossalAI'
     runs-on: [self-hosted, gpu]
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm -v /dev/shm -v /data/scratch/:/data/scratch/
     timeout-minutes: 90
     steps:
diff --git a/.github/workflows/doc_test_on_pr.yml b/.github/workflows/doc_test_on_pr.yml
index 31c421846..2e0ff6a59 100644
--- a/.github/workflows/doc_test_on_pr.yml
+++ b/.github/workflows/doc_test_on_pr.yml
@@ -56,7 +56,7 @@ jobs:
     needs: detect-changed-doc
     runs-on: [self-hosted, gpu]
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm
     timeout-minutes: 30
     defaults:
diff --git a/.github/workflows/doc_test_on_schedule.yml b/.github/workflows/doc_test_on_schedule.yml
index e2491e460..3ea6481f9 100644
--- a/.github/workflows/doc_test_on_schedule.yml
+++ b/.github/workflows/doc_test_on_schedule.yml
@@ -12,7 +12,7 @@ jobs:
     name: Test the changed Doc
     runs-on: [self-hosted, gpu]
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm
     timeout-minutes: 60
     steps:
diff --git a/.github/workflows/example_check_on_dispatch.yml b/.github/workflows/example_check_on_dispatch.yml
index d877b06ce..6a65c4ff5 100644
--- a/.github/workflows/example_check_on_dispatch.yml
+++ b/.github/workflows/example_check_on_dispatch.yml
@@ -45,7 +45,7 @@ jobs:
       fail-fast: false
       matrix: ${{fromJson(needs.manual_check_matrix_preparation.outputs.matrix)}}
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm -v /data/scratch/examples-data:/data/ -v /dev/shm
     timeout-minutes: 15
     steps:
diff --git a/.github/workflows/example_check_on_pr.yml b/.github/workflows/example_check_on_pr.yml
index 7a906738c..af8da0383 100644
--- a/.github/workflows/example_check_on_pr.yml
+++ b/.github/workflows/example_check_on_pr.yml
@@ -90,7 +90,7 @@ jobs:
       fail-fast: false
       matrix: ${{fromJson(needs.detect-changed-example.outputs.matrix)}}
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm -v /data/scratch/examples-data:/data/ -v /dev/shm
     timeout-minutes: 30
     concurrency:
diff --git a/.github/workflows/example_check_on_schedule.yml b/.github/workflows/example_check_on_schedule.yml
index 6ec1b0591..bc98e0b0c 100644
--- a/.github/workflows/example_check_on_schedule.yml
+++ b/.github/workflows/example_check_on_schedule.yml
@@ -34,7 +34,7 @@ jobs:
       fail-fast: false
       matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}}
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm -v /data/scratch/examples-data:/data/ -v /dev/shm
     timeout-minutes: 30
     steps:
diff --git a/.github/workflows/run_chatgpt_examples.yml b/.github/workflows/run_chatgpt_examples.yml
index b7522ffbd..262def229 100644
--- a/.github/workflows/run_chatgpt_examples.yml
+++ b/.github/workflows/run_chatgpt_examples.yml
@@ -19,7 +19,7 @@ jobs:
       github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
     runs-on: [self-hosted, gpu]
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm -v /data/scratch/examples-data:/data/scratch/examples-data --shm-size=10.24gb
     timeout-minutes: 60
     defaults:
diff --git a/.github/workflows/run_chatgpt_unit_tests.yml b/.github/workflows/run_chatgpt_unit_tests.yml
index c0e74ecbb..21545098a 100644
--- a/.github/workflows/run_chatgpt_unit_tests.yml
+++ b/.github/workflows/run_chatgpt_unit_tests.yml
@@ -19,7 +19,7 @@ jobs:
       github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
     runs-on: [self-hosted, gpu]
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm -v /data/scratch/examples-data:/data/scratch/examples-data
     timeout-minutes: 30
     defaults:
diff --git a/.github/workflows/run_colossalqa_unit_tests.yml b/.github/workflows/run_colossalqa_unit_tests.yml
index 00944b92d..326ef4526 100644
--- a/.github/workflows/run_colossalqa_unit_tests.yml
+++ b/.github/workflows/run_colossalqa_unit_tests.yml
@@ -19,7 +19,7 @@ jobs:
       github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
     runs-on: [self-hosted, gpu]
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       volumes:
         - /data/scratch/test_data_colossalqa:/data/scratch/test_data_colossalqa
         - /data/scratch/llama-tiny:/data/scratch/llama-tiny
diff --git a/README.md b/README.md
index 69506e338..22c565b50 100644
--- a/README.md
+++ b/README.md
@@ -420,7 +420,7 @@ Please visit our [documentation](https://www.colossalai.org/) and [examples](htt
 ## Installation
 
 Requirements:
-- PyTorch >= 2.1
+- PyTorch >= 2.2
 - Python >= 3.7
 - CUDA >= 11.0
 - [NVIDIA GPU Compute Capability](https://developer.nvidia.com/cuda-gpus) >= 7.0 (V100/RTX20 and higher)
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
index 578122d47..b77a33b0a 100644
--- a/requirements/requirements.txt
+++ b/requirements/requirements.txt
@@ -8,7 +8,7 @@ click
 fabric
 contexttimer
 ninja
-torch>=2.1.0,<=2.4.0
+torch>=2.2.0,<=2.4.0
 safetensors
 einops
 pydantic