
[CI] Remove the triton version pin to fix a compatibility bug; require torch >= 2.2 (#6018)

* remove triton version

* remove torch 2.2

* remove torch 2.1

* debug

* remove 2.1 build tests

* require torch >=2.2

---------

Co-authored-by: Edenzzzz <wtan45@wisc.edu>
Wenxuan Tan authored 3 months ago; committed by GitHub
commit d383449fc4
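In short, the PR stops pinning a standalone triton version (torch >= 2.2 wheels typically pull in a matching triton dependency themselves, so a separate pin can conflict) and raises the minimum supported PyTorch from 2.1 to 2.2. A minimal sketch of an equivalent runtime guard is shown below; the guard and its error wording are illustrative assumptions, not code from this PR.

```python
# Hypothetical runtime guard mirroring the new requirement (torch >= 2.2).
# Illustrative only; assumes the `packaging` library is available.
from packaging.version import Version
import torch

MIN_TORCH = Version("2.2.0")

installed = Version(torch.__version__.split("+")[0])  # drop local tag, e.g. "+cu121"
if installed < MIN_TORCH:
    raise RuntimeError(f"torch {installed} found, but torch >= {MIN_TORCH} is required")
```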
13 changed files:

1. .compatibility (1 line changed)
2. .github/workflows/build_on_pr.yml (2 lines changed)
3. .github/workflows/build_on_schedule.yml (2 lines changed)
4. .github/workflows/doc_test_on_pr.yml (2 lines changed)
5. .github/workflows/doc_test_on_schedule.yml (2 lines changed)
6. .github/workflows/example_check_on_dispatch.yml (2 lines changed)
7. .github/workflows/example_check_on_pr.yml (2 lines changed)
8. .github/workflows/example_check_on_schedule.yml (2 lines changed)
9. .github/workflows/run_chatgpt_examples.yml (2 lines changed)
10. .github/workflows/run_chatgpt_unit_tests.yml (2 lines changed)
11. .github/workflows/run_colossalqa_unit_tests.yml (2 lines changed)
12. README.md (2 lines changed)
13. requirements/requirements.txt (2 lines changed)

.compatibility (1 line changed)

@@ -1,4 +1,3 @@
-2.1.0-12.1.0
 2.2.2-12.1.0
 2.3.0-12.1.0
 2.4.0-12.4.1
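Each line of .compatibility names one hpcaitech/pytorch-cuda image tag, i.e. a PyTorch version and a CUDA version joined by a dash, and this change drops the 2.1.0 entry from the matrix. Below is a minimal sketch of how such a file could be parsed into a build matrix; the parsing code is purely illustrative and not taken from the repository's CI scripts.

```python
# Illustrative parser for .compatibility entries such as "2.2.2-12.1.0"
# (PyTorch version and CUDA version separated by a dash). Not actual CI code.
from pathlib import Path

def load_matrix(path: str = ".compatibility") -> list[dict[str, str]]:
    matrix = []
    for line in Path(path).read_text().splitlines():
        line = line.strip()
        if not line:
            continue
        torch_ver, cuda_ver = line.split("-", 1)
        matrix.append({"torch": torch_ver, "cuda": cuda_ver})
    return matrix

print(load_matrix())  # e.g. [{'torch': '2.2.2', 'cuda': '12.1.0'}, ...]
```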

.github/workflows/build_on_pr.yml (2 lines changed)

@@ -89,7 +89,7 @@ jobs:
     if: needs.detect.outputs.anyLibraryFileChanged == 'true'
     runs-on: [self-hosted, gpu]
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm -v /dev/shm -v /data/scratch:/data/scratch
     timeout-minutes: 90
     defaults:
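The same one-line container image bump (2.1.0-12.1.0 to 2.2.2-12.1.0) repeats in every workflow file below. A change this mechanical is easy to script; the following sketch applies it across the workflow directory, where the old and new tags come from this diff but the script itself is a hypothetical helper, not part of the PR.

```python
# Hypothetical helper that bumps the CI container tag in every workflow file.
# The tags match this diff; the script is illustrative only.
from pathlib import Path

OLD = "hpcaitech/pytorch-cuda:2.1.0-12.1.0"
NEW = "hpcaitech/pytorch-cuda:2.2.2-12.1.0"

for wf in Path(".github/workflows").glob("*.yml"):
    text = wf.read_text()
    if OLD in text:
        wf.write_text(text.replace(OLD, NEW))
        print(f"updated {wf}")
```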

.github/workflows/build_on_schedule.yml (2 lines changed)

@@ -12,7 +12,7 @@ jobs:
     if: github.repository == 'hpcaitech/ColossalAI'
     runs-on: [self-hosted, gpu]
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm -v /dev/shm -v /data/scratch/:/data/scratch/
     timeout-minutes: 90
     steps:

.github/workflows/doc_test_on_pr.yml (2 lines changed)

@@ -56,7 +56,7 @@ jobs:
     needs: detect-changed-doc
     runs-on: [self-hosted, gpu]
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm
     timeout-minutes: 30
     defaults:

.github/workflows/doc_test_on_schedule.yml (2 lines changed)

@@ -12,7 +12,7 @@ jobs:
     name: Test the changed Doc
     runs-on: [self-hosted, gpu]
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm
     timeout-minutes: 60
     steps:

.github/workflows/example_check_on_dispatch.yml (2 lines changed)

@@ -45,7 +45,7 @@ jobs:
       fail-fast: false
       matrix: ${{fromJson(needs.manual_check_matrix_preparation.outputs.matrix)}}
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm -v /data/scratch/examples-data:/data/ -v /dev/shm
     timeout-minutes: 15
     steps:

.github/workflows/example_check_on_pr.yml (2 lines changed)

@@ -90,7 +90,7 @@ jobs:
       fail-fast: false
      matrix: ${{fromJson(needs.detect-changed-example.outputs.matrix)}}
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm -v /data/scratch/examples-data:/data/ -v /dev/shm
     timeout-minutes: 30
     concurrency:

.github/workflows/example_check_on_schedule.yml (2 lines changed)

@@ -34,7 +34,7 @@ jobs:
       fail-fast: false
       matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}}
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm -v /data/scratch/examples-data:/data/ -v /dev/shm
     timeout-minutes: 30
     steps:

.github/workflows/run_chatgpt_examples.yml (2 lines changed)

@@ -19,7 +19,7 @@ jobs:
       github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
     runs-on: [self-hosted, gpu]
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm -v /data/scratch/examples-data:/data/scratch/examples-data --shm-size=10.24gb
     timeout-minutes: 60
     defaults:

.github/workflows/run_chatgpt_unit_tests.yml (2 lines changed)

@@ -19,7 +19,7 @@ jobs:
       github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
     runs-on: [self-hosted, gpu]
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm -v /data/scratch/examples-data:/data/scratch/examples-data
     timeout-minutes: 30
     defaults:

.github/workflows/run_colossalqa_unit_tests.yml (2 lines changed)

@@ -19,7 +19,7 @@ jobs:
       github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
     runs-on: [self-hosted, gpu]
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       volumes:
         - /data/scratch/test_data_colossalqa:/data/scratch/test_data_colossalqa
         - /data/scratch/llama-tiny:/data/scratch/llama-tiny

README.md (2 lines changed)

@@ -420,7 +420,7 @@ Please visit our [documentation](https://www.colossalai.org/) and [examples](htt
 ## Installation

 Requirements:
-- PyTorch >= 2.1
+- PyTorch >= 2.2
 - Python >= 3.7
 - CUDA >= 11.0
 - [NVIDIA GPU Compute Capability](https://developer.nvidia.com/cuda-gpus) >= 7.0 (V100/RTX20 and higher)
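A quick way to verify a machine against these updated requirements is a short script using standard torch APIs; this is an illustrative check, not code that ships with the repository.

```python
# Illustrative environment check against the README requirements.
import sys
import torch

print("Python OK:", sys.version_info >= (3, 7))
print("torch version:", torch.__version__)      # want >= 2.2
if torch.cuda.is_available():
    print("CUDA version:", torch.version.cuda)  # want >= 11.0
    capability = torch.cuda.get_device_capability()
    print("Compute capability OK:", capability >= (7, 0))  # V100/RTX20 and higher
```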

requirements/requirements.txt (2 lines changed)

@@ -8,7 +8,7 @@ click
 fabric
 contexttimer
 ninja
-torch>=2.1.0,<=2.4.0
+torch>=2.2.0,<=2.4.0
 safetensors
 einops
 pydantic
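The new specifier bounds torch on both ends: at least 2.2.0 and at most 2.4.0. To see the specifier semantics concretely, the sketch below evaluates candidate versions against it with the packaging library; this is an illustration, not project code.

```python
# Evaluating versions against the new requirement specifier. Illustrative only;
# assumes the `packaging` library is installed (pip install packaging).
from packaging.specifiers import SpecifierSet
from packaging.version import Version

spec = SpecifierSet(">=2.2.0,<=2.4.0")
for candidate in ["2.1.0", "2.2.2", "2.4.0", "2.4.1"]:
    verdict = "allowed" if Version(candidate) in spec else "rejected"
    print(candidate, verdict)
# 2.1.0 rejected, 2.2.2 allowed, 2.4.0 allowed, 2.4.1 rejected
```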
