From d383449fc4300ae3caf9cf481fc87bb4757f00a4 Mon Sep 17 00:00:00 2001
From: Wenxuan Tan
Date: Tue, 27 Aug 2024 10:12:21 +0800
Subject: [PATCH] [CI] Remove triton version for compatibility bug; update req torch >=2.2 (#6018)

* remove triton version

* remove torch 2.2

* remove torch 2.1

* debug

* remove 2.1 build tests

* require torch >=2.2

---------

Co-authored-by: Edenzzzz
---
 .compatibility                                  | 1 -
 .github/workflows/build_on_pr.yml               | 2 +-
 .github/workflows/build_on_schedule.yml         | 2 +-
 .github/workflows/doc_test_on_pr.yml            | 2 +-
 .github/workflows/doc_test_on_schedule.yml      | 2 +-
 .github/workflows/example_check_on_dispatch.yml | 2 +-
 .github/workflows/example_check_on_pr.yml       | 2 +-
 .github/workflows/example_check_on_schedule.yml | 2 +-
 .github/workflows/run_chatgpt_examples.yml      | 2 +-
 .github/workflows/run_chatgpt_unit_tests.yml    | 2 +-
 .github/workflows/run_colossalqa_unit_tests.yml | 2 +-
 README.md                                       | 2 +-
 requirements/requirements.txt                   | 2 +-
 13 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/.compatibility b/.compatibility
index 62d19faff..e1836506a 100644
--- a/.compatibility
+++ b/.compatibility
@@ -1,4 +1,3 @@
-2.1.0-12.1.0
 2.2.2-12.1.0
 2.3.0-12.1.0
 2.4.0-12.4.1
diff --git a/.github/workflows/build_on_pr.yml b/.github/workflows/build_on_pr.yml
index 58cd88268..ceb33c9ac 100644
--- a/.github/workflows/build_on_pr.yml
+++ b/.github/workflows/build_on_pr.yml
@@ -89,7 +89,7 @@ jobs:
     if: needs.detect.outputs.anyLibraryFileChanged == 'true'
     runs-on: [self-hosted, gpu]
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm -v /dev/shm -v /data/scratch:/data/scratch
     timeout-minutes: 90
     defaults:
diff --git a/.github/workflows/build_on_schedule.yml b/.github/workflows/build_on_schedule.yml
index fc688a71b..f8ca07d97 100644
--- a/.github/workflows/build_on_schedule.yml
+++ b/.github/workflows/build_on_schedule.yml
@@ -12,7 +12,7 @@ jobs:
     if: github.repository == 'hpcaitech/ColossalAI'
     runs-on: [self-hosted, gpu]
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm -v /dev/shm -v /data/scratch/:/data/scratch/
     timeout-minutes: 90
     steps:
diff --git a/.github/workflows/doc_test_on_pr.yml b/.github/workflows/doc_test_on_pr.yml
index 31c421846..2e0ff6a59 100644
--- a/.github/workflows/doc_test_on_pr.yml
+++ b/.github/workflows/doc_test_on_pr.yml
@@ -56,7 +56,7 @@ jobs:
     needs: detect-changed-doc
     runs-on: [self-hosted, gpu]
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm
     timeout-minutes: 30
     defaults:
diff --git a/.github/workflows/doc_test_on_schedule.yml b/.github/workflows/doc_test_on_schedule.yml
index e2491e460..3ea6481f9 100644
--- a/.github/workflows/doc_test_on_schedule.yml
+++ b/.github/workflows/doc_test_on_schedule.yml
@@ -12,7 +12,7 @@ jobs:
     name: Test the changed Doc
     runs-on: [self-hosted, gpu]
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm
     timeout-minutes: 60
     steps:
diff --git a/.github/workflows/example_check_on_dispatch.yml b/.github/workflows/example_check_on_dispatch.yml
index d877b06ce..6a65c4ff5 100644
--- a/.github/workflows/example_check_on_dispatch.yml
+++ b/.github/workflows/example_check_on_dispatch.yml
@@ -45,7 +45,7 @@ jobs:
       fail-fast: false
       matrix: ${{fromJson(needs.manual_check_matrix_preparation.outputs.matrix)}}
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm -v /data/scratch/examples-data:/data/ -v /dev/shm
     timeout-minutes: 15
     steps:
diff --git a/.github/workflows/example_check_on_pr.yml b/.github/workflows/example_check_on_pr.yml
index 7a906738c..af8da0383 100644
--- a/.github/workflows/example_check_on_pr.yml
+++ b/.github/workflows/example_check_on_pr.yml
@@ -90,7 +90,7 @@ jobs:
       fail-fast: false
       matrix: ${{fromJson(needs.detect-changed-example.outputs.matrix)}}
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm -v /data/scratch/examples-data:/data/ -v /dev/shm
     timeout-minutes: 30
     concurrency:
diff --git a/.github/workflows/example_check_on_schedule.yml b/.github/workflows/example_check_on_schedule.yml
index 6ec1b0591..bc98e0b0c 100644
--- a/.github/workflows/example_check_on_schedule.yml
+++ b/.github/workflows/example_check_on_schedule.yml
@@ -34,7 +34,7 @@ jobs:
       fail-fast: false
       matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}}
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm -v /data/scratch/examples-data:/data/ -v /dev/shm
     timeout-minutes: 30
     steps:
diff --git a/.github/workflows/run_chatgpt_examples.yml b/.github/workflows/run_chatgpt_examples.yml
index b7522ffbd..262def229 100644
--- a/.github/workflows/run_chatgpt_examples.yml
+++ b/.github/workflows/run_chatgpt_examples.yml
@@ -19,7 +19,7 @@ jobs:
       github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
     runs-on: [self-hosted, gpu]
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm -v /data/scratch/examples-data:/data/scratch/examples-data --shm-size=10.24gb
     timeout-minutes: 60
     defaults:
diff --git a/.github/workflows/run_chatgpt_unit_tests.yml b/.github/workflows/run_chatgpt_unit_tests.yml
index c0e74ecbb..21545098a 100644
--- a/.github/workflows/run_chatgpt_unit_tests.yml
+++ b/.github/workflows/run_chatgpt_unit_tests.yml
@@ -19,7 +19,7 @@ jobs:
       github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
     runs-on: [self-hosted, gpu]
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       options: --gpus all --rm -v /data/scratch/examples-data:/data/scratch/examples-data
     timeout-minutes: 30
     defaults:
diff --git a/.github/workflows/run_colossalqa_unit_tests.yml b/.github/workflows/run_colossalqa_unit_tests.yml
index 00944b92d..326ef4526 100644
--- a/.github/workflows/run_colossalqa_unit_tests.yml
+++ b/.github/workflows/run_colossalqa_unit_tests.yml
@@ -19,7 +19,7 @@ jobs:
       github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
     runs-on: [self-hosted, gpu]
     container:
-      image: hpcaitech/pytorch-cuda:2.1.0-12.1.0
+      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
       volumes:
         - /data/scratch/test_data_colossalqa:/data/scratch/test_data_colossalqa
         - /data/scratch/llama-tiny:/data/scratch/llama-tiny
diff --git a/README.md b/README.md
index 69506e338..22c565b50 100644
--- a/README.md
+++ b/README.md
@@ -420,7 +420,7 @@ Please visit our [documentation](https://www.colossalai.org/) and [examples](htt
 ## Installation
 
 Requirements:
-- PyTorch >= 2.1
+- PyTorch >= 2.2
 - Python >= 3.7
 - CUDA >= 11.0
 - [NVIDIA GPU Compute Capability](https://developer.nvidia.com/cuda-gpus) >= 7.0 (V100/RTX20 and higher)
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
index 578122d47..b77a33b0a 100644
--- a/requirements/requirements.txt
+++ b/requirements/requirements.txt
@@ -8,7 +8,7 @@ click
 fabric
 contexttimer
 ninja
-torch>=2.1.0,<=2.4.0
+torch>=2.2.0,<=2.4.0
 safetensors
 einops
 pydantic