diff --git a/.github/workflows/dispatch_compatibility_test.yml b/.github/workflows/compatiblity_test_on_dispatch.yml
similarity index 98%
rename from .github/workflows/dispatch_compatibility_test.yml
rename to .github/workflows/compatiblity_test_on_dispatch.yml
index ac5669c6f..717cf729b 100644
--- a/.github/workflows/dispatch_compatibility_test.yml
+++ b/.github/workflows/compatiblity_test_on_dispatch.yml
@@ -1,4 +1,4 @@
-name: Dispatch Compatibility Test
+name: Compatibility Test on Dispatch
 
 on:
   workflow_dispatch:
diff --git a/.github/workflows/compatiblity_test_on_pr.yml b/.github/workflows/compatiblity_test_on_pr.yml
new file mode 100644
index 000000000..2fca67b82
--- /dev/null
+++ b/.github/workflows/compatiblity_test_on_pr.yml
@@ -0,0 +1,78 @@
+name: Compatibility Test on PR
+
+# Runs whenever a pull request changes version.txt or .compatibility.
+on:
+  pull_request:
+    paths:
+      - 'version.txt'
+      - '.compatibility'
+
+jobs:
+  # Builds the JSON job matrix: one container image per line of .compatibility.
+  matrix_preparation:
+    name: Prepare Container List
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    steps:
+      - uses: actions/checkout@v3
+      - id: set-matrix
+        run: |
+          IFS=','
+          DOCKER_IMAGE=()
+
+          # -r keeps backslashes literal; the -n test keeps a last tag that
+          # has no trailing newline.
+          while read -r tag || [ -n "$tag" ]; do
+            DOCKER_IMAGE+=("\"hpcaitech/pytorch-cuda:${tag}\"")
+          done <.compatibility
+
+          container=$( IFS=',' ; echo "${DOCKER_IMAGE[*]}" )
+          container="[${container}]"
+          echo "$container"
+          # ::set-output is deprecated; step outputs go to the GITHUB_OUTPUT file.
+          echo "matrix={\"container\":${container}}" >> "$GITHUB_OUTPUT"
+
+  # Runs the test suite once per container image in the prepared matrix.
+  build:
+    name: Test for PyTorch Compatibility
+    needs: matrix_preparation
+    if: github.repository == 'hpcaitech/ColossalAI'
+    runs-on: [self-hosted, gpu]
+    strategy:
+      fail-fast: false
+      matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}}
+    container:
+      image: ${{ matrix.container }}
+      options: --gpus all --rm -v /data/scratch/cifar-10:/data/scratch/cifar-10
+    timeout-minutes: 120
+    steps:
+      - name: Install dependencies
+        run: |
+          pip install -U pip setuptools wheel --user
+      # TensorNVMe is checked out into its own directory and installed first.
+      - uses: actions/checkout@v3
+        with:
+          repository: hpcaitech/TensorNVMe
+          ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}
+          path: TensorNVMe
+      - name: Install tensornvme
+        run: |
+          cd TensorNVMe
+          conda install cmake
+          pip install -r requirements.txt
+          pip install -v .
+      - uses: actions/checkout@v3
+        with:
+          ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}
+      - name: Install Colossal-AI
+        run: |
+          pip install -v --no-cache-dir .
+          pip install -r requirements/requirements-test.txt
+      - name: Unit Testing
+        run: |
+          PYTHONPATH=$PWD pytest tests
+        env:
+          DATA: /data/scratch/cifar-10
+          NCCL_SHM_DISABLE: 1
+          LD_LIBRARY_PATH: /github/home/.tensornvme/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
diff --git a/.github/workflows/auto_compatibility_test.yml b/.github/workflows/compatiblity_test_on_schedule.yml
similarity index 95%
rename from .github/workflows/auto_compatibility_test.yml
rename to .github/workflows/compatiblity_test_on_schedule.yml
index 4b026c63e..399f03cc7 100644
--- a/.github/workflows/auto_compatibility_test.yml
+++ b/.github/workflows/compatiblity_test_on_schedule.yml
@@ -1,10 +1,6 @@
-name: Compatibility Test
+name: Compatibility Test on Schedule
 
 on:
-  pull_request:
-    paths:
-      - 'version.txt'
-      - '.compatibility'
-  # run at 03:00 of every Sunday(singapore time) so here is UTC time Saturday 16:00
+  # run at 03:00 every Sunday (Singapore time, UTC+8), i.e. 19:00 Saturday UTC
   schedule:
     - cron: '0 19 * * 6'