From 4d582893a79b72a878e8fac52b3282799e404636 Mon Sep 17 00:00:00 2001 From: Frank Lee Date: Mon, 6 Feb 2023 17:07:41 +0800 Subject: [PATCH] [workflow] added cuda extension build test before release (#2598) * [workflow] added cuda extension build test before release * polish code --- .bdist.json | 24 ----- .cuda_ext.json | 16 +++ .../workflows/cuda_ext_check_before_merge.yml | 42 ++++++++ .github/workflows/release_bdist.yml | 99 ------------------- 4 files changed, 58 insertions(+), 123 deletions(-) delete mode 100644 .bdist.json create mode 100644 .cuda_ext.json create mode 100644 .github/workflows/cuda_ext_check_before_merge.yml delete mode 100644 .github/workflows/release_bdist.yml diff --git a/.bdist.json b/.bdist.json deleted file mode 100644 index 8693bca48..000000000 --- a/.bdist.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "build": [ - { - "torch_version": "1.11.0", - "cuda_image": "hpcaitech/cuda-conda:10.2" - }, - { - "torch_version": "1.11.0", - "cuda_image": "hpcaitech/cuda-conda:11.3" - }, - { - "torch_version": "1.12.1", - "cuda_image": "hpcaitech/cuda-conda:10.2" - }, - { - "torch_version": "1.12.1", - "cuda_image": "hpcaitech/cuda-conda:11.3" - }, - { - "torch_version": "1.12.1", - "cuda_image": "hpcaitech/cuda-conda:11.6" - } - ] -} diff --git a/.cuda_ext.json b/.cuda_ext.json new file mode 100644 index 000000000..eba19cf05 --- /dev/null +++ b/.cuda_ext.json @@ -0,0 +1,16 @@ +{ + "build": [ + { + "torch_command": "pip install torch==1.12.1+cu102 torchvision==0.13.1+cu102 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu102", + "cuda_image": "hpcaitech/cuda-conda:10.2" + }, + { + "torch_command": "pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu113", + "cuda_image": "hpcaitech/cuda-conda:11.3" + }, + { + "torch_command": "pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu116", + "cuda_image": "hpcaitech/cuda-conda:11.6" + } + ] +} diff --git a/.github/workflows/cuda_ext_check_before_merge.yml b/.github/workflows/cuda_ext_check_before_merge.yml new file mode 100644 index 000000000..eba5bb98e --- /dev/null +++ b/.github/workflows/cuda_ext_check_before_merge.yml @@ -0,0 +1,42 @@ +name: Check CUDA Extension Build Before Merge + +on: + workflow_dispatch: + pull_request: + paths: + - 'version.txt' + +jobs: + matrix_preparation: + name: Prepare Container List + if: github.repository == 'hpcaitech/ColossalAI' + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + steps: + - uses: actions/checkout@v3 + + - id: set-matrix + run: | + cuda_ext=$(cat .cuda_ext.json | tr '\n' ' ') + echo "matrix=${cuda_ext}" >> $GITHUB_OUTPUT + + build: + name: Release bdist wheels + needs: matrix_preparation + runs-on: [self-hosted, gpu] + strategy: + fail-fast: false + matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}} + container: + image: ${{ matrix.build.cuda_image }} + options: --gpus all --rm + steps: + - uses: actions/checkout@v2 + + - name: Install PyTorch + run: eval ${{ matrix.build.torch_command }} + + - name: Build + run: | + CUDA_EXT=1 pip install -v . diff --git a/.github/workflows/release_bdist.yml b/.github/workflows/release_bdist.yml deleted file mode 100644 index c9c51df8d..000000000 --- a/.github/workflows/release_bdist.yml +++ /dev/null @@ -1,99 +0,0 @@ -name: Release bdist wheel - -on: - workflow_dispatch: - inputs: - torch_version: - type: string - description: torch version, separated by comma - required: true - default: "all" - cuda_version: - type: string - description: cuda version, separated by comma - required: true - github_ref: - type: string - description: Branch or Tag - default: 'main' - required: true - -jobs: - matrix_preparation: - name: Prepare Container List - runs-on: ubuntu-latest - outputs: - matrix: ${{ steps.set-matrix.outputs.matrix }} - steps: - - id: set-matrix - env: - TORCH_VERSIONS: ${{ inputs.torch_version }} - CUDA_VERSIONS: ${{ inputs.cuda_version }} - run: | - echo $TORCH_VERSIONS - echo $CUDA_VERSIONS - IFS=',' - DOCKER_IMAGE=() - - for cv in $CUDA_VERSIONS - do - DOCKER_IMAGE+=("\"hpcaitech/cuda-conda:${cv}\"") - done - - container=$( IFS=',' ; echo "${DOCKER_IMAGE[*]}" ) - container="[${container}]" - echo "$container" - echo "::set-output name=matrix::{\"container\":$(echo "$container")}" - - build: - name: Release bdist wheels - needs: matrix_preparation - if: github.repository == 'hpcaitech/ColossalAI' && contains(fromJson('["FrankLeeeee", "ver217", "feifeibear", "kurisusnowdeng"]'), github.actor) - runs-on: [self-hosted, gpu] - strategy: - fail-fast: false - matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}} - container: - image: ${{ matrix.container }} - options: --gpus all --rm - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 0 - # cub is for cuda 10.2 - - name: Copy scripts and checkout - run: | - cp -r ./.github/workflows/scripts/* ./ - - # link the cache diretories to current path - ln -s /github/home/conda_pkgs ./conda_pkgs - ln -s /github/home/pip_wheels ./pip_wheels - - # set the conda package path - echo "pkgs_dirs:\n - $PWD/conda_pkgs" > ~/.condarc - - # set safe directory - git config --global --add safe.directory /__w/ColossalAI/ColossalAI - - # check out - git checkout $git_ref - - # get cub package for cuda 10.2 - wget https://github.com/NVIDIA/cub/archive/refs/tags/1.8.0.zip - unzip 1.8.0.zip - env: - git_ref: ${{ github.event.inputs.github_ref }} - - name: Build bdist wheel - run: | - pip install beautifulsoup4 requests packaging - python ./build_colossalai_wheel.py --torch_version $TORCH_VERSIONS - env: - TORCH_VERSIONS: ${{ inputs.torch_version }} - - name: 🚀 Deploy - uses: garygrossgarten/github-action-scp@release - with: - local: all_dist - remote: ${{ secrets.PRIVATE_PYPI_DIR }} - host: ${{ secrets.PRIVATE_PYPI_HOST }} - username: ${{ secrets.PRIVATE_PYPI_USER }} - password: ${{ secrets.PRIVATE_PYPI_PASSWD }}