From 03e52ecba3b60b04b552d82809043e5642509005 Mon Sep 17 00:00:00 2001
From: Frank Lee
Date: Fri, 10 Jun 2022 17:38:15 +0800
Subject: [PATCH] [workflow] added regular 8 GPU testing (#1099)

* [workflow] added regular 8 GPU testing

* polish workflow
---
 .github/workflows/build_gpu_8.yml     | 41 +++++++++++++++++++++++++++
 .github/workflows/release_nightly.yml | 14 ++--
 2 files changed, 43 insertions(+), 12 deletions(-)
 create mode 100644 .github/workflows/build_gpu_8.yml

diff --git a/.github/workflows/build_gpu_8.yml b/.github/workflows/build_gpu_8.yml
new file mode 100644
index 000000000..410bb2167
--- /dev/null
+++ b/.github/workflows/build_gpu_8.yml
@@ -0,0 +1,41 @@
+name: Build on 8 GPUs
+
+on:
+  schedule:
+    # run at 00:00 UTC every day
+    - cron: '0 0 * * *'
+  workflow_dispatch:
+
+jobs:
+  build:
+    name: Build and Test Colossal-AI
+    # Restrict the job to the upstream repository. Scheduled and manually
+    # dispatched runs have no pull_request payload, so check the repository.
+    if: github.repository == 'hpcaitech/ColossalAI'
+    runs-on: [self-hosted, 8-gpu]
+    container:
+      image: hpcaitech/pytorch-cuda:1.10.1-11.3.0
+      options: --gpus all --rm -v /data/scratch/cifar-10:/data/scratch/cifar-10
+    timeout-minutes: 40
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}
+      - name: Install Colossal-AI
+        run: |
+          # Copy any cached build artifacts into the workspace before installing.
+          [ ! -z "$(ls -A /github/home/cuda_ext_cache/)" ] && cp -r /github/home/cuda_ext_cache/* /__w/ColossalAI/ColossalAI/
+          pip install -r requirements/requirements.txt
+          pip install -v -e .
+          # Save the build directory and the compiled .so files back to the cache.
+          cp -r /__w/ColossalAI/ColossalAI/build /github/home/cuda_ext_cache/
+          cp /__w/ColossalAI/ColossalAI/*.so /github/home/cuda_ext_cache/
+          pip install -r requirements/requirements-test.txt
+      - name: Unit Testing
+        run: |
+          # Run the tests only when GPU 0 is idle (memory.used reading of at
+          # most 100); otherwise the step fails instead of sharing the device.
+          gpu_used=$(nvidia-smi -i 0 --query-gpu=memory.used --format=csv,noheader,nounits)
+          [ "$gpu_used" -le "100" ] && PYTHONPATH=$PWD pytest tests
+        env:
+          DATA: /data/scratch/cifar-10
diff --git a/.github/workflows/release_nightly.yml b/.github/workflows/release_nightly.yml
index 9014b1290..0ef942841 100644
--- a/.github/workflows/release_nightly.yml
+++ b/.github/workflows/release_nightly.yml
@@ -5,16 +5,7 @@ on:
     # run at 00:00 of every Sunday
     - cron: '0 0 * * 6'
   workflow_dispatch:
-    inputs:
-      cuda_version:
-        type: choice
-        description: CUDA Version
-        default: "all"
-        required: true
-        options:
-          - "all"
-          - "11.3"
-          - "10.2"
+
 jobs:
   matrix_preparation:
     name: Prepare Container List
@@ -24,7 +15,6 @@ jobs:
     steps:
       - id: set-matrix
         run: |
-          [ "${{github.event.inputs.cuda_version}}" != "all" ] && matrix="[\"hpcaitech/cuda-conda:${{github.event.inputs.cuda_version}}\"]"
-          [ "${{github.event.inputs.cuda_version}}" == "all" || "${{github.event.inputs.cuda_version}}" == "" ] && matrix="[\"hpcaitech/cuda-conda:11.3\", \"hpcaitech/cuda-conda:10.2\"]"
+          matrix="[\"hpcaitech/cuda-conda:11.3\", \"hpcaitech/cuda-conda:10.2\"]"
           echo $matrix
           echo "::set-output name=matrix::{\"container\":$(echo $matrix)}"