[workflow] added regular 8 GPU testing (#1099)

* [workflow] added regular 8 GPU testing

* polish workflow
pull/1101/head
Frank Lee 2022-06-10 17:38:15 +08:00 committed by GitHub
parent 7f2d2b2b5b
commit 03e52ecba3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 39 additions and 12 deletions

37
.github/workflows/build_gpu_8.yml vendored Normal file
View File

@ -0,0 +1,37 @@
# Scheduled / manually-triggered build and unit-test run of Colossal-AI on a
# self-hosted 8-GPU runner, inside the hpcaitech PyTorch+CUDA container.
name: Build on 8 GPUs
on:
  schedule:
    # run at 00:00 UTC every day
    - cron: '0 0 * * *'
  workflow_dispatch:
jobs:
  build:
    name: Build and Test Colossal-AI
    # Only run in the upstream repository (skips scheduled runs on forks).
    # `github.event.pull_request` is not populated for `schedule` or
    # `workflow_dispatch` events, so the check must use `github.repository`;
    # a pull_request-based condition would always be false and the job would
    # never execute.
    if: github.repository == 'hpcaitech/ColossalAI'
    runs-on: [self-hosted, 8-gpu]
    container:
      image: hpcaitech/pytorch-cuda:1.10.1-11.3.0
      # Expose all GPUs and mount the CIFAR-10 dataset directory from the host.
      options: --gpus all --rm -v /data/scratch/cifar-10:/data/scratch/cifar-10
    timeout-minutes: 40
    steps:
      - uses: actions/checkout@v2
        with:
          ssh-key: ${{ secrets.SSH_KEY_FOR_CI }}
      - name: Install Colossal-AI
        run: |
          # Restore previously built CUDA extension artifacts, if any, so the
          # editable install below can reuse them.
          [ ! -z "$(ls -A /github/home/cuda_ext_cache/)" ] && cp -r /github/home/cuda_ext_cache/* /__w/ColossalAI/ColossalAI/
          pip install -r requirements/requirements.txt
          pip install -v -e .
          # Save the build outputs back to the cache for the next run.
          cp -r /__w/ColossalAI/ColossalAI/build /github/home/cuda_ext_cache/
          cp /__w/ColossalAI/ColossalAI/*.so /github/home/cuda_ext_cache/
          pip install -r requirements/requirements-test.txt
      - name: Unit Testing
        run: |
          # Memory currently in use on GPU 0, in MiB.
          gpu_used=$(nvidia-smi -i 0 --query-gpu=memory.used --format=csv,noheader,nounits)
          # Only run the tests when GPU 0 is essentially idle (<= 100 MiB in
          # use); otherwise fail the step with an explicit message instead of
          # testing on a GPU that is presumably occupied by another workload.
          if [ "$gpu_used" -le "100" ]; then
            PYTHONPATH=$PWD pytest tests
          else
            echo "GPU 0 is busy (${gpu_used} MiB in use); refusing to run tests." >&2
            exit 1
          fi
        env:
          DATA: /data/scratch/cifar-10

View File

@ -5,16 +5,7 @@ on:
# run at 00:00 UTC every Saturday (cron day-of-week 6)
- cron: '0 0 * * 6'
workflow_dispatch:
inputs:
cuda_version:
type: choice
description: CUDA Version
default: "all"
required: true
options:
- "all"
- "11.3"
- "10.2"
jobs:
matrix_preparation:
name: Prepare Container List
@ -24,8 +15,7 @@ jobs:
steps:
- id: set-matrix
run: |
[ "${{github.event.inputs.cuda_version}}" != "all" ] && matrix="[\"hpcaitech/cuda-conda:${{github.event.inputs.cuda_version}}\"]"
[ "${{github.event.inputs.cuda_version}}" == "all" || "${{github.event.inputs.cuda_version}}" == "" ] && matrix="[\"hpcaitech/cuda-conda:11.3\", \"hpcaitech/cuda-conda:10.2\"]"
matrix="[\"hpcaitech/cuda-conda:11.3\", \"hpcaitech/cuda-conda:10.2\"]"
echo $matrix
echo "::set-output name=matrix::{\"container\":$(echo $matrix)}"