[workflow] refactored the example check workflow (#2411)

* [workflow] refactored the example check workflow

* polish code

* polish code

* polish code

* polish code

* polish code

* polish code

* polish code

* polish code

* polish code

* polish code

* polish code
pull/2413/head
Frank Lee 2023-01-10 11:26:19 +08:00 committed by GitHub
parent 8de8de9fa3
commit 8327932d2c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 113 additions and 92 deletions

View File

@ -1,7 +1,7 @@
name: Test Example
on:
pull_request:
# So only the changes in examples folder will trigger jobs below.
# any change in the examples folder will trigger check for the corresponding example.
paths:
- 'examples/**'
# Runs at 00:00 every Sunday (Singapore time), which is 16:00 Saturday in UTC.
@ -17,12 +17,14 @@ jobs:
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' && github.event_name == 'pull_request'
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
name: Check out all files
matrix: ${{ steps.setup-matrix.outputs.matrix }}
anyChanged: ${{ steps.setup-matrix.outputs.anyChanged }}
name: Detect changed example files
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 2
fetch-depth: 0
ref: ${{ github.event.pull_request.head.sha }}
- name: Get all changed example files
id: changed-files
uses: tj-actions/changed-files@v35
@ -30,46 +32,53 @@ jobs:
with:
since_last_remote_commit: true
- name: setup matrix
id: set-matrix
id: setup-matrix
run: |
changedFileName=""
for file in ${{ steps.changed-files.outputs.all_changed_files }}; do
changedFileName="${file}:${changedFileName}"
done
echo "$changedFileName was changed"
res=`python .github/workflows/scripts/changed_example.py --fileNameList $changedFileName`
echo "All changed files are $res"
loc=$( IFS=',' ; echo "${res[*]}" )
echo "$loc"
echo "::set-output name=matrix::{\"loc\":$(echo "$loc")}"
# Quote the argument so an empty file list is still passed as one (empty) value.
res=$(python .github/workflows/scripts/example_checks/detect_changed_example.py --fileNameList "$changedFileName")
echo "All changed examples are $res"
# The detection script prints a Python list; "[]" means no example directory was touched.
if [ "$res" = "[]" ]; then
  echo "anyChanged=false" >> "$GITHUB_OUTPUT"
  echo "matrix=null" >> "$GITHUB_OUTPUT"
else
  dirs=$( IFS=',' ; echo "${res[*]}" )
  echo "anyChanged=true" >> "$GITHUB_OUTPUT"
  echo "matrix={\"directory\":$(echo "$dirs")}" >> "$GITHUB_OUTPUT"
fi
# If no file is changed, this job fails with an error saying that the matrix has no value.
check-all-changed-files:
check-changed-example:
# Add this condition to avoid executing this job if the trigger event is workflow_dispatch.
if: |
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' && github.event_name == 'pull_request'
name: Test each changed example files
name: Test the changed example
needs: detect-changed-example
runs-on: [self-hosted, gpu]
strategy:
matrix: ${{fromJson(needs.detect-changed-example.outputs.matrix)}}
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
options: --gpus all --rm -v /data/scratch/examples-data:/data/
timeout-minutes: 10
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 2
- name: Install dependancies
- name: Install Colossal-AI
run: |
pip install -r ./requirements/requirements.txt
pip install colossalai
- name: List all changed example files
pip install -v .
- name: Test the example
run: |
res=${{ matrix.loc }}
cd "${PWD}/examples/${res}"
example_dir=${{ matrix.directory }}
cd "${PWD}/examples/${example_dir}"
bash test_ci.sh
env:
NCCL_SHM_DISABLE: 1
# This is for all files' weekly check. Specifically, this job is to find all the directories.
matrix_preparation:
@ -77,20 +86,20 @@ jobs:
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' && github.event_name == 'schedule'
name: Prepare Directory List for All files
name: Prepare matrix for weekly check
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
matrix: ${{ steps.setup-matrix.outputs.matrix }}
steps:
- name: 📚 Checkout
uses: actions/checkout@v3
- name: setup matrix
id: set-matrix
id: setup-matrix
run: |
res=`python .github/workflows/scripts/weekly_check_example.py`
res=`python .github/workflows/scripts/example_checks/check_example_weekly.py`
all_loc=$( IFS=',' ; echo "${res[*]}" )
echo "$all_loc"
echo "::set-output name=matrix::{\"all_loc\":$(echo "$all_loc")}"
echo "Found the examples: $all_loc"
echo "matrix={\"directory\":$(echo "$all_loc")}" >> $GITHUB_OUTPUT
weekly_check:
if: |
@ -104,16 +113,18 @@ jobs:
matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}}
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
timeout-minutes: 10
steps:
- name: 📚 Checkout
uses: actions/checkout@v3
- name: Install the requirements
- name: Install Colossal-AI
run: |
pip install -r ./requirements/requirements.txt
pip install colossalai
pip install -v .
- name: Traverse all files
run: |
dir=${{ matrix.all_loc }}
echo "${dir} is current directory"
cd "${PWD}/examples/${dir}"
# The matrix key emitted by the matrix_preparation job is "directory"; a misspelled key expands to an empty string.
example_dir=${{ matrix.directory }}
echo "Testing ${example_dir} now"
cd "${PWD}/examples/${example_dir}"
bash test_ci.sh
env:
NCCL_SHM_DISABLE: 1

View File

@ -8,7 +8,7 @@ on:
required: true
jobs:
manual_check_matrix_preparation:
matrix_preparation:
if: |
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
@ -16,31 +16,24 @@ jobs:
name: Check the examples user want
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix-1.outputs.matrix }}
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- name: 📚 Checkout
uses: actions/checkout@v3
- name: Get manual directories
id: set-matrix-1
- name: Set up matrix
id: set-matrix
env:
check_dir: ${{ inputs.example_directory }}
run: |
all_mannual_check_dir=()
for cdi in $check_dir
do
all_mannual_check_dir+=("\"${cdi}\"")
done
man_loc=$( IFS=',' ; echo "${all_mannual_check_dir[*]}" )
res=`python .github/workflows/scripts/input_check_example.py --fileNameList $man_loc`
echo "${res} is file existance. 1 for all exist, -1 for at least one file not exist."
if [ res == -1 ];then
exit(1)
# Quote the input so it reaches the script as a single comma-separated argument.
res=$(python .github/workflows/scripts/example_checks/check_dispatch_inputs.py --fileNameList "$check_dir")
# The script prints "success" or "failure"; compare the variable's value, not the literal word "res".
if [ "$res" = "failure" ]; then
  echo "At least one requested example directory does not exist under examples/"
  exit 1
fi
man_loc="[${man_loc}]"
echo "$man_loc"
echo "::set-output name=matrix::{\"man_loc\":$(echo "$man_loc")}"
# fromJson needs a JSON array of strings, so every comma-separated entry must be quoted.
# Spaces and any quotes typed by the user are dropped first to avoid double quoting.
cleaned="${check_dir// /}"
cleaned="${cleaned//\"/}"
sep='","'
dirs="[\"${cleaned//,/$sep}\"]"
echo "Testing examples in $dirs"
echo "matrix={\"directory\":${dirs}}" >> "$GITHUB_OUTPUT"
manual_check:
test_example:
if: |
github.event.pull_request.draft == false &&
github.base_ref == 'main' &&
@ -52,16 +45,19 @@ jobs:
# The matrix comes from the job with id matrix_preparation; this job's `needs` must list that same id.
matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}}
container:
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
options: --gpus all --rm -v /data/scratch/examples-data:/data/
timeout-minutes: 10
steps:
- name: 📚 Checkout
uses: actions/checkout@v3
- name: Install the requirements
- name: Install Colossal-AI
run: |
pip install -r ./requirements/requirements.txt
pip install colossalai
- name: Traverse all files
pip install -v .
- name: Test the example
run: |
dir=${{ matrix.man_loc }}
echo "${dir} is current directory"
dir=${{ matrix.directory }}
echo "Testing ${dir} now"
cd "${PWD}/examples/${dir}"
bash test_ci.sh
env:
NCCL_SHM_DISABLE: 1

View File

@ -0,0 +1,27 @@
import argparse
import os
def check_inputs(input_list):
    """Check that every requested example directory exists under ``examples``.

    Args:
        input_list: iterable of paths relative to the ``examples`` folder.

    Returns:
        True if every entry is non-empty and exists on disk, False otherwise.
        An empty ``input_list`` yields True.
    """
    for path in input_list:
        # An empty entry (e.g. produced by a trailing comma) would resolve to
        # the ``examples`` folder itself and pass the existence check.
        if not path:
            return False
        real_path = os.path.join('examples', path)
        if not os.path.exists(real_path):
            return False
    return True
def main():
    """Validate the comma-separated example directories given on the command line.

    Prints ``success`` when ``check_inputs`` accepts every entry and
    ``failure`` otherwise.
    """
    parser = argparse.ArgumentParser()
    # required=True: without the argument, fileNameList would be None and the
    # split below would raise AttributeError instead of a usage error.
    parser.add_argument('-f', '--fileNameList', type=str, required=True, help="List of file names")
    args = parser.parse_args()
    name_list = args.fileNameList.split(",")
    is_correct = check_inputs(name_list)

    if is_correct:
        print('success')
    else:
        print('failure')


if __name__ == '__main__':
    main()

View File

@ -5,9 +5,9 @@ def show_files(path, all_files):
# Traverse all the folder/file in current directory
file_list = os.listdir(path)
# Determine the element is folder or file. If file, pass it into list, if folder, recurse.
for file in file_list:
for file_name in file_list:
# Get the abs directory using os.path.join() and store into cur_path.
cur_path = os.path.join(path, file)
cur_path = os.path.join(path, file_name)
# Determine whether folder
if os.path.isdir(cur_path):
show_files(cur_path, all_files)
@ -26,9 +26,8 @@ def main():
for file_loc in contents:
split_loc = file_loc.split('/')
# There must be two sub-folder levels after the examples folder: examples/images/vit is acceptable, but examples/images/README.md and examples/requirements.txt are not.
if len(split_loc) - split_loc.index('examples') >= 3:
tmp_loc = split_loc[(split_loc.index('examples') + 1):(split_loc.index('examples') + 3)]
re_loc = join(tmp_loc, '/')
if len(split_loc) >= 4:
re_loc = '/'.join(split_loc[1:3])
if re_loc not in all_loc:
all_loc.append(re_loc)
print(all_loc)

View File

@ -3,14 +3,19 @@ import argparse
def main():
parser = argparse.ArgumentParser()
parser.add_argument('--fileNameList', type=str)
parser.add_argument('-f', '--fileNameList', type=str, help="The list of changed files")
args = parser.parse_args()
name_list = args.fileNameList.split(":")
folder_need_check = set()
for loc in name_list:
# Find only the sub-folder of 'example' folder
# Find only the sub-sub-folder of 'example' folder
# the examples folder structure is like
# - examples
# - area
# - application
# - file
if loc.split("/")[0] == "examples" and len(loc.split("/")) >= 4:
folder_need_check.add(loc.split("/")[1] + "/" + loc.split("/")[2])
folder_need_check.add('/'.join(loc.split("/")[1:3]))
# Output the result using print. Then the shell can get the values.
print(list(folder_need_check))

View File

@ -1,23 +0,0 @@
import argparse
import os
def detect_correct(loc_li):
    """Check that every quoted example location exists under ``examples/``.

    Args:
        loc_li: iterable of strings, each a quoted string literal such as
            ``'"images/vit"'``.

    Returns:
        1 if every location exists, -1 as soon as one does not.

    Raises:
        ValueError or SyntaxError: if an entry is not a plain Python literal.
    """
    # Local import keeps this block self-contained.
    import ast

    for loc in loc_li:
        # literal_eval only unwraps literals; unlike eval it cannot execute
        # arbitrary code coming from the workflow input.
        real_loc = 'examples/' + ast.literal_eval(loc)
        if not os.path.exists(real_loc):
            return -1
    return 1
def main():
    """Parse ``--fileNameList`` and print the result of ``detect_correct`` for its entries."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--fileNameList', type=str)
    parsed = arg_parser.parse_args()
    # The list arrives as one comma-separated string.
    print(detect_correct(parsed.fileNameList.split(",")))


if __name__ == '__main__':
    main()

View File

@ -6,8 +6,8 @@ from colossalai.amp import AMP_TYPE
BATCH_SIZE = 256
LEARNING_RATE = 3e-3
WEIGHT_DECAY = 0.3
NUM_EPOCHS = 10
WARMUP_EPOCHS = 3
NUM_EPOCHS = 2
WARMUP_EPOCHS = 1
# model config
IMG_SIZE = 224

View File

@ -1,2 +1,3 @@
colossalai >= 0.1.12
torch >= 1.8.1
titans

View File

@ -0,0 +1,5 @@
#!/bin/bash
# CI entry point for this example: install its requirements, then launch training.
# -e: stop on the first failing command, -u: treat unset variables as errors,
# -x: trace each command, pipefail: a pipeline fails if any stage fails.
set -euxo pipefail
# Install the dependencies listed next to this script.
pip install -r requirements.txt
# Single-node launch with 4 worker processes.
# NOTE(review): -s presumably selects the synthetic dataset; confirm against train.py's argument parser.
torchrun --standalone --nproc_per_node 4 train.py --config config.py -s

View File

@ -98,9 +98,9 @@ def main():
root = os.environ.get('DATA', '../data')
if args.synthetic:
# if we use synthetic dataset
# we train for 30 steps and eval for 10 steps per epoch
train_dataloader = DummyDataloader(length=30, batch_size=gpc.config.BATCH_SIZE)
test_dataloader = DummyDataloader(length=10, batch_size=gpc.config.BATCH_SIZE)
# we train for 10 steps and eval for 5 steps per epoch
train_dataloader = DummyDataloader(length=10, batch_size=gpc.config.BATCH_SIZE)
test_dataloader = DummyDataloader(length=5, batch_size=gpc.config.BATCH_SIZE)
else:
train_dataloader, test_dataloader = build_cifar(gpc.config.BATCH_SIZE, root, pad_if_needed=True)