mirror of https://github.com/hpcaitech/ColossalAI
[workflow] refactored the example check workflow (#2411)
* [workflow] refactored the example check workflow * polish code * polish code * polish code * polish code * polish code * polish code * polish code * polish code * polish code * polish code * polish code
pull/2413/head
parent
8de8de9fa3
commit
8327932d2c
|
@ -1,7 +1,7 @@
|
||||||
name: Test Example
|
name: Test Example
|
||||||
on:
|
on:
|
||||||
pull_request:
|
pull_request:
|
||||||
# So only the changes in examples folder will trigger jobs below.
|
# any change in the examples folder will trigger check for the corresponding example.
|
||||||
paths:
|
paths:
|
||||||
- 'examples/**'
|
- 'examples/**'
|
||||||
# run at 00:00 of every Sunday(singapore time) so here is UTC time Saturday 16:00
|
# run at 00:00 of every Sunday(singapore time) so here is UTC time Saturday 16:00
|
||||||
|
@ -17,12 +17,14 @@ jobs:
|
||||||
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' && github.event_name == 'pull_request'
|
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' && github.event_name == 'pull_request'
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
outputs:
|
outputs:
|
||||||
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
matrix: ${{ steps.setup-matrix.outputs.matrix }}
|
||||||
name: Check out all files
|
anyChanged: ${{ steps.setup-matrix.outputs.anyChanged }}
|
||||||
|
name: Detect changed example files
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v3
|
||||||
with:
|
with:
|
||||||
fetch-depth: 2
|
fetch-depth: 0
|
||||||
|
ref: ${{ github.event.pull_request.head.sha }}
|
||||||
- name: Get all changed example files
|
- name: Get all changed example files
|
||||||
id: changed-files
|
id: changed-files
|
||||||
uses: tj-actions/changed-files@v35
|
uses: tj-actions/changed-files@v35
|
||||||
|
@ -30,46 +32,53 @@ jobs:
|
||||||
with:
|
with:
|
||||||
since_last_remote_commit: true
|
since_last_remote_commit: true
|
||||||
- name: setup matrix
|
- name: setup matrix
|
||||||
id: set-matrix
|
id: setup-matrix
|
||||||
run: |
|
run: |
|
||||||
changedFileName=""
|
changedFileName=""
|
||||||
for file in ${{ steps.changed-files.outputs.all_changed_files }}; do
|
for file in ${{ steps.changed-files.outputs.all_changed_files }}; do
|
||||||
changedFileName="${file}:${changedFileName}"
|
changedFileName="${file}:${changedFileName}"
|
||||||
done
|
done
|
||||||
echo "$changedFileName was changed"
|
echo "$changedFileName was changed"
|
||||||
res=`python .github/workflows/scripts/changed_example.py --fileNameList $changedFileName`
|
res=`python .github/workflows/scripts/example_checks/detect_changed_example.py --fileNameList $changedFileName`
|
||||||
echo "All changed files are $res"
|
echo "All changed examples are $res"
|
||||||
loc=$( IFS=',' ; echo "${res[*]}" )
|
|
||||||
echo "$loc"
|
# $res holds the list printed by detect_changed_example.py; "[]" means no example changed
if [ "$res" = "[]" ]; then
|
||||||
echo "::set-output name=matrix::{\"loc\":$(echo "$loc")}"
|
echo "anyChanged=false" >> $GITHUB_OUTPUT
|
||||||
|
echo "matrix=null" >> $GITHUB_OUTPUT
|
||||||
|
else
|
||||||
|
dirs=$( IFS=',' ; echo "${res[*]}" )
|
||||||
|
echo "anyChanged=true" >> $GITHUB_OUTPUT
|
||||||
|
echo "matrix={\"directory\":$(echo "$dirs")}" >> $GITHUB_OUTPUT
|
||||||
|
fi
|
||||||
|
|
||||||
# If no file is changed, it will prompt an error and shows the matrix do not have value.
|
# If no file is changed, it will prompt an error and shows the matrix do not have value.
|
||||||
check-all-changed-files:
|
check-changed-example:
|
||||||
# Add this condition to avoid executing this job if the trigger event is workflow_dispatch.
|
# Add this condition to avoid executing this job if the trigger event is workflow_dispatch.
|
||||||
if: |
|
if: |
|
||||||
github.event.pull_request.draft == false &&
|
github.event.pull_request.draft == false &&
|
||||||
github.base_ref == 'main' &&
|
github.base_ref == 'main' &&
|
||||||
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' && github.event_name == 'pull_request'
|
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' && github.event_name == 'pull_request'
|
||||||
name: Test each changed example files
|
name: Test the changed example
|
||||||
needs: detect-changed-example
|
needs: detect-changed-example
|
||||||
runs-on: [self-hosted, gpu]
|
runs-on: [self-hosted, gpu]
|
||||||
strategy:
|
strategy:
|
||||||
matrix: ${{fromJson(needs.detect-changed-example.outputs.matrix)}}
|
matrix: ${{fromJson(needs.detect-changed-example.outputs.matrix)}}
|
||||||
container:
|
container:
|
||||||
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
|
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
|
||||||
|
options: --gpus all --rm -v /data/scratch/examples-data:/data/
|
||||||
|
timeout-minutes: 10
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v3
|
||||||
with:
|
- name: Install Colossal-AI
|
||||||
fetch-depth: 2
|
|
||||||
- name: Install dependancies
|
|
||||||
run: |
|
run: |
|
||||||
pip install -r ./requirements/requirements.txt
|
pip install -v .
|
||||||
pip install colossalai
|
- name: Test the example
|
||||||
- name: List all changed example files
|
|
||||||
run: |
|
run: |
|
||||||
res=${{ matrix.loc }}
|
example_dir=${{ matrix.directory }}
|
||||||
cd "${PWD}/examples/${res}"
|
cd "${PWD}/examples/${example_dir}"
|
||||||
bash test_ci.sh
|
bash test_ci.sh
|
||||||
|
env:
|
||||||
|
NCCL_SHM_DISABLE: 1
|
||||||
|
|
||||||
# This is for all files' weekly check. Specifically, this job is to find all the directories.
|
# This is for all files' weekly check. Specifically, this job is to find all the directories.
|
||||||
matrix_preparation:
|
matrix_preparation:
|
||||||
|
@ -77,20 +86,20 @@ jobs:
|
||||||
github.event.pull_request.draft == false &&
|
github.event.pull_request.draft == false &&
|
||||||
github.base_ref == 'main' &&
|
github.base_ref == 'main' &&
|
||||||
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' && github.event_name == 'schedule'
|
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' && github.event_name == 'schedule'
|
||||||
name: Prepare Directory List for All files
|
name: Prepare matrix for weekly check
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
outputs:
|
outputs:
|
||||||
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
matrix: ${{ steps.setup-matrix.outputs.matrix }}
|
||||||
steps:
|
steps:
|
||||||
- name: 📚 Checkout
|
- name: 📚 Checkout
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v3
|
||||||
- name: setup matrix
|
- name: setup matrix
|
||||||
id: set-matrix
|
id: setup-matrix
|
||||||
run: |
|
run: |
|
||||||
res=`python .github/workflows/scripts/weekly_check_example.py`
|
res=`python .github/workflows/scripts/example_checks/check_example_weekly.py`
|
||||||
all_loc=$( IFS=',' ; echo "${res[*]}" )
|
all_loc=$( IFS=',' ; echo "${res[*]}" )
|
||||||
echo "$all_loc"
|
echo "Found the examples: $all_loc"
|
||||||
echo "::set-output name=matrix::{\"all_loc\":$(echo "$all_loc")}"
|
echo "matrix={\"directory\":$(echo "$all_loc")}" >> $GITHUB_OUTPUT
|
||||||
|
|
||||||
weekly_check:
|
weekly_check:
|
||||||
if: |
|
if: |
|
||||||
|
@ -104,16 +113,18 @@ jobs:
|
||||||
matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}}
|
matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}}
|
||||||
container:
|
container:
|
||||||
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
|
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
|
||||||
|
timeout-minutes: 10
|
||||||
steps:
|
steps:
|
||||||
- name: 📚 Checkout
|
- name: 📚 Checkout
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v3
|
||||||
- name: Install the requirements
|
- name: Install Colossal-AI
|
||||||
run: |
|
run: |
|
||||||
pip install -r ./requirements/requirements.txt
|
pip install -v .
|
||||||
pip install colossalai
|
|
||||||
- name: Traverse all files
|
- name: Traverse all files
|
||||||
run: |
|
run: |
|
||||||
dir=${{ matrix.all_loc }}
|
# the matrix key emitted by the matrix_preparation job is "directory"
example_dir=${{ matrix.directory }}
|
||||||
echo "${dir} is current directory"
|
echo "Testing ${example_dir} now"
|
||||||
cd "${PWD}/examples/${dir}"
|
cd "${PWD}/examples/${example_dir}"
|
||||||
bash test_ci.sh
|
bash test_ci.sh
|
||||||
|
env:
|
||||||
|
NCCL_SHM_DISABLE: 1
|
|
@ -8,7 +8,7 @@ on:
|
||||||
required: true
|
required: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
manual_check_matrix_preparation:
|
matrix_preparation:
|
||||||
if: |
|
if: |
|
||||||
github.event.pull_request.draft == false &&
|
github.event.pull_request.draft == false &&
|
||||||
github.base_ref == 'main' &&
|
github.base_ref == 'main' &&
|
||||||
|
@ -16,31 +16,24 @@ jobs:
|
||||||
name: Check the examples user want
|
name: Check the examples user want
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
outputs:
|
outputs:
|
||||||
matrix: ${{ steps.set-matrix-1.outputs.matrix }}
|
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||||
steps:
|
steps:
|
||||||
- name: 📚 Checkout
|
- name: 📚 Checkout
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v3
|
||||||
- name: Get manual directories
|
- name: Set up matrix
|
||||||
id: set-matrix-1
|
id: set-matrix
|
||||||
env:
|
env:
|
||||||
check_dir: ${{ inputs.example_directory }}
|
check_dir: ${{ inputs.example_directory }}
|
||||||
run: |
|
run: |
|
||||||
all_mannual_check_dir=()
|
res=`python .github/workflows/scripts/example_checks/check_dispatch_inputs.py --fileNameList $check_dir`
|
||||||
for cdi in $check_dir
|
# check_dispatch_inputs.py prints "failure" when a requested example directory is missing
if [ "$res" == "failure" ]; then
|
||||||
do
|
exit -1
|
||||||
all_mannual_check_dir+=("\"${cdi}\"")
|
|
||||||
done
|
|
||||||
man_loc=$( IFS=',' ; echo "${all_mannual_check_dir[*]}" )
|
|
||||||
res=`python .github/workflows/scripts/input_check_example.py --fileNameList $man_loc`
|
|
||||||
echo "${res} is file existance. 1 for all exist, -1 for at least one file not exist."
|
|
||||||
if [ res == -1 ];then
|
|
||||||
exit(1)
|
|
||||||
fi
|
fi
|
||||||
man_loc="[${man_loc}]"
|
dirs="[${check_dir}]"
|
||||||
echo "$man_loc"
|
echo "Testing examples in $dirs"
|
||||||
echo "::set-output name=matrix::{\"man_loc\":$(echo "$man_loc")}"
|
echo "matrix={\"directory\":$(echo "$dirs")}" >> $GITHUB_OUTPUT
|
||||||
|
|
||||||
manual_check:
|
test_example:
|
||||||
if: |
|
if: |
|
||||||
github.event.pull_request.draft == false &&
|
github.event.pull_request.draft == false &&
|
||||||
github.base_ref == 'main' &&
|
github.base_ref == 'main' &&
|
||||||
|
@ -52,16 +45,19 @@ jobs:
|
||||||
matrix: ${{fromJson(needs.manual_check_matrix_preparation.outputs.matrix)}}
|
matrix: ${{fromJson(needs.manual_check_matrix_preparation.outputs.matrix)}}
|
||||||
container:
|
container:
|
||||||
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
|
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
|
||||||
|
options: --gpus all --rm -v /data/scratch/examples-data:/data/
|
||||||
|
timeout-minutes: 10
|
||||||
steps:
|
steps:
|
||||||
- name: 📚 Checkout
|
- name: 📚 Checkout
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v3
|
||||||
- name: Install the requirements
|
- name: Install Colossal-AI
|
||||||
run: |
|
run: |
|
||||||
pip install -r ./requirements/requirements.txt
|
pip install -v .
|
||||||
pip install colossalai
|
- name: Test the example
|
||||||
- name: Traverse all files
|
|
||||||
run: |
|
run: |
|
||||||
dir=${{ matrix.man_loc }}
|
dir=${{ matrix.directory }}
|
||||||
echo "${dir} is current directory"
|
echo "Testing ${dir} now"
|
||||||
cd "${PWD}/examples/${dir}"
|
cd "${PWD}/examples/${dir}"
|
||||||
bash test_ci.sh
|
bash test_ci.sh
|
||||||
|
env:
|
||||||
|
NCCL_SHM_DISABLE: 1
|
|
@ -0,0 +1,27 @@
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
def check_inputs(input_list):
    """Check that every requested example directory exists under ``examples``.

    Args:
        input_list: iterable of paths relative to the ``examples`` folder,
            e.g. ``images/vit``.

    Returns:
        bool: True only if every entry names an existing directory inside
        ``examples``; False as soon as one entry is empty, absolute, or does
        not resolve to a directory. An empty ``input_list`` yields True.
    """
    for path in input_list:
        # An empty entry (e.g. from a trailing comma) would resolve to the
        # ``examples`` folder itself, and an absolute entry would make
        # os.path.join discard the ``examples`` prefix entirely.
        if not path or os.path.isabs(path):
            return False
        real_path = os.path.join('examples', path)
        # The workflow changes into this location to run test_ci.sh,
        # so a plain file is not an acceptable match.
        if not os.path.isdir(real_path):
            return False
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Validate the example directories passed on the command line.

    Reads a comma-separated list from ``--fileNameList`` and prints
    ``success`` when ``check_inputs`` accepts every entry, otherwise
    ``failure``. The result is reported on stdout so the calling shell
    step can capture it.
    """
    parser = argparse.ArgumentParser()
    # Required: when the option is omitted argparse leaves the value as None,
    # and the ``.split`` below would fail with an AttributeError.
    parser.add_argument('-f', '--fileNameList', type=str, required=True, help="List of file names")
    args = parser.parse_args()
    name_list = args.fileNameList.split(",")
    is_correct = check_inputs(name_list)

    if is_correct:
        print('success')
    else:
        print('failure')
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
|
@ -5,9 +5,9 @@ def show_files(path, all_files):
|
||||||
# Traverse all the folder/file in current directory
|
# Traverse all the folder/file in current directory
|
||||||
file_list = os.listdir(path)
|
file_list = os.listdir(path)
|
||||||
# Determine the element is folder or file. If file, pass it into list, if folder, recurse.
|
# Determine the element is folder or file. If file, pass it into list, if folder, recurse.
|
||||||
for file in file_list:
|
for file_name in file_list:
|
||||||
# Get the abs directory using os.path.join() and store into cur_path.
|
# Get the abs directory using os.path.join() and store into cur_path.
|
||||||
cur_path = os.path.join(path, file)
|
cur_path = os.path.join(path, file_name)
|
||||||
# Determine whether folder
|
# Determine whether folder
|
||||||
if os.path.isdir(cur_path):
|
if os.path.isdir(cur_path):
|
||||||
show_files(cur_path, all_files)
|
show_files(cur_path, all_files)
|
||||||
|
@ -26,9 +26,8 @@ def main():
|
||||||
for file_loc in contents:
|
for file_loc in contents:
|
||||||
split_loc = file_loc.split('/')
|
split_loc = file_loc.split('/')
|
||||||
# must have two sub-folder levels after examples folder, such as examples/images/vit is acceptable, examples/images/README.md is not, examples/requirements.txt is not.
|
# must have two sub-folder levels after examples folder, such as examples/images/vit is acceptable, examples/images/README.md is not, examples/requirements.txt is not.
|
||||||
if len(split_loc) - split_loc.index('examples') >= 3:
|
if len(split_loc) >= 4:
|
||||||
tmp_loc = split_loc[(split_loc.index('examples') + 1):(split_loc.index('examples') + 3)]
|
re_loc = '/'.join(split_loc[1:3])
|
||||||
re_loc = join(tmp_loc, '/')
|
|
||||||
if re_loc not in all_loc:
|
if re_loc not in all_loc:
|
||||||
all_loc.append(re_loc)
|
all_loc.append(re_loc)
|
||||||
print(all_loc)
|
print(all_loc)
|
|
@ -3,14 +3,19 @@ import argparse
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument('--fileNameList', type=str)
|
parser.add_argument('-f', '--fileNameList', type=str, help="The list of changed files")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
name_list = args.fileNameList.split(":")
|
name_list = args.fileNameList.split(":")
|
||||||
folder_need_check = set()
|
folder_need_check = set()
|
||||||
for loc in name_list:
|
for loc in name_list:
|
||||||
# Find only the sub-folder of 'example' folder
|
# Find only the sub-sub-folder of the 'examples' folder
|
||||||
|
# the examples folder structure is like
|
||||||
|
# - examples
|
||||||
|
# - area
|
||||||
|
# - application
|
||||||
|
# - file
|
||||||
if loc.split("/")[0] == "examples" and len(loc.split("/")) >= 4:
|
if loc.split("/")[0] == "examples" and len(loc.split("/")) >= 4:
|
||||||
folder_need_check.add(loc.split("/")[1] + "/" + loc.split("/")[2])
|
folder_need_check.add('/'.join(loc.split("/")[1:3]))
|
||||||
# Output the result using print. Then the shell can get the values.
|
# Output the result using print. Then the shell can get the values.
|
||||||
print(list(folder_need_check))
|
print(list(folder_need_check))
|
||||||
|
|
|
@ -1,23 +0,0 @@
|
||||||
import argparse
|
|
||||||
import os
|
|
||||||
|
|
||||||
|
|
||||||
def detect_correct(loc_li):
|
|
||||||
for loc in loc_li:
|
|
||||||
real_loc = 'examples/' + eval(loc)
|
|
||||||
if not os.path.exists(real_loc):
|
|
||||||
return -1
|
|
||||||
return 1
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
parser = argparse.ArgumentParser()
|
|
||||||
parser.add_argument('--fileNameList', type=str)
|
|
||||||
args = parser.parse_args()
|
|
||||||
name_list = args.fileNameList.split(",")
|
|
||||||
result = detect_correct(name_list)
|
|
||||||
print(result)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main()
|
|
|
@ -6,8 +6,8 @@ from colossalai.amp import AMP_TYPE
|
||||||
BATCH_SIZE = 256
|
BATCH_SIZE = 256
|
||||||
LEARNING_RATE = 3e-3
|
LEARNING_RATE = 3e-3
|
||||||
WEIGHT_DECAY = 0.3
|
WEIGHT_DECAY = 0.3
|
||||||
NUM_EPOCHS = 10
|
NUM_EPOCHS = 2
|
||||||
WARMUP_EPOCHS = 3
|
WARMUP_EPOCHS = 1
|
||||||
|
|
||||||
# model config
|
# model config
|
||||||
IMG_SIZE = 224
|
IMG_SIZE = 224
|
||||||
|
|
|
@ -1,2 +1,3 @@
|
||||||
colossalai >= 0.1.12
|
colossalai >= 0.1.12
|
||||||
torch >= 1.8.1
|
torch >= 1.8.1
|
||||||
|
titans
|
|
@ -0,0 +1,5 @@
|
||||||
|
#!/bin/bash
|
||||||
|
set -euxo pipefail
|
||||||
|
|
||||||
|
pip install -r requirements.txt
|
||||||
|
torchrun --standalone --nproc_per_node 4 train.py --config config.py -s
|
|
@ -98,9 +98,9 @@ def main():
|
||||||
root = os.environ.get('DATA', '../data')
|
root = os.environ.get('DATA', '../data')
|
||||||
if args.synthetic:
|
if args.synthetic:
|
||||||
# if we use synthetic dataset
|
# if we use synthetic dataset
|
||||||
# we train for 30 steps and eval for 10 steps per epoch
|
# we train for 10 steps and eval for 5 steps per epoch
|
||||||
train_dataloader = DummyDataloader(length=30, batch_size=gpc.config.BATCH_SIZE)
|
train_dataloader = DummyDataloader(length=10, batch_size=gpc.config.BATCH_SIZE)
|
||||||
test_dataloader = DummyDataloader(length=10, batch_size=gpc.config.BATCH_SIZE)
|
test_dataloader = DummyDataloader(length=5, batch_size=gpc.config.BATCH_SIZE)
|
||||||
else:
|
else:
|
||||||
train_dataloader, test_dataloader = build_cifar(gpc.config.BATCH_SIZE, root, pad_if_needed=True)
|
train_dataloader, test_dataloader = build_cifar(gpc.config.BATCH_SIZE, root, pad_if_needed=True)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue