mirror of https://github.com/hpcaitech/ColossalAI
[workflow] hook example test alert with lark (#2585)
parent
788e138960
commit
186ddce2c4
|
@ -1,63 +0,0 @@
|
||||||
name: Manual Test Example
|
|
||||||
on:
|
|
||||||
workflow_dispatch:
|
|
||||||
inputs:
|
|
||||||
example_directory:
|
|
||||||
type: string
|
|
||||||
description: example directory, separated by space. For example, language/gpt, images/vit. Simply input language or simply gpt does not work.
|
|
||||||
required: true
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
manual_check_matrix_preparation:  # job id must match the `needs:` / `needs.<id>.outputs` references in test_example
|
|
||||||
if: |
|
|
||||||
github.event.pull_request.draft == false &&
|
|
||||||
github.base_ref == 'main' &&
|
|
||||||
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
|
|
||||||
name: Check the examples user want
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
outputs:
|
|
||||||
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
|
||||||
steps:
|
|
||||||
- name: 📚 Checkout
|
|
||||||
uses: actions/checkout@v3
|
|
||||||
- name: Set up matrix
|
|
||||||
id: set-matrix
|
|
||||||
env:
|
|
||||||
check_dir: ${{ inputs.example_directory }}
|
|
||||||
run: |
|
|
||||||
res=`python .github/workflows/scripts/example_checks/check_dispatch_inputs.py --fileNameList $check_dir`
|
|
||||||
if [ "$res" == "failure" ]; then  # compare the captured script output, not the literal word "res"
|
|
||||||
exit -1
|
|
||||||
fi
|
|
||||||
dirs="[${check_dir}]"
|
|
||||||
echo "Testing examples in $dirs"
|
|
||||||
echo "matrix={\"directory\":$(echo "$dirs")}" >> $GITHUB_OUTPUT
|
|
||||||
|
|
||||||
test_example:
|
|
||||||
if: |
|
|
||||||
github.event.pull_request.draft == false &&
|
|
||||||
github.base_ref == 'main' &&
|
|
||||||
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
|
|
||||||
name: Manually check example files
|
|
||||||
needs: manual_check_matrix_preparation
|
|
||||||
runs-on: [self-hosted, gpu]
|
|
||||||
strategy:
|
|
||||||
matrix: ${{fromJson(needs.manual_check_matrix_preparation.outputs.matrix)}}
|
|
||||||
container:
|
|
||||||
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
|
|
||||||
options: --gpus all --rm -v /data/scratch/examples-data:/data/
|
|
||||||
timeout-minutes: 10
|
|
||||||
steps:
|
|
||||||
- name: 📚 Checkout
|
|
||||||
uses: actions/checkout@v3
|
|
||||||
- name: Install Colossal-AI
|
|
||||||
run: |
|
|
||||||
pip install -v .
|
|
||||||
- name: Test the example
|
|
||||||
run: |
|
|
||||||
dir=${{ matrix.directory }}
|
|
||||||
echo "Testing ${dir} now"
|
|
||||||
cd "${PWD}/examples/${dir}"
|
|
||||||
bash test_ci.sh
|
|
||||||
env:
|
|
||||||
NCCL_SHM_DISABLE: 1
|
|
|
@ -3,13 +3,12 @@ on:
|
||||||
# run at 00:00 of every Sunday(singapore time) so here is UTC time Saturday 16:00
|
# run at 00:00 of every Sunday(singapore time) so here is UTC time Saturday 16:00
|
||||||
schedule:
|
schedule:
|
||||||
- cron: '0 16 * * 6'
|
- cron: '0 16 * * 6'
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
# This is for all files' weekly check. Specifically, this job is to find all the directories.
|
# This is for all files' weekly check. Specifically, this job is to find all the directories.
|
||||||
matrix_preparation:
|
matrix_preparation:
|
||||||
if: |
|
if: github.repository == 'hpcaitech/ColossalAI'
|
||||||
github.repository == 'hpcaitech/ColossalAI' &&
|
|
||||||
github.event_name == 'schedule'
|
|
||||||
name: Prepare matrix for weekly check
|
name: Prepare matrix for weekly check
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
outputs:
|
outputs:
|
||||||
|
@ -27,9 +26,7 @@ jobs:
|
||||||
echo "matrix={\"directory\":$(echo "$all_loc")}" >> $GITHUB_OUTPUT
|
echo "matrix={\"directory\":$(echo "$all_loc")}" >> $GITHUB_OUTPUT
|
||||||
|
|
||||||
weekly_check:
|
weekly_check:
|
||||||
if: |
|
if: github.repository == 'hpcaitech/ColossalAI'
|
||||||
github.repository == 'hpcaitech/ColossalAI' &&
|
|
||||||
github.event_name == 'schedule'
|
|
||||||
name: Weekly check all examples
|
name: Weekly check all examples
|
||||||
needs: matrix_preparation
|
needs: matrix_preparation
|
||||||
runs-on: [self-hosted, gpu]
|
runs-on: [self-hosted, gpu]
|
||||||
|
@ -55,3 +52,18 @@ jobs:
|
||||||
bash test_ci.sh
|
bash test_ci.sh
|
||||||
env:
|
env:
|
||||||
NCCL_SHM_DISABLE: 1
|
NCCL_SHM_DISABLE: 1
|
||||||
|
|
||||||
|
- name: Notify Lark
|
||||||
|
id: message-preparation
|
||||||
|
if: ${{ failure() }}
|
||||||
|
run: |
|
||||||
|
url=$SERVER_URL/$REPO/actions/runs/$RUN_ID
|
||||||
|
msg="Example tests failed for $EXAMPLE_DIR, please visit $url for details"
|
||||||
|
echo $msg
|
||||||
|
python .github/workflows/scripts/send_message_to_lark.py -m "$msg" -u "$WEBHOOK_URL"  # quoted so the URL is passed as a single argument without word splitting or globbing
|
||||||
|
env:
|
||||||
|
SERVER_URL: ${{github.server_url }}
|
||||||
|
REPO: ${{ github.repository }}
|
||||||
|
RUN_ID: ${{ github.run_id }}
|
||||||
|
WEBHOOK_URL: ${{ secrets.LARK_NOTIFICATION_WEBHOOK_URL }}
|
||||||
|
EXAMPLE_DIR: ${{ matrix.directory }}  # key must match the "directory" field emitted into the matrix output
|
||||||
|
|
Loading…
Reference in New Issue