mirror of https://github.com/hpcaitech/ColossalAI
[workflow] hook example test alert with lark (#2585)
parent
788e138960
commit
186ddce2c4
|
@ -1,63 +0,0 @@
|
|||
# Workflow that runs the CI test of selected examples on demand.
name: Manual Test Example

# Triggered only by hand; the caller must say which example directories to test.
on:
  workflow_dispatch:
    inputs:
      example_directory:
        required: true
        type: string
        description: 'example directory, separated by space. For example, language/gpt, images/vit. Simply input language or simply gpt does not work.'
|
||||
|
||||
jobs:
|
||||
# Validates the requested example directories and exposes them as the
# `matrix` job output (a JSON object with a single `directory` list).
matrix_preparation:
  # NOTE(review): these conditions read pull-request context, while the only
  # trigger visible for this workflow is workflow_dispatch; confirm that the
  # job is not skipped on every manual run.
  if: |
    github.event.pull_request.draft == false &&
    github.base_ref == 'main' &&
    github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
  name: Check the examples user want
  runs-on: ubuntu-latest
  outputs:
    matrix: ${{ steps.set-matrix.outputs.matrix }}
  steps:
    - name: 📚 Checkout
      uses: actions/checkout@v3
    - name: Set up matrix
      id: set-matrix
      env:
        # Passed through the environment so the input is never pasted into the script text.
        check_dir: ${{ inputs.example_directory }}
      run: |
        res=$(python .github/workflows/scripts/example_checks/check_dispatch_inputs.py --fileNameList $check_dir)
        # Compare the checker's output; the previous test compared the literal word "res".
        if [ "$res" = "failure" ]; then
          exit 1
        fi
        dirs="[${check_dir}]"
        echo "Testing examples in $dirs"
        echo "matrix={\"directory\":${dirs}}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
# Runs `test_ci.sh` for every directory in the matrix published by the
# `matrix_preparation` job, inside the project's CUDA container on a GPU runner.
test_example:
  # NOTE(review): these conditions read pull-request context, while the only
  # trigger visible for this workflow is workflow_dispatch; confirm that the
  # job is not skipped on every manual run.
  if: |
    github.event.pull_request.draft == false &&
    github.base_ref == 'main' &&
    github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
  name: Manually check example files
  # Must match the id of the job that produces the matrix output.
  needs: matrix_preparation
  runs-on: [self-hosted, gpu]
  strategy:
    matrix: ${{fromJson(needs.matrix_preparation.outputs.matrix)}}
  container:
    image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
    options: --gpus all --rm -v /data/scratch/examples-data:/data/
  timeout-minutes: 10
  steps:
    - name: 📚 Checkout
      uses: actions/checkout@v3
    - name: Install Colossal-AI
      run: |
        pip install -v .
    - name: Test the example
      run: |
        dir="${EXAMPLE_DIR}"
        echo "Testing ${dir} now"
        cd "${PWD}/examples/${dir}"
        bash test_ci.sh
      env:
        NCCL_SHM_DISABLE: 1
        # The matrix value originates from user input, so it reaches the
        # shell through the environment instead of inline interpolation.
        EXAMPLE_DIR: ${{ matrix.directory }}
|
|
@ -3,13 +3,12 @@ on:
|
|||
# Run at 00:00 every Sunday (Singapore time), which is 16:00 Saturday in UTC.
|
||||
schedule:
|
||||
- cron: '0 16 * * 6'
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
# This is for all files' weekly check. Specifically, this job is to find all the directories.
|
||||
matrix_preparation:
|
||||
if: |
|
||||
github.repository == 'hpcaitech/ColossalAI' &&
|
||||
github.event_name == 'schedule'
|
||||
if: github.repository == 'hpcaitech/ColossalAI'
|
||||
name: Prepare matrix for weekly check
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
|
@ -27,9 +26,7 @@ jobs:
|
|||
echo "matrix={\"directory\":$(echo "$all_loc")}" >> $GITHUB_OUTPUT
|
||||
|
||||
weekly_check:
|
||||
if: |
|
||||
github.repository == 'hpcaitech/ColossalAI' &&
|
||||
github.event_name == 'schedule'
|
||||
if: github.repository == 'hpcaitech/ColossalAI'
|
||||
name: Weekly check all examples
|
||||
needs: matrix_preparation
|
||||
runs-on: [self-hosted, gpu]
|
||||
|
@ -55,3 +52,18 @@ jobs:
|
|||
bash test_ci.sh
|
||||
env:
|
||||
NCCL_SHM_DISABLE: 1
|
||||
|
||||
# Sends a Lark alert with a link to this run; executes only when an earlier
# step of the job has failed.
- name: Notify Lark
  id: message-preparation
  if: ${{ failure() }}
  run: |
    url="$SERVER_URL/$REPO/actions/runs/$RUN_ID"
    msg="Example tests failed for $EXAMPLE_DIR, please visit $url for details"
    echo "$msg"
    python .github/workflows/scripts/send_message_to_lark.py -m "$msg" -u "$WEBHOOK_URL"
  env:
    SERVER_URL: ${{ github.server_url }}
    REPO: ${{ github.repository }}
    RUN_ID: ${{ github.run_id }}
    WEBHOOK_URL: ${{ secrets.LARK_NOTIFICATION_WEBHOOK_URL }}
    # The matrix key is `directory`; the former spelling `diretory` expanded to an empty string.
    EXAMPLE_DIR: ${{ matrix.directory }}
|
||||
|
|
Loading…
Reference in New Issue