diff --git a/.github/workflows/dispatch_example_check.yml b/.github/workflows/dispatch_example_check.yml
deleted file mode 100644
index e0333422f..000000000
--- a/.github/workflows/dispatch_example_check.yml
+++ /dev/null
@@ -1,63 +0,0 @@
-name: Manual Test Example
-on:
-  workflow_dispatch:
-    inputs:
-      example_directory:
-        type: string
-        description: example directory, separated by space. For example, language/gpt, images/vit. Simply input language or simply gpt does not work.
-        required: true
-
-jobs:
-  matrix_preparation:
-    if: |
-      github.event.pull_request.draft == false &&
-      github.base_ref == 'main' &&
-      github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
-    name: Check the examples user want
-    runs-on: ubuntu-latest
-    outputs:
-      matrix: ${{ steps.set-matrix.outputs.matrix }}
-    steps:
-      - name: 📚 Checkout
-        uses: actions/checkout@v3
-      - name: Set up matrix
-        id: set-matrix
-        env:
-          check_dir: ${{ inputs.example_directory }}
-        run: |
-          res=`python .github/workflows/scripts/example_checks/check_dispatch_inputs.py --fileNameList $check_dir`
-          if [ res == "failure" ];then
-            exit -1
-          fi
-          dirs="[${check_dir}]"
-          echo "Testing examples in $dirs"
-          echo "matrix={\"directory\":$(echo "$dirs")}" >> $GITHUB_OUTPUT
-
-  test_example:
-    if: |
-      github.event.pull_request.draft == false &&
-      github.base_ref == 'main' &&
-      github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
-    name: Manually check example files
-    needs: manual_check_matrix_preparation
-    runs-on: [self-hosted, gpu]
-    strategy:
-      matrix: ${{fromJson(needs.manual_check_matrix_preparation.outputs.matrix)}}
-    container:
-      image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
-      options: --gpus all --rm -v /data/scratch/examples-data:/data/
-    timeout-minutes: 10
-    steps:
-      - name: 📚 Checkout
-        uses: actions/checkout@v3
-      - name: Install Colossal-AI
-        run: |
-          pip install -v .
-      - name: Test the example
-        run: |
-          dir=${{ matrix.directory }}
-          echo "Testing ${dir} now"
-          cd "${PWD}/examples/${dir}"
-          bash test_ci.sh
-        env:
-          NCCL_SHM_DISABLE: 1
diff --git a/.github/workflows/example_check_on_schedule.yml b/.github/workflows/example_check_on_schedule.yml
index 07424ecbe..9d8dcbbb5 100644
--- a/.github/workflows/example_check_on_schedule.yml
+++ b/.github/workflows/example_check_on_schedule.yml
@@ -3,13 +3,12 @@ on:
   # run at 00:00 of every Sunday(singapore time) so here is UTC time Saturday 16:00
   schedule:
     - cron: '0 16 * * 6'
+  workflow_dispatch:
 
 jobs:
   # This is for all files' weekly check. Specifically, this job is to find all the directories.
   matrix_preparation:
-    if: |
-      github.repository == 'hpcaitech/ColossalAI' &&
-      github.event_name == 'schedule'
+    if: github.repository == 'hpcaitech/ColossalAI'
     name: Prepare matrix for weekly check
     runs-on: ubuntu-latest
     outputs:
@@ -27,9 +26,7 @@ jobs:
           echo "matrix={\"directory\":$(echo "$all_loc")}" >> $GITHUB_OUTPUT
 
   weekly_check:
-    if: |
-      github.repository == 'hpcaitech/ColossalAI' &&
-      github.event_name == 'schedule'
+    if: github.repository == 'hpcaitech/ColossalAI'
     name: Weekly check all examples
     needs: matrix_preparation
     runs-on: [self-hosted, gpu]
@@ -55,3 +52,20 @@ jobs:
           bash test_ci.sh
         env:
           NCCL_SHM_DISABLE: 1
+
+      # Runs only when an earlier step of this job failed; hands the failing
+      # example directory and a link to this run to send_message_to_lark.py.
+      - name: Notify Lark
+        id: message-preparation
+        if: ${{ failure() }}
+        run: |
+          url=$SERVER_URL/$REPO/actions/runs/$RUN_ID
+          msg="Example tests failed for $EXAMPLE_DIR, please visit $url for details"
+          echo "$msg"
+          python .github/workflows/scripts/send_message_to_lark.py -m "$msg" -u "$WEBHOOK_URL"
+        env:
+          SERVER_URL: ${{ github.server_url }}
+          REPO: ${{ github.repository }}
+          RUN_ID: ${{ github.run_id }}
+          WEBHOOK_URL: ${{ secrets.LARK_NOTIFICATION_WEBHOOK_URL }}
+          EXAMPLE_DIR: ${{ matrix.directory }}