mirror of https://github.com/hpcaitech/ColossalAI
[devops] improving testmon cache (#3902)
* [devops] improving testmon cache * [devops] fix branch name with slash * [devops] fix branch name with slash * [devops] fix edit action * [devops] fix edit action * [devops] fix edit action * [devops] fix edit action * [devops] fix edit action * [devops] fix edit action * [devops] update readmepull/3740/head^2
parent
57a6d7685c
commit
ec9bbc0094
|
@ -43,10 +43,18 @@ I will provide the details of each workflow below.
|
|||
|
||||
| Workflow Name | File name | Description |
|
||||
| ---------------------- | -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `Build on PR` | `build_on_pr.yml` | This workflow is triggered when a PR changes essential files. It will run all the unit tests in the repository with 4 GPUs. |
|
||||
| `Build on PR` | `build_on_pr.yml` | This workflow is triggered when a PR changes essential files or when a branch is created/deleted. It will run all the unit tests in the repository with 4 GPUs. |
|
||||
| `Build on Schedule` | `build_on_schedule.yml` | This workflow will run the unit tests every day with 8 GPUs. The result is sent to Lark. |
|
||||
| `Report test coverage` | `report_test_coverage.yml` | This workflow will post a comment on the PR to report the test coverage results when `Build` is done. |
|
||||
|
||||
To reduce the average time of the unit test on PR, `Build on PR` workflow manages testmon cache.
|
||||
|
||||
1. When creating a new branch, it copies `cache/main/.testmondata*` to `cache/<branch>/`.
|
||||
2. When creating a new PR or changing the base branch of a PR, it copies `cache/<base_ref>/.testmondata*` to `cache/_pull/<pr_number>/`.
|
||||
3. When running unit tests for each PR, it restores testmon cache from `cache/_pull/<pr_number>/`. After the test, it stores the cache back to `cache/_pull/<pr_number>/`.
|
||||
4. When a PR is closed, if it's merged, it copies `cache/_pull/<pr_number>/.testmondata*` to `cache/<base_ref>/`. Otherwise, it just removes `cache/_pull/<pr_number>`.
|
||||
5. When a branch is deleted, it removes `cache/<ref>`.
|
||||
|
||||
### Example Test
|
||||
|
||||
| Workflow Name | File name | Description |
|
||||
|
|
|
@ -2,7 +2,7 @@ name: Build on PR
|
|||
|
||||
on:
|
||||
pull_request:
|
||||
types: [synchronize, opened, reopened]
|
||||
types: [synchronize, opened, reopened, ready_for_review, closed, edited]
|
||||
branches:
|
||||
- "main"
|
||||
- "develop"
|
||||
|
@ -18,11 +18,63 @@ on:
|
|||
- "!tests/**.md" # ignore doc change
|
||||
- "pytest.ini" # test config change
|
||||
- "setup.py" # install command change
|
||||
create:
|
||||
delete:
|
||||
|
||||
jobs:
|
||||
prepare_cache:
  name: Prepare testmon cache
  # Runs only when a branch (not a tag) is created in the upstream repository.
  if: |
    github.event_name == 'create' &&
    github.event.ref_type == 'branch' &&
    github.event.repository.full_name == 'hpcaitech/ColossalAI'
  runs-on: [self-hosted, gpu]
  container:
    image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
    options: --rm
  timeout-minutes: 5
  defaults:
    run:
      shell: bash
  steps:
    # Seed the new branch's cache directory with the default branch's cache.
    - name: Copy testmon cache
      run: |
        # A branch name may contain slashes; replace every one of them with a
        # space so the name maps onto a single directory level.
        REF_BRANCH="${REF//\// }"
        SRC="/github/home/testmon_cache/${MAIN_BRANCH}"
        DST="/github/home/testmon_cache/${REF_BRANCH}"
        # One combined condition keeps the step's exit status at 0 when there
        # is nothing to copy (a trailing `[ ... ] && cp` would report failure).
        if [ -d "${SRC}" ] && [ -n "$(ls -A "${SRC}")" ]; then
          mkdir -p "${DST}"
          # Copy the directory contents (dotfiles included) so that an already
          # existing destination does not end up with a nested copy.
          cp -p -r "${SRC}/." "${DST}/"
        fi
      env:
        # Event data reaches the script through the environment rather than
        # being interpolated into the shell source.
        MAIN_BRANCH: ${{ github.event.master_branch }}
        REF: ${{ github.event.ref }}
|
||||
|
||||
prepare_cache_for_pr:
  name: Prepare testmon cache for PR
  # Runs when a PR against the upstream repository is opened, reopened, or has
  # its base branch changed.
  if: |
    github.event_name == 'pull_request' &&
    (github.event.action == 'opened' || github.event.action == 'reopened' || (github.event.action == 'edited' && github.event.changes.base != null)) &&
    github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
  runs-on: [self-hosted, gpu]
  container:
    image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
    options: --rm
  timeout-minutes: 5
  defaults:
    run:
      shell: bash
  steps:
    # Seed the per-PR cache directory with the base branch's cache.
    - name: Copy testmon cache
      run: |
        # A branch name may contain slashes; replace every one of them with a
        # space so the name maps onto a single directory level.
        BASE="${BASE_REF//\// }"
        SRC="/github/home/testmon_cache/${BASE}"
        DST="/github/home/testmon_cache/_pull/${PR_NUMBER}"
        # One combined condition keeps the step's exit status at 0 when there
        # is nothing to copy.
        if [ -d "${SRC}" ] && [ -n "$(ls -A "${SRC}")" ]; then
          # -p: `_pull` already exists once any earlier PR has been prepared.
          mkdir -p "${DST}"
          # Copy the directory contents (dotfiles included); this also works
          # when the destination exists because the base branch was changed.
          cp -p -r "${SRC}/." "${DST}/"
        fi
      env:
        # Event data reaches the script through the environment rather than
        # being interpolated into the shell source.
        BASE_REF: ${{ github.event.pull_request.base.ref }}
        # Must be the PR number: the unit-test steps read and write
        # `_pull/<pr_number>`.
        PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
detect:
|
||||
name: Detect file change
|
||||
if: |
|
||||
github.event_name == 'pull_request' &&
|
||||
(github.event.action == 'synchronize' || github.event.action == 'opened' || github.event.action == 'reopened' || github.event.action == 'ready_for_review') &&
|
||||
github.event.pull_request.draft == false &&
|
||||
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
|
||||
outputs:
|
||||
|
@ -135,9 +187,11 @@ jobs:
|
|||
|
||||
# Bring this PR's testmon data into the workspace before the tests run.
- name: Restore Testmon Cache
  run: |
    CACHE_DIR="/github/home/testmon_cache/_pull/${PR_NUMBER}"
    # One combined condition keeps the step's exit status at 0 when the
    # per-PR cache is missing or empty.
    if [ -d "${CACHE_DIR}" ] && [ -n "$(ls -A "${CACHE_DIR}")" ]; then
      cp -p -r "${CACHE_DIR}"/.testmondata* /__w/ColossalAI/ColossalAI/
    fi
  env:
    PR_NUMBER: ${{ github.event.number }}
|
||||
|
||||
- name: Execute Unit Testing
|
||||
run: |
|
||||
|
@ -149,8 +203,10 @@ jobs:
|
|||
|
||||
# Save the testmon data produced by this run into the PR's own cache directory.
- name: Store Testmon Cache
  run: |
    CACHE_DIR="/github/home/testmon_cache/_pull/${PR_NUMBER}"
    mkdir -p "${CACHE_DIR}"
    cp -p -r /__w/ColossalAI/ColossalAI/.testmondata* "${CACHE_DIR}/"
  env:
    PR_NUMBER: ${{ github.event.number }}
|
||||
|
||||
- name: Collate artifact
|
||||
env:
|
||||
|
@ -188,3 +244,55 @@ jobs:
|
|||
with:
|
||||
name: report
|
||||
path: report/
|
||||
|
||||
store_cache:
  name: Store testmon cache for PR
  # Runs when a PR against the upstream repository is closed (merged or not).
  if: |
    github.event_name == 'pull_request' &&
    github.event.action == 'closed' &&
    github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
  runs-on: [self-hosted, gpu]
  container:
    image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
    options: --rm
  timeout-minutes: 5
  defaults:
    run:
      shell: bash
  steps:
    # Merged PR: promote its testmon data to the base branch's cache.
    - name: Store testmon cache if possible
      if: github.event.pull_request.merged == true
      run: |
        # A branch name may contain slashes; replace every one of them with a
        # space so the name maps onto a single directory level.
        BASE="${BASE_REF//\// }"
        SRC="/github/home/testmon_cache/_pull/${PR_NUMBER}"
        DST="/github/home/testmon_cache/${BASE}"
        # Copy only when testmon data actually exists; the combined condition
        # keeps the step's exit status at 0 when there is nothing to store.
        if [ -d "${SRC}" ] && compgen -G "${SRC}/.testmondata*" > /dev/null; then
          # The base branch may not have a cache directory yet.
          mkdir -p "${DST}"
          cp -p -r "${SRC}"/.testmondata* "${DST}/"
        fi
      env:
        # Event data reaches the script through the environment rather than
        # being interpolated into the shell source.
        BASE_REF: ${{ github.event.pull_request.base.ref }}
        PR_NUMBER: ${{ github.event.pull_request.number }}

    # Closed without merging: the per-PR cache is no longer needed.
    - name: Remove testmon cache
      if: github.event.pull_request.merged != true
      run: |
        rm -rf "/github/home/testmon_cache/_pull/${PR_NUMBER}"
      env:
        PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
remove_cache:
  name: Remove testmon cache
  # Runs only when a branch (not a tag) is deleted in the upstream repository.
  if: |
    github.event_name == 'delete' &&
    github.event.ref_type == 'branch' &&
    github.event.repository.full_name == 'hpcaitech/ColossalAI'
  runs-on: [self-hosted, gpu]
  container:
    image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
    options: --rm
  timeout-minutes: 5
  defaults:
    run:
      shell: bash
  steps:
    # Drop the cache directory that belonged to the deleted branch.
    - name: Remove testmon cache
      run: |
        # A branch name may contain slashes; replace every one of them with a
        # space so the name maps onto a single directory level.
        BASE="${REF//\// }"
        # Never let an empty ref expand to the cache root, and never remove
        # the `_pull` area that holds the per-PR caches.
        if [ -n "${BASE}" ] && [ "${BASE}" != "_pull" ]; then
          rm -rf "/github/home/testmon_cache/${BASE}"
        fi
      env:
        # Event data reaches the script through the environment rather than
        # being interpolated into the shell source.
        REF: ${{ github.event.ref }}
|
||||
|
|
Loading…
Reference in New Issue