mirror of https://github.com/hpcaitech/ColossalAI
[devops] improving testmon cache (#3902)
* [devops] improving testmon cache * [devops] fix branch name with slash * [devops] fix branch name with slash * [devops] fix edit action * [devops] fix edit action * [devops] fix edit action * [devops] fix edit action * [devops] fix edit action * [devops] fix edit action * [devops] update readmepull/3740/head^2
parent
57a6d7685c
commit
ec9bbc0094
|
@ -43,10 +43,18 @@ I will provide the details of each workflow below.
|
||||||
|
|
||||||
| Workflow Name | File name | Description |
|
| Workflow Name | File name | Description |
|
||||||
| ---------------------- | -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ---------------------- | -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `Build on PR` | `build_on_pr.yml` | This workflow is triggered when a PR changes essential files. It will run all the unit tests in the repository with 4 GPUs. |
|
| `Build on PR` | `build_on_pr.yml` | This workflow is triggered when a PR changes essential files or when a branch is created/deleted. It will run all the unit tests in the repository with 4 GPUs. |
|
||||||
| `Build on Schedule` | `build_on_schedule.yml` | This workflow will run the unit tests everyday with 8 GPUs. The result is sent to Lark. |
|
| `Build on Schedule` | `build_on_schedule.yml` | This workflow will run the unit tests everyday with 8 GPUs. The result is sent to Lark. |
|
||||||
| `Report test coverage` | `report_test_coverage.yml` | This workflow will put up a comment to report the test coverage results when `Build` is done. |
|
| `Report test coverage` | `report_test_coverage.yml` | This workflow will put up a comment to report the test coverage results when `Build` is done. |
|
||||||
|
|
||||||
|
To reduce the average unit-test time on PRs, the `Build on PR` workflow manages a testmon cache.
|
||||||
|
|
||||||
|
1. When creating a new branch, it copies `cache/main/.testmondata*` to `cache/<branch>/`.
|
||||||
|
2. When a new PR is created or the base branch of a PR is changed, it copies `cache/<base_ref>/.testmondata*` to `cache/_pull/<pr_number>/`.
|
||||||
|
3. When running unit tests for each PR, it restores testmon cache from `cache/_pull/<pr_number>/`. After the test, it stores the cache back to `cache/_pull/<pr_number>/`.
|
||||||
|
4. When a PR is closed, if it's merged, it copies `cache/_pull/<pr_number>/.testmondata*` to `cache/<base_ref>/`. Otherwise, it just removes `cache/_pull/<pr_number>`.
|
||||||
|
5. When a branch is deleted, it removes `cache/<ref>`.
|
||||||
|
|
||||||
### Example Test
|
### Example Test
|
||||||
|
|
||||||
| Workflow Name | File name | Description |
|
| Workflow Name | File name | Description |
|
||||||
|
|
|
@ -2,7 +2,7 @@ name: Build on PR
|
||||||
|
|
||||||
on:
|
on:
|
||||||
pull_request:
|
pull_request:
|
||||||
types: [synchronize, opened, reopened]
|
types: [synchronize, opened, reopened, ready_for_review, closed, edited]
|
||||||
branches:
|
branches:
|
||||||
- "main"
|
- "main"
|
||||||
- "develop"
|
- "develop"
|
||||||
|
@ -18,11 +18,63 @@ on:
|
||||||
- "!tests/**.md" # ignore doc change
|
- "!tests/**.md" # ignore doc change
|
||||||
- "pytest.ini" # test config change
|
- "pytest.ini" # test config change
|
||||||
- "setup.py" # install command change
|
- "setup.py" # install command change
|
||||||
|
create:
|
||||||
|
delete:
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
|
prepare_cache:
|
||||||
|
name: Prepare testmon cache
|
||||||
|
if: |
|
||||||
|
github.event_name == 'create' &&
|
||||||
|
github.event.ref_type == 'branch' &&
|
||||||
|
github.event.repository.full_name == 'hpcaitech/ColossalAI'
|
||||||
|
runs-on: [self-hosted, gpu]
|
||||||
|
container:
|
||||||
|
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
|
||||||
|
options: --rm
|
||||||
|
timeout-minutes: 5
|
||||||
|
defaults:
|
||||||
|
run:
|
||||||
|
shell: bash
|
||||||
|
steps:
|
||||||
|
- name: Copy testmon cache
|
||||||
|
run: | # branch name may contain slash, we need to replace it with space
|
||||||
|
export REF_BRANCH=$(echo ${{ github.event.ref }} | sed "s/\// /")
|
||||||
|
if [ -d /github/home/testmon_cache/${MAIN_BRANCH} ]; then
|
||||||
|
[ ! -z "$(ls -A /github/home/testmon_cache/${MAIN_BRANCH})" ] && cp -p -r /github/home/testmon_cache/${MAIN_BRANCH} "/github/home/testmon_cache/${REF_BRANCH}"
|
||||||
|
fi
|
||||||
|
env:
|
||||||
|
MAIN_BRANCH: ${{ github.event.master_branch }}
|
||||||
|
|
||||||
|
prepare_cache_for_pr:
|
||||||
|
name: Prepare testmon cache for PR
|
||||||
|
if: |
|
||||||
|
github.event_name == 'pull_request' &&
|
||||||
|
(github.event.action == 'opened' || github.event.action == 'reopened' || (github.event.action == 'edited' && github.event.changes.base != null)) &&
|
||||||
|
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
|
||||||
|
runs-on: [self-hosted, gpu]
|
||||||
|
container:
|
||||||
|
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
|
||||||
|
options: --rm
|
||||||
|
timeout-minutes: 5
|
||||||
|
defaults:
|
||||||
|
run:
|
||||||
|
shell: bash
|
||||||
|
steps:
|
||||||
|
- name: Copy testmon cache
|
||||||
|
run: | # branch name may contain slash, we need to replace it with space
|
||||||
|
export BASE=$(echo ${{ github.event.pull_request.base.ref }} | sed "s/\// /")
|
||||||
|
if [ -d "/github/home/testmon_cache/${BASE}" ]; then
|
||||||
|
[ ! -z "$(ls -A "/github/home/testmon_cache/${BASE}")" ] && mkdir -p /github/home/testmon_cache/_pull && cp -p -r "/github/home/testmon_cache/${BASE}" /github/home/testmon_cache/_pull/${PR_NUMBER}  # mkdir -p: _pull usually exists already; a plain mkdir would fail and skip the copy
|
||||||
|
fi
|
||||||
|
env:
|
||||||
|
PR_NUMBER: ${{ github.event.pull_request.number }}  # directories under _pull/ are keyed by PR number, not by head branch name
|
||||||
|
|
||||||
detect:
|
detect:
|
||||||
name: Detect file change
|
name: Detect file change
|
||||||
if: |
|
if: |
|
||||||
|
github.event_name == 'pull_request' &&
|
||||||
|
(github.event.action == 'synchronize' || github.event.action == 'opened' || github.event.action == 'reopened' || github.event.action == 'ready_for_review') &&
|
||||||
github.event.pull_request.draft == false &&
|
github.event.pull_request.draft == false &&
|
||||||
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
|
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
|
||||||
outputs:
|
outputs:
|
||||||
|
@ -135,9 +187,11 @@ jobs:
|
||||||
|
|
||||||
- name: Restore Testmon Cache
|
- name: Restore Testmon Cache
|
||||||
run: |
|
run: |
|
||||||
if [ -d /github/home/testmon_cache ]; then
|
if [ -d /github/home/testmon_cache/_pull/${PR_NUMBER} ]; then
|
||||||
[ ! -z "$(ls -A /github/home/testmon_cache)" ] && cp -p -r /github/home/testmon_cache/.testmondata* /__w/ColossalAI/ColossalAI/
|
[ ! -z "$(ls -A /github/home/testmon_cache/_pull/${PR_NUMBER})" ] && cp -p -r /github/home/testmon_cache/_pull/${PR_NUMBER}/.testmondata* /__w/ColossalAI/ColossalAI/
|
||||||
fi
|
fi
|
||||||
|
env:
|
||||||
|
PR_NUMBER: ${{ github.event.number }}
|
||||||
|
|
||||||
- name: Execute Unit Testing
|
- name: Execute Unit Testing
|
||||||
run: |
|
run: |
|
||||||
|
@ -149,8 +203,10 @@ jobs:
|
||||||
|
|
||||||
- name: Store Testmon Cache
|
- name: Store Testmon Cache
|
||||||
run: |
|
run: |
|
||||||
[ -d /github/home/testmon_cache ] || mkdir /github/home/testmon_cache
|
mkdir -p /github/home/testmon_cache/_pull/${PR_NUMBER}
|
||||||
cp -p -r /__w/ColossalAI/ColossalAI/.testmondata* /github/home/testmon_cache/
|
cp -p -r /__w/ColossalAI/ColossalAI/.testmondata* /github/home/testmon_cache/_pull/${PR_NUMBER}/
|
||||||
|
env:
|
||||||
|
PR_NUMBER: ${{ github.event.number }}
|
||||||
|
|
||||||
- name: Collate artifact
|
- name: Collate artifact
|
||||||
env:
|
env:
|
||||||
|
@ -188,3 +244,55 @@ jobs:
|
||||||
with:
|
with:
|
||||||
name: report
|
name: report
|
||||||
path: report/
|
path: report/
|
||||||
|
|
||||||
|
store_cache:
|
||||||
|
name: Store testmon cache for PR
|
||||||
|
if: |
|
||||||
|
github.event_name == 'pull_request' &&
|
||||||
|
github.event.action == 'closed' &&
|
||||||
|
github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI'
|
||||||
|
runs-on: [self-hosted, gpu]
|
||||||
|
container:
|
||||||
|
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
|
||||||
|
options: --rm
|
||||||
|
timeout-minutes: 5
|
||||||
|
defaults:
|
||||||
|
run:
|
||||||
|
shell: bash
|
||||||
|
steps:
|
||||||
|
- name: Store testmon cache if possible
|
||||||
|
if: github.event.pull_request.merged == true
|
||||||
|
run: | # branch name may contain slash, we need to replace it with space
|
||||||
|
export BASE=$(echo ${{ github.event.pull_request.base.ref }} | sed "s/\// /")
|
||||||
|
if [ -d /github/home/testmon_cache/_pull/${PR_NUMBER} ]; then
|
||||||
|
[ ! -z "$(ls -A /github/home/testmon_cache/_pull/${PR_NUMBER})" ] && mkdir -p "/github/home/testmon_cache/${BASE}" && cp -p -r /github/home/testmon_cache/_pull/${PR_NUMBER}/.testmondata* "/github/home/testmon_cache/${BASE}/"  # mkdir -p: the base-branch cache directory may not exist yet
|
||||||
|
fi
|
||||||
|
env:
|
||||||
|
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||||
|
|
||||||
|
- name: Remove testmon cache
|
||||||
|
# No condition on purpose: once the PR is closed its per-PR cache is obsolete whether or not it was merged, so always remove it.
|
||||||
|
run: |
|
||||||
|
rm -rf /github/home/testmon_cache/_pull/${PR_NUMBER}
|
||||||
|
env:
|
||||||
|
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||||
|
|
||||||
|
remove_cache:
|
||||||
|
name: Remove testmon cache
|
||||||
|
if: |
|
||||||
|
github.event_name == 'delete' &&
|
||||||
|
github.event.ref_type == 'branch' &&
|
||||||
|
github.event.repository.full_name == 'hpcaitech/ColossalAI'
|
||||||
|
runs-on: [self-hosted, gpu]
|
||||||
|
container:
|
||||||
|
image: hpcaitech/pytorch-cuda:1.12.0-11.3.0
|
||||||
|
options: --rm
|
||||||
|
timeout-minutes: 5
|
||||||
|
defaults:
|
||||||
|
run:
|
||||||
|
shell: bash
|
||||||
|
steps:
|
||||||
|
- name: Remove testmon cache
|
||||||
|
run: | # branch name may contain slash, we need to replace it with space
|
||||||
|
export BASE=$(echo ${{ github.event.ref }} | sed "s/\// /")
|
||||||
|
rm -rf "/github/home/testmon_cache/${BASE}"
|
||||||
|
|
Loading…
Reference in New Issue