[test] add no master test for low level zero plugin (#4934)

Zhongkai Zhao 2023-10-18 11:41:23 +08:00 committed by GitHub
parent 1f5d2e8062
commit c7aa319ba0
2 changed files with 9 additions and 3 deletions

@@ -9,7 +9,8 @@ from .nvme_optimizer import NVMeOptimizer
 class CPUAdam(NVMeOptimizer):
-    """Implements Adam algorithm.
+    """
+    Implements Adam algorithm.
     Supports parameters updating on both GPU and CPU, depending on the device of parameters.
     But the parameters and gradients should on the same device:

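For reference, a minimal usage sketch of the docstring's contract above, not taken from this commit: it assumes ColossalAI is installed with its CPU Adam kernel built and that CPUAdam is importable from colossalai.nn.optimizer. Parameters and gradients stay on the same device (CPU here), as the docstring requires.

import torch
from colossalai.nn.optimizer import CPUAdam

# Parameters live on CPU, so gradients produced by backward() are also on CPU.
model = torch.nn.Linear(16, 16)
optimizer = CPUAdam(model.parameters(), lr=1e-3)

loss = model(torch.randn(4, 16)).sum()
loss.backward()
optimizer.step()  # Adam update runs on CPU because params and grads share the device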

@@ -106,7 +106,8 @@ def exam_zero_1_2():
 @parameterize("dtype", [torch.float16, torch.bfloat16])
-def exam_zero_1_torch_ddp(world_size, dtype: torch.dtype):
+@parameterize("master_weights", [True, False])
+def exam_zero_1_torch_ddp(world_size, dtype: torch.dtype, master_weights: bool):
     """
     In this test, two pairs of model and optimizers are created.
     1. zero: use sharded optimizer and fp16 parameters
@@ -131,7 +132,11 @@ def exam_zero_1_torch_ddp(world_size, dtype: torch.dtype):
     # in `check_sharded_param_consistency.py`, we will test whether
     # level 1 and 2 will produce exactly the same results
     zero_optimizer = LowLevelZeroOptimizer(
-        zero_optimizer, overlap_communication=True, initial_scale=1, reduce_bucket_size=1024 * 1024
+        zero_optimizer,
+        overlap_communication=True,
+        initial_scale=1,
+        reduce_bucket_size=1024 * 1024,
+        master_weights=master_weights,
     )
     torch_optimizer = torch.optim.SGD(torch_model.parameters(), lr=1)
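For reference, a short sketch of how the stacked decorators above expand the test, not taken from this commit: it assumes colossalai.testing.parameterize calls the wrapped function once per listed value, so stacked decorators yield the Cartesian product of their arguments. With the new master_weights parameter, the comparison against torch DDP therefore runs for every (dtype, master_weights) combination, including the path where LowLevelZeroOptimizer keeps no fp32 master copy.

import torch
from colossalai.testing import parameterize

@parameterize("dtype", [torch.float16, torch.bfloat16])
@parameterize("master_weights", [True, False])
def show_configs(dtype: torch.dtype, master_weights: bool):
    # Each invocation receives one (dtype, master_weights) combination.
    print(dtype, master_weights)

show_configs()  # 2 dtypes x 2 master_weights settings = 4 runs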