[test] add no master test for low level zero plugin (#4934)

pull/4990/head
Zhongkai Zhao 2023-10-18 11:41:23 +08:00 committed by GitHub
parent 1f5d2e8062
commit c7aa319ba0
2 changed files with 9 additions and 3 deletions


@@ -9,7 +9,8 @@ from .nvme_optimizer import NVMeOptimizer
 class CPUAdam(NVMeOptimizer):
-    """Implements Adam algorithm.
+    """
+    Implements Adam algorithm.
     Supports parameters updating on both GPU and CPU, depending on the device of parameters.
     But the parameters and gradients should on the same device:
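For context on the docstring being touched here, a minimal usage sketch of CPUAdam follows. The import path and the (params, lr) constructor convention are assumptions based on standard torch.optim usage and are not part of this diff; the point illustrated is that parameters and gradients must live on the same device.

    import torch
    import torch.nn as nn

    from colossalai.nn.optimizer import CPUAdam  # assumed import path

    # Keep the whole model on one device so parameters and gradients match,
    # as required by the docstring above. CPU parameters take the CPU update path.
    model = nn.Linear(16, 4)
    optimizer = CPUAdam(model.parameters(), lr=1e-3)  # assumed torch.optim-style signature

    loss = model(torch.randn(8, 16)).sum()
    loss.backward()        # gradients are created on the same (CPU) device
    optimizer.step()
    optimizer.zero_grad()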


@@ -106,7 +106,8 @@ def exam_zero_1_2():
 @parameterize("dtype", [torch.float16, torch.bfloat16])
-def exam_zero_1_torch_ddp(world_size, dtype: torch.dtype):
+@parameterize("master_weights", [True, False])
+def exam_zero_1_torch_ddp(world_size, dtype: torch.dtype, master_weights: bool):
     """
     In this test, two pairs of model and optimizers are created.
     1. zero: use sharded optimizer and fp16 parameters
@@ -131,7 +132,11 @@ def exam_zero_1_torch_ddp(world_size, dtype: torch.dtype):
     # in `check_sharded_param_consistency.py`, we will test whether
     # level 1 and 2 will produce exactly the same results
     zero_optimizer = LowLevelZeroOptimizer(
-        zero_optimizer, overlap_communication=True, initial_scale=1, reduce_bucket_size=1024 * 1024
+        zero_optimizer,
+        overlap_communication=True,
+        initial_scale=1,
+        reduce_bucket_size=1024 * 1024,
+        master_weights=master_weights,
     )
     torch_optimizer = torch.optim.SGD(torch_model.parameters(), lr=1)
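For readability, here is a condensed, hypothetical sketch of the parameterized test after this change, with the keyword arguments taken from the hunk above. The import paths, the model used, and the reference comparison are assumptions or elisions, not shown in this commit; the real test is launched in a distributed setting, where world_size comes from the test runner.

    import torch
    import torch.nn as nn

    from colossalai.testing import parameterize          # assumed import path
    from colossalai.zero import LowLevelZeroOptimizer    # assumed import path


    @parameterize("dtype", [torch.float16, torch.bfloat16])
    @parameterize("master_weights", [True, False])
    def exam_zero_1_torch_ddp(world_size, dtype: torch.dtype, master_weights: bool):
        # Build the ZeRO-wrapped optimizer with the arguments shown in the diff;
        # the torch DDP reference model and consistency checks are elided here.
        zero_model = nn.Linear(32, 32).cuda().to(dtype)
        zero_optimizer = LowLevelZeroOptimizer(
            torch.optim.SGD(zero_model.parameters(), lr=1),
            overlap_communication=True,
            initial_scale=1,
            reduce_bucket_size=1024 * 1024,
            master_weights=master_weights,  # flag exercised by this commit
        )
        # ... forward/backward on zero_model, zero_optimizer.step(), then compare
        # the updated parameters against the torch DDP reference (see the full test file).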