From deee317b0ff54eeec2e03b10dc2a0e7b7b0d9ea3 Mon Sep 17 00:00:00 2001 From: Jiarui Fang Date: Tue, 13 Dec 2022 16:34:10 +0800 Subject: [PATCH] [Gemini] test step-tensor mapping using repeated_computed_layers.py (#2127) --- tests/components_to_test/__init__.py | 7 ++++++- ...ted_computed_layer.py => repeated_computed_layers.py} | 0 tests/test_gemini/update/test_gemini_use_rmt.py | 9 ++++++++- 3 files changed, 14 insertions(+), 2 deletions(-) rename tests/components_to_test/{repeated_computed_layer.py => repeated_computed_layers.py} (100%) diff --git a/tests/components_to_test/__init__.py b/tests/components_to_test/__init__.py index dc27d3607..e498786fb 100644 --- a/tests/components_to_test/__init__.py +++ b/tests/components_to_test/__init__.py @@ -4,10 +4,15 @@ from . import ( hanging_param_model, inline_op_model, nested_model, - repeated_computed_layer, + repeated_computed_layers, resnet, simple_net, ) from .utils import run_fwd_bwd from . import albert # isort:skip + +__all__ = [ + 'bert', 'gpt2', 'hanging_param_model', 'inline_op_model', 'nested_model', 'repeated_computed_layers', 'resnet', + 'simple_net', 'run_fwd_bwd', 'albert' +] diff --git a/tests/components_to_test/repeated_computed_layer.py b/tests/components_to_test/repeated_computed_layers.py similarity index 100% rename from tests/components_to_test/repeated_computed_layer.py rename to tests/components_to_test/repeated_computed_layers.py diff --git a/tests/test_gemini/update/test_gemini_use_rmt.py b/tests/test_gemini/update/test_gemini_use_rmt.py index 518c22fdb..82439144b 100644 --- a/tests/test_gemini/update/test_gemini_use_rmt.py +++ b/tests/test_gemini/update/test_gemini_use_rmt.py @@ -23,7 +23,7 @@ from tests.test_tensor.common_utils import set_seed @parameterize('placement_policy', ['auto']) @parameterize('keep_gather', [False]) -@parameterize('model_name', ['bert', 'albert', 'gpt2']) +@parameterize('model_name', ['repeated_computed_layers', 'bert', 'albert', 'gpt2']) @parameterize('use_grad_checkpoint', [False, True]) def run_gemini_use_rmt(placement_policy, keep_gather, model_name: str, use_grad_checkpoint: bool = False): set_seed(42) @@ -49,6 +49,13 @@ def run_gemini_use_rmt(placement_policy, keep_gather, model_name: str, use_grad_ print('runtime tracer: ', runtime_tracer_non_model_data) print([memstats.param_used_timestep(p) for p in model.parameters()]) + if model_name == 'repeated_computed_layers': + for idx, p in enumerate(model.parameters()): + step_list = memstats.param_used_timestep(p) + if idx < 4: + assert len(step_list) == 4 + + world_size = torch.distributed.get_world_size() config_dict, _ = search_chunk_configuration(model, search_range_mb=1, search_interval_byte=100) config_dict[world_size]['chunk_size'] = 5000