From cc0dfddcbc8ec09033583b870c35901aabf44a4e Mon Sep 17 00:00:00 2001
From: duanjunwen <935724073@qq.com>
Date: Fri, 25 Oct 2024 09:01:13 +0000
Subject: [PATCH] [fix] fix test_shard_llama

---
 colossalai/shardformer/policies/llama.py     | 38 +++++++++++--------
 examples/language/llama/benchmark.py         |  1 -
 .../test_schedule/test_zerobubble_pp.py      |  4 +-
 3 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/colossalai/shardformer/policies/llama.py b/colossalai/shardformer/policies/llama.py
index 8a980bf9d..28ac2dc7f 100644
--- a/colossalai/shardformer/policies/llama.py
+++ b/colossalai/shardformer/policies/llama.py
@@ -394,8 +394,8 @@ class LlamaForCausalLMPolicy(LlamaPolicy):
         return held_layers
 
     def get_shared_params(self) -> List[Dict[int, Tensor]]:
-        # if self.pipeline_stage_manager is not None and self.pipeline_stage_manager.use_zbv:
-        #     return []
+        if self.pipeline_stage_manager is not None and self.pipeline_stage_manager.use_zbv:
+            return []
         llama_model = self.model.model
         if self.pipeline_stage_manager and self.pipeline_stage_manager.num_stages > 1:
             if (
@@ -403,20 +403,26 @@ class LlamaForCausalLMPolicy(LlamaPolicy):
                 and self.pipeline_stage_manager.num_stages > 1
             ):
                 # tie weights
-                if self.pipeline_stage_manager.use_zbv:
-                    return [
-                        {
-                            0: llama_model.embed_tokens.weight,
-                            0: self.model.lm_head.weight,
-                        }
-                    ]
-                else:
-                    return [
-                        {
-                            0: llama_model.embed_tokens.weight,
-                            self.pipeline_stage_manager.num_stages - 1: self.model.lm_head.weight,
-                        }
-                    ]
+                return [
+                    {
+                        0: llama_model.embed_tokens.weight,
+                        self.pipeline_stage_manager.num_stages - 1: self.model.lm_head.weight,
+                    }
+                ]
+                # if self.pipeline_stage_manager.use_zbv:
+                #     return [
+                #         {
+                #             0: llama_model.embed_tokens.weight,
+                #             0: self.model.lm_head.weight,
+                #         }
+                #     ]
+                # else:
+                #     return [
+                #         {
+                #             0: llama_model.embed_tokens.weight,
+                #             self.pipeline_stage_manager.num_stages - 1: self.model.lm_head.weight,
+                #         }
+                #     ]
         return []
 
 
diff --git a/examples/language/llama/benchmark.py b/examples/language/llama/benchmark.py
index 0d80bc225..b60bdd03e 100644
--- a/examples/language/llama/benchmark.py
+++ b/examples/language/llama/benchmark.py
@@ -237,7 +237,6 @@ def main():
         ).get_v_schedule()
     else:
         scheduler_nodes = None
-        # print(f"{dist.get_rank()} {scheduler_nodes[]} ")
 
     plugin = HybridParallelPlugin(
         tp_size=args.tp,
diff --git a/tests/test_pipeline/test_schedule/test_zerobubble_pp.py b/tests/test_pipeline/test_schedule/test_zerobubble_pp.py
index 5f286d173..c485d3f54 100644
--- a/tests/test_pipeline/test_schedule/test_zerobubble_pp.py
+++ b/tests/test_pipeline/test_schedule/test_zerobubble_pp.py
@@ -923,9 +923,9 @@ def run_with_booster_moehybridplugin(config: Tuple[int, ...]):
 @parameterize(
     "config",
     [
-        # (1, 2, 2, 1), # Pass
+        (1, 2, 2, 1),  # Pass
         # TODO: only support pp + tp accleration; Will support fully pp and None tp Hybrid in furture;
-        (0, 4, 1, 1),
+        # (0, 4, 1, 1),
         # (1, 2, 1, 2),
         # (1, 1, 2, 2),
     ],
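
Reviewer note on the get_shared_params() change: under the zero-bubble V-schedule
(use_zbv), the rank holding pipeline stage 0 also ends up holding the last model
chunk, so embed_tokens.weight and lm_head.weight sit on the same device and no
cross-stage synchronization of the tied weight is needed; returning [] early
expresses that. The removed zbv branch was also ill-formed: its dict literal used
the key 0 twice, so the second entry silently overwrote the first.

Below is a minimal sketch of how a mapping returned by get_shared_params() is
typically consumed to keep tied weights in sync across stages. The helper name
sync_shared_param_grads, the group_for_stages factory, and the signature are
illustrative assumptions, not ColossalAI's actual API.

    import torch.distributed as dist

    def sync_shared_param_grads(shared_params, my_stage, group_for_stages):
        """Hypothetical helper: all-reduce gradients of tied weights across the
        pipeline stages listed in each mapping from Policy.get_shared_params().

        shared_params:    e.g. [{0: embed_tokens.weight, num_stages - 1: lm_head.weight}]
        my_stage:         pipeline stage index of the current process
        group_for_stages: callable mapping a sorted tuple of stage indices to a process group
        """
        for mapping in shared_params:
            if my_stage not in mapping:
                continue  # this stage holds no copy of the tied weight
            group = group_for_stages(tuple(sorted(mapping)))
            grad = mapping[my_stage].grad
            if grad is not None:
                # keep embed_tokens and lm_head gradients identical across stages
                dist.all_reduce(grad, op=dist.ReduceOp.SUM, group=group)

An empty list from get_shared_params() simply leaves this loop with nothing to do,
which is the intended behavior when both tied tensors already live on one rank.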