From 26329163291e2f8142494bece2db5548aef469c9 Mon Sep 17 00:00:00 2001
From: wangbluo <2538539015@qq.com>
Date: Wed, 1 May 2024 09:23:43 +0000
Subject: [PATCH] remove useless code

---
 colossalai/shardformer/modeling/mistral.py | 1 -
 colossalai/shardformer/policies/mistral.py | 1 -
 2 files changed, 2 deletions(-)

diff --git a/colossalai/shardformer/modeling/mistral.py b/colossalai/shardformer/modeling/mistral.py
index 642fa3b40..796aeca51 100644
--- a/colossalai/shardformer/modeling/mistral.py
+++ b/colossalai/shardformer/modeling/mistral.py
@@ -270,7 +270,6 @@ class MistralForwards:
             shift_labels = labels[..., 1:].contiguous()
             # Flatten the tokens
             loss_fct = CrossEntropyLoss()
-            #shift_logits = shift_logits.view(-1, self.config.vocab_size)
             shift_labels = shift_labels.view(-1)
             # Enable model parallelism
             shift_labels = shift_labels.to(shift_logits.device)
diff --git a/colossalai/shardformer/policies/mistral.py b/colossalai/shardformer/policies/mistral.py
index 362fd11e5..936fd2d24 100644
--- a/colossalai/shardformer/policies/mistral.py
+++ b/colossalai/shardformer/policies/mistral.py
@@ -277,7 +277,6 @@ class MistralForCausalLMPolicy(MistralPolicy):
                             suffix="lm_head",
                             target_module=VocabParallelLMHead1D,
                             kwargs={
-                                #gather_output=True,
                                 "gather_output": not self.shard_config.parallel_output,
                                 "make_vocab_size_divisible_by": self.shard_config.make_vocab_size_divisible_by,
                             },
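
Note: the first hunk sits inside the standard causal-LM loss computation. The sketch below is not the ColossalAI/shardformer code; it is a minimal, self-contained illustration of the shift-and-flatten cross-entropy pattern visible in the diff context, using toy tensor shapes chosen only for demonstration.

```python
# Minimal sketch of the causal-LM loss pattern around the removed comment.
# Assumed toy shapes; not the ColossalAI implementation.
import torch
from torch.nn import CrossEntropyLoss

batch, seq_len, vocab_size = 2, 8, 32          # illustrative dimensions
logits = torch.randn(batch, seq_len, vocab_size)
labels = torch.randint(0, vocab_size, (batch, seq_len))

# Predict token t+1 from position t: drop the last logit and the first label.
shift_logits = logits[..., :-1, :].contiguous()
shift_labels = labels[..., 1:].contiguous()

loss_fct = CrossEntropyLoss()
# Flatten the labels; move them to the logits' device (model parallelism).
shift_labels = shift_labels.view(-1)
shift_labels = shift_labels.to(shift_logits.device)

# CrossEntropyLoss expects (N, C) logits and (N,) targets.
loss = loss_fct(shift_logits.view(-1, vocab_size), shift_labels)
print(loss.item())
```

The line removed by the patch was a stale, commented-out copy of this logits flattening; the live shardformer forward handles the logits' shape itself, so the comment carried no information.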