From d4ff644ef32f354f510e49cdaaa6a04690238971 Mon Sep 17 00:00:00 2001 From: wangbluo <2538539015@qq.com> Date: Thu, 20 Jun 2024 09:04:57 +0000 Subject: [PATCH] update qwen model --- colossalai/shardformer/modeling/qwen2.py | 4 ++-- colossalai/shardformer/policies/qwen2.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/colossalai/shardformer/modeling/qwen2.py b/colossalai/shardformer/modeling/qwen2.py index e5f2b31e3..2f5521866 100644 --- a/colossalai/shardformer/modeling/qwen2.py +++ b/colossalai/shardformer/modeling/qwen2.py @@ -51,7 +51,7 @@ class Qwen2PipelineForwards: output_hidden_states = ( output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states ) - use_cache = use_cache if use_cache is not None else self.config.use_cache + use_cache = False#use_cache if use_cache is not None else self.config.use_cache return_dict = return_dict if return_dict is not None else self.config.use_return_dict @@ -592,4 +592,4 @@ def get_lm_forward_with_dist_cross_entropy(shard_config: ShardConfig): attentions=outputs.attentions, ) - return forward + return forward \ No newline at end of file diff --git a/colossalai/shardformer/policies/qwen2.py b/colossalai/shardformer/policies/qwen2.py index 933223ba7..7b35b7239 100644 --- a/colossalai/shardformer/policies/qwen2.py +++ b/colossalai/shardformer/policies/qwen2.py @@ -333,4 +333,4 @@ class Qwen2ForSequenceClassificationPolicy(Qwen2Policy): def get_shared_params(self) -> List[Dict[int, Tensor]]: """No shared params in Qwen2 for sequence classification model""" - return [] + return [] \ No newline at end of file