From 868afdb31191ef7b3fa48d6fa71e7758c8707786 Mon Sep 17 00:00:00 2001
From: Wang Binluo <32676639+wangbluo@users.noreply.github.com>
Date: Wed, 26 Jun 2024 16:07:06 +0800
Subject: [PATCH] Dev/zero offload (#5858)

* fix llama

* fix llama
---
 colossalai/shardformer/modeling/llama.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/colossalai/shardformer/modeling/llama.py b/colossalai/shardformer/modeling/llama.py
index 5855dcc4f..d8ea2c74d 100644
--- a/colossalai/shardformer/modeling/llama.py
+++ b/colossalai/shardformer/modeling/llama.py
@@ -803,8 +803,6 @@ def get_lm_forward_with_dist_cross_entropy(shard_config: ShardConfig):
                 shift_logits,
                 shift_labels,
                 process_group=shard_config.tensor_parallel_process_group,
-                vocab_size=self.lm_head.out_features,
-                dtype=self.model.dtype,
             )
 
         if not return_dict: