mirror of https://github.com/hpcaitech/ColossalAI
remove useless code
parent
9efc79ef24
commit
2632916329
|
@ -270,7 +270,6 @@ class MistralForwards:
|
||||||
shift_labels = labels[..., 1:].contiguous()
|
shift_labels = labels[..., 1:].contiguous()
|
||||||
# Flatten the tokens
|
# Flatten the tokens
|
||||||
loss_fct = CrossEntropyLoss()
|
loss_fct = CrossEntropyLoss()
|
||||||
#shift_logits = shift_logits.view(-1, self.config.vocab_size)
|
|
||||||
shift_labels = shift_labels.view(-1)
|
shift_labels = shift_labels.view(-1)
|
||||||
# Enable model parallelism
|
# Enable model parallelism
|
||||||
shift_labels = shift_labels.to(shift_logits.device)
|
shift_labels = shift_labels.to(shift_logits.device)
|
||||||
|
|
|
@ -277,7 +277,6 @@ class MistralForCausalLMPolicy(MistralPolicy):
|
||||||
suffix="lm_head",
|
suffix="lm_head",
|
||||||
target_module=VocabParallelLMHead1D,
|
target_module=VocabParallelLMHead1D,
|
||||||
kwargs={
|
kwargs={
|
||||||
#gather_output=True,
|
|
||||||
"gather_output": not self.shard_config.parallel_output,
|
"gather_output": not self.shard_config.parallel_output,
|
||||||
"make_vocab_size_divisible_by": self.shard_config.make_vocab_size_divisible_by,
|
"make_vocab_size_divisible_by": self.shard_config.make_vocab_size_divisible_by,
|
||||||
},
|
},
|
||||||
|
|
Loading…
Reference in New Issue