[fp8] fix missing fp8_comm flag in mixtral (#6057)

botbw committed 2 months ago via GitHub
parent a35a078f08
commit 696fced0d7
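
This commit threads the fp8_communication flag from the shard config through MixtralPolicy into EPMixtralSparseMoeBlock, so the all-reduce over expert activation counts on the MoE data-parallel group can take the FP8 path (all_reduce_fp8) instead of always falling back to dist.all_reduce.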

@@ -31,6 +31,7 @@ from colossalai.moe._operation import (
     all_to_all_uneven,
 )
 from colossalai.pipeline.stage_manager import PipelineStageManager
+from colossalai.quantization.fp8 import all_reduce_fp8
 from colossalai.shardformer.layer._operation import (
     all_to_all_comm,
     gather_forward_split_backward,
@@ -142,6 +143,10 @@ class EPMixtralSparseMoeBlock(ParallelModule):
         for i in range(1, self.ep_size):
             activate_experts += output_split_sizes[i * self.num_experts_per_ep : (i + 1) * self.num_experts_per_ep]
         activate_experts = (activate_experts > 0).float()
-        dist.all_reduce(activate_experts, group=self.moe_dp_group)
+        if self.fp8_communication:
+            all_reduce_fp8(activate_experts, group=self.moe_dp_group)
+        else:
+            dist.all_reduce(activate_experts, group=self.moe_dp_group)
         input_split_list = input_split_sizes.view(self.ep_size, self.num_experts_per_ep).sum(dim=-1).tolist()
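
For context, activate_experts here is a 0/1 float mask of which local experts received tokens, and the all-reduce combines it across the MoE data-parallel group. Below is a minimal sketch of the idea behind an FP8-compressed all-reduce; it illustrates the concept only and is not ColossalAI's actual all_reduce_fp8 (which this commit merely calls), trading bandwidth-optimal ring reduction for a simple all-gather-and-sum. The function name all_reduce_fp8_sketch is hypothetical.

import torch
import torch.distributed as dist

def all_reduce_fp8_sketch(tensor: torch.Tensor, group=None) -> None:
    """In-place sum all-reduce that ships float8_e4m3fn bytes instead of full-precision values."""
    world_size = dist.get_world_size(group)
    # Per-tensor scale so the largest magnitude maps near e4m3's max representable value (~448).
    scale = (tensor.abs().max() / 448.0).clamp(min=1e-12).reshape(1)
    fp8 = (tensor / scale).to(torch.float8_e4m3fn)
    # Collectives do not accept fp8 dtypes, so transport the raw bytes as uint8.
    payload = fp8.view(torch.uint8)
    gathered = [torch.empty_like(payload) for _ in range(world_size)]
    scales = [torch.empty_like(scale) for _ in range(world_size)]
    dist.all_gather(gathered, payload, group=group)
    dist.all_gather(scales, scale, group=group)
    # Dequantize each rank's contribution and accumulate in the original dtype.
    result = torch.zeros_like(tensor)
    for chunk, s in zip(gathered, scales):
        result += chunk.view(torch.float8_e4m3fn).to(tensor.dtype) * s
    tensor.copy_(result)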

@@ -178,6 +178,7 @@ class MixtralPolicy(Policy):
                         "ep_group": self.shard_config.ep_group,
                         "tp_group": self.shard_config.tensor_parallel_process_group,
                         "moe_dp_group": self.shard_config.moe_dp_group,
+                        "fp8_communication": self.shard_config.fp8_communication,
                     },
                 )
             ],
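
With the policy now forwarding the flag, enabling FP8 communication at the plugin level reaches the Mixtral MoE block. A hedged usage sketch follows; the plugin and argument names are assumptions based on ColossalAI's hybrid-parallel API around the time of this commit, not something this diff confirms.

from colossalai.booster import Booster
from colossalai.booster.plugin import MoeHybridParallelPlugin

# Assumption: MoeHybridParallelPlugin exposes fp8_communication and copies it
# onto shard_config, which MixtralPolicy now forwards to EPMixtralSparseMoeBlock.
plugin = MoeHybridParallelPlugin(
    tp_size=1,
    pp_size=1,
    ep_size=2,               # expert parallelism for the Mixtral MoE layers
    fp8_communication=True,  # before this fix, the flag never reached the block
)
booster = Booster(plugin=plugin)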
