mirror of https://github.com/InternLM/InternLM
Fix the pipeline-parallel (PP) MoE loss bugs
parent
7b1709a7ff
commit
db685e8a31
|
@@ -1326,9 +1326,11 @@ class InterleavedPipelineScheduler(PipelineScheduler):
|
||||||
output, label = pack_return_tensors(self._return_tensors)
|
output, label = pack_return_tensors(self._return_tensors)
|
||||||
else:
|
else:
|
||||||
output, label = (None, None)
|
output, label = (None, None)
|
||||||
accum_loss = self._accum_loss
|
|
||||||
|
|
||||||
logger.info(f"{gpc.get_local_rank(ParallelMode.PIPELINE)}, moe_loss: {accum_moe_loss.item()}")
|
accum_loss = self._accum_loss
|
||||||
|
accum_loss += self._accum_moe_loss
|
||||||
|
|
||||||
|
logger.info(f"{gpc.get_local_rank(ParallelMode.PIPELINE)}, moe_loss: {self._accum_moe_loss.item()}")
|
||||||
|
|
||||||
dist.all_reduce(self._accum_moe_loss, group=gpc.get_group(ParallelMode.PIPELINE))
|
dist.all_reduce(self._accum_moe_loss, group=gpc.get_group(ParallelMode.PIPELINE))
|
||||||
accum_moe_loss = self._accum_moe_loss
|
accum_moe_loss = self._accum_moe_loss
|
||||||
|
|
Loading…
Reference in New Issue