mirror of https://github.com/InternLM/InternLM
fix conflicts
parent b80e6cdcf3
commit b5e4d04a9a
@@ -643,27 +643,6 @@ class HybridZeroOptimizer(BaseOptimizer):
         """
         assert closure is None, "closure is not supported by step()"
 
-<<<<<<< HEAD
-=======
-        # do all-reduce for layernorm when sequence_parallel is True
-        if gpc.config.parallel.sequence_parallel is True:
-            for group_id in range(len(self._fp16_param_groups)):
-                norm_bucket = TensorBucket(size=0)
-                for param in self._fp16_param_groups[group_id]:
-                    if hasattr(param, IS_SEQUENCE_PARALLEL) and getattr(param, IS_SEQUENCE_PARALLEL) is True:
-                        norm_bucket.add_to_bucket(param.grad, allow_oversize=True)
-                if not norm_bucket.is_empty():
-                    norm_bucket.flatten()
-                    norm_bucket.commu_handle = reduce_tensor(
-                        tensor=norm_bucket.get_flat_tensor(),
-                        dtype=None,
-                        dst_rank=None,
-                        parallel_mode=ParallelMode.TENSOR,
-                    )
-                    norm_bucket.commu_handle.wait()
-                    norm_bucket.unflatten_and_copy()
-
->>>>>>> c517ec5b8cdf9c675f97dcc615bfd39c2ffda010
         # if not overlapping communication (no reduction hook is attached)
         # we need to manually reduce these gradients
         if not self._overlap_sync_grad:
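For context, the block deleted here (the stray half of the merge conflict) all-reduces the gradients of sequence-parallel LayerNorm parameters across the tensor-parallel group before the optimizer step, by bucketing them into one flat buffer. Below is a minimal sketch of that pattern, assuming plain torch.distributed rather than InternLM's TensorBucket / reduce_tensor helpers; the function name and the is_sequence_parallel attribute are placeholders for illustration, not the repository's API.

    # Sketch only: flatten sequence-parallel grads into one buffer, all-reduce it,
    # then copy the reduced values back (stand-in for TensorBucket / reduce_tensor).
    import torch
    import torch.distributed as dist
    from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors

    def allreduce_sequence_parallel_grads(params, process_group=None):
        # Collect gradients of parameters flagged as sequence-parallel
        # (the real code checks an IS_SEQUENCE_PARALLEL attribute on each param).
        grads = [
            p.grad
            for p in params
            if getattr(p, "is_sequence_parallel", False) and p.grad is not None
        ]
        if not grads:
            return

        # Flatten into a single contiguous buffer so one collective covers all grads.
        flat = _flatten_dense_tensors(grads)

        # Sum the buffer across the tensor-parallel ranks.
        dist.all_reduce(flat, op=dist.ReduceOp.SUM, group=process_group)

        # Copy the reduced values back into the original gradient tensors.
        for grad, synced in zip(grads, _unflatten_dense_tensors(flat, grads)):
            grad.copy_(synced)

Flattening into a single buffer keeps the synchronization down to one collective call instead of one all-reduce per LayerNorm gradient.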