From b5e4d04a9a410aec027a1273eae2d3687ae27834 Mon Sep 17 00:00:00 2001
From: yingtongxiong <974106207@qq.com>
Date: Mon, 6 Nov 2023 12:08:31 +0800
Subject: [PATCH] fix conflicts

---
 .../solver/optimizer/hybrid_zero_optim.py    | 21 -------------------
 1 file changed, 21 deletions(-)

diff --git a/internlm/solver/optimizer/hybrid_zero_optim.py b/internlm/solver/optimizer/hybrid_zero_optim.py
index e5927e6..b033539 100644
--- a/internlm/solver/optimizer/hybrid_zero_optim.py
+++ b/internlm/solver/optimizer/hybrid_zero_optim.py
@@ -643,27 +643,6 @@ class HybridZeroOptimizer(BaseOptimizer):
         """
         assert closure is None, "closure is not supported by step()"
 
-<<<<<<< HEAD
-=======
-        # do all-reduce for layernorm when sequence_parallel is True
-        if gpc.config.parallel.sequence_parallel is True:
-            for group_id in range(len(self._fp16_param_groups)):
-                norm_bucket = TensorBucket(size=0)
-                for param in self._fp16_param_groups[group_id]:
-                    if hasattr(param, IS_SEQUENCE_PARALLEL) and getattr(param, IS_SEQUENCE_PARALLEL) is True:
-                        norm_bucket.add_to_bucket(param.grad, allow_oversize=True)
-                if not norm_bucket.is_empty():
-                    norm_bucket.flatten()
-                    norm_bucket.commu_handle = reduce_tensor(
-                        tensor=norm_bucket.get_flat_tensor(),
-                        dtype=None,
-                        dst_rank=None,
-                        parallel_mode=ParallelMode.TENSOR,
-                    )
-                    norm_bucket.commu_handle.wait()
-                    norm_bucket.unflatten_and_copy()
-
->>>>>>> c517ec5b8cdf9c675f97dcc615bfd39c2ffda010
         # if not overlapping communication (no reduction hook is attached)
         # we need to manually reduce these gradients
         if not self._overlap_sync_grad:
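
For context, the block this commit deletes implemented a bucketed all-reduce of sequence-parallel (LayerNorm) gradients over the tensor-parallel group: flag-marked parameters' grads were gathered into a flat bucket, reduced, and copied back. Below is a minimal, self-contained sketch of that pattern in plain PyTorch, not InternLM's API; the helper name `allreduce_sequence_parallel_grads` and the string value bound to `IS_SEQUENCE_PARALLEL` are illustrative assumptions.

```python
import torch
import torch.distributed as dist
from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors

# Attribute name assumed for illustration; InternLM sets a similar flag on
# parameters whose gradients must be synchronized under sequence parallelism.
IS_SEQUENCE_PARALLEL = "is_sequence_parallel"

def allreduce_sequence_parallel_grads(params, group=None):
    """All-reduce grads of sequence-parallel params in one flat bucket."""
    grads = [
        p.grad for p in params
        if p.grad is not None and getattr(p, IS_SEQUENCE_PARALLEL, False)
    ]
    if not grads:
        return
    flat = _flatten_dense_tensors(grads)   # pack into one contiguous buffer
    dist.all_reduce(flat, group=group)     # sum across the tensor-parallel group
    for grad, synced in zip(grads, _unflatten_dense_tensors(flat, grads)):
        grad.copy_(synced)                 # write the reduced values back
```

Flattening into a single buffer before the collective mirrors the `TensorBucket` design in the deleted code: one large all-reduce amortizes launch and communication latency better than one call per LayerNorm parameter.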