mirror of https://github.com/InternLM/InternLM
fix conflicts
parent b80e6cdcf3
commit b5e4d04a9a
@@ -643,27 +643,6 @@ class HybridZeroOptimizer(BaseOptimizer):
         """
         assert closure is None, "closure is not supported by step()"
 
-<<<<<<< HEAD
-=======
-        # do all-reduce for layernorm when sequence_parallel is True
-        if gpc.config.parallel.sequence_parallel is True:
-            for group_id in range(len(self._fp16_param_groups)):
-                norm_bucket = TensorBucket(size=0)
-                for param in self._fp16_param_groups[group_id]:
-                    if hasattr(param, IS_SEQUENCE_PARALLEL) and getattr(param, IS_SEQUENCE_PARALLEL) is True:
-                        norm_bucket.add_to_bucket(param.grad, allow_oversize=True)
-                if not norm_bucket.is_empty():
-                    norm_bucket.flatten()
-                    norm_bucket.commu_handle = reduce_tensor(
-                        tensor=norm_bucket.get_flat_tensor(),
-                        dtype=None,
-                        dst_rank=None,
-                        parallel_mode=ParallelMode.TENSOR,
-                    )
-                    norm_bucket.commu_handle.wait()
-                    norm_bucket.unflatten_and_copy()
-
->>>>>>> c517ec5b8cdf9c675f97dcc615bfd39c2ffda010
         # if not overlapping communication (no reduction hook is attached)
         # we need to manually reduce these gradients
         if not self._overlap_sync_grad:
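For context, the block deleted here (the stray half of the merge conflict) all-reduces the gradients of sequence-parallel LayerNorm parameters across the tensor-parallel group before the optimizer step, by bucketing them into one flat buffer. Below is a minimal sketch of that pattern, assuming plain torch.distributed rather than InternLM's TensorBucket / reduce_tensor helpers; the function name and the is_sequence_parallel attribute are placeholders for illustration, not the repository's API.

    # Sketch only: flatten sequence-parallel grads into one buffer, all-reduce it,
    # then copy the reduced values back (stand-in for TensorBucket / reduce_tensor).
    import torch
    import torch.distributed as dist
    from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors

    def allreduce_sequence_parallel_grads(params, process_group=None):
        # Collect gradients of parameters flagged as sequence-parallel
        # (the real code checks an IS_SEQUENCE_PARALLEL attribute on each param).
        grads = [
            p.grad
            for p in params
            if getattr(p, "is_sequence_parallel", False) and p.grad is not None
        ]
        if not grads:
            return

        # Flatten into a single contiguous buffer so one collective covers all grads.
        flat = _flatten_dense_tensors(grads)

        # Sum the buffer across the tensor-parallel ranks.
        dist.all_reduce(flat, op=dist.ReduceOp.SUM, group=process_group)

        # Copy the reduced values back into the original gradient tensors.
        for grad, synced in zip(grads, _unflatten_dense_tensors(flat, grads)):
            grad.copy_(synced)

Flattening into a single buffer keeps the synchronization down to one collective call instead of one all-reduce per LayerNorm gradient.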