From 548d1bd7afd5faca808be4e862ecc8c60bdcf765 Mon Sep 17 00:00:00 2001
From: zhanglei
Date: Fri, 22 Sep 2023 12:30:02 +0800
Subject: [PATCH] refactor code

---
 internlm/solver/optimizer/hybrid_zero_optim.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/internlm/solver/optimizer/hybrid_zero_optim.py b/internlm/solver/optimizer/hybrid_zero_optim.py
index 7e021d8..ccdb53c 100644
--- a/internlm/solver/optimizer/hybrid_zero_optim.py
+++ b/internlm/solver/optimizer/hybrid_zero_optim.py
@@ -695,9 +695,11 @@ class HybridZeroOptimizer(BaseOptimizer):
 
             # Parameters shared within a TP group, such as norm and moe gate, have precision inconsistency in gradients.
             # Therefore, it is recommended to synchronize gradients within the TP group to eliminate accumulated errors.
-            is_tp_shared_params = (self._is_norm_group(self.optim.param_groups[group_id])
-                                   or self._is_gate_group(self.optim.param_groups[group_id]))
-            if is_tp_shared_params:
+            is_tp_sync_groups = (
+                self._is_norm_group(self.optim.param_groups[group_id]),
+                self._is_gate_group(self.optim.param_groups[group_id]),
+            )
+            if any(is_tp_sync_groups):
                 dist.all_reduce(
                     flat_fp32_avg_grads,
                     op=dist.ReduceOp.AVG,
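
Note (not part of the patch): a minimal, self-contained sketch of the pattern the patch adopts, shown outside the diff context. The tuple of per-group checks, the any(...) gate, and the averaging all-reduce come from the patch itself; the standalone function name sync_tp_shared_grads, the tp_process_group argument, and passing the optimizer in explicitly are hypothetical stand-ins for the real HybridZeroOptimizer attributes and helpers.

    import torch
    import torch.distributed as dist

    def sync_tp_shared_grads(optim, group_id, flat_fp32_avg_grads, tp_process_group):
        # Each predicate flags a param group whose parameters are shared across
        # tensor-parallel ranks (e.g. norm layers, the MoE gate). Collecting the
        # results in a tuple keeps the list of checks easy to extend.
        is_tp_sync_groups = (
            optim._is_norm_group(optim.param_groups[group_id]),
            optim._is_gate_group(optim.param_groups[group_id]),
        )
        if any(is_tp_sync_groups):
            # Average the flattened fp32 gradients across the TP group so every
            # rank holds identical values before the optimizer step, removing
            # the accumulated precision drift mentioned in the patch comments.
            dist.all_reduce(
                flat_fp32_avg_grads,
                op=dist.ReduceOp.AVG,
                group=tp_process_group,
            )

Compared with the original boolean `or`, the tuple form only changes how the condition is expressed: any() short-circuits the same way, but appending another `_is_*_group` check later is a one-line change.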