From 7ca5da27e8679723cd42d38fc7b00dbf3a8febdd Mon Sep 17 00:00:00 2001 From: Wenwen Qu Date: Thu, 31 Aug 2023 18:46:13 +0800 Subject: [PATCH] fix group_norms computing in hybrid_zero_optim --- internlm/solver/optimizer/hybrid_zero_optim.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/internlm/solver/optimizer/hybrid_zero_optim.py b/internlm/solver/optimizer/hybrid_zero_optim.py index c43611f..3e43b3a 100644 --- a/internlm/solver/optimizer/hybrid_zero_optim.py +++ b/internlm/solver/optimizer/hybrid_zero_optim.py @@ -574,7 +574,10 @@ class HybridZeroOptimizer(BaseOptimizer): # compute norm for gradients in the before bucket groups_norms = [] for group_id in range(self.num_param_groups): - groups_norms.append(self._compute_norm_with_stage(group_id=group_id)) + if self._is_moe_group(self.optim.param_groups[group_id]): + groups_norms.append([]) + else: + groups_norms.append(self._compute_norm_with_stage(group_id=group_id)) # clear reduced grads if self._overlap_sync_grad: