diff --git a/internlm/solver/optimizer/hybrid_zero_optim.py b/internlm/solver/optimizer/hybrid_zero_optim.py index 0d0c8a3..d2c894c 100644 --- a/internlm/solver/optimizer/hybrid_zero_optim.py +++ b/internlm/solver/optimizer/hybrid_zero_optim.py @@ -350,7 +350,7 @@ class HybridZeroOptimizer(BaseOptimizer): _param.grad.add_(_grad) # release cuda memory. - gpc.fstp_handler.release_reduce_scatter_memory(key=tuple(_grad.size()), index=_grad.index) + self._fstp_handler.release_reduce_scatter_memory(key=tuple(_grad.size()), index=_grad.index) self._fstp_handler.reduce_scatter_handlers[_key] = None bucket.reset_by_rank(reduce_rank)