mirror of https://github.com/InternLM/InternLM
fix accumulate grads bug
parent b48687a7ff
commit 0996c47e49
@@ -590,14 +590,14 @@ class HybridZeroOptimizer(BaseOptimizer):
                 if param.grad is not None:
                     self._store_and_try_reduce_grads_by_bucket(param)
 
-        # we need to reduce the gradients left in the communication bucket
-        for group_id in range(self.num_param_groups):
-            self._reduce_grads_stored_in_bucket(self._bucket_store[group_id], reduce_rank=None, last_bucket=True)
-
         # we need to accumulate the gradients left in the accumulate gradient bucket
         for group_id in range(self.num_param_groups):
             self._accum_grads_store_in_bucket(self._accum_grad_buckets[group_id], reduce_rank=None)
 
+        # we need to reduce the gradients left in the communication bucket
+        for group_id in range(self.num_param_groups):
+            self._reduce_grads_stored_in_bucket(self._bucket_store[group_id], reduce_rank=None, last_bucket=True)
+
         # compute norm for gradients in the before bucket
         groups_norms = []
         for group_id in range(self.num_param_groups):
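For context, the diff reads as an ordering fix: the final bucket reduce is moved to run after the leftover accumulation-bucket gradients have been flushed. If the reduce ran first, anything flushed out of the accumulation bucket afterwards would miss the last reduce and be silently dropped. Below is a minimal, self-contained sketch of that ordering constraint; all names here are hypothetical stand-ins, not the InternLM implementation.

from typing import Dict, List


class TinyBucketedReducer:
    """Toy two-stage pipeline: accumulate grads locally, then reduce in buckets."""

    def __init__(self) -> None:
        self.accum_bucket: Dict[str, float] = {}  # grads awaiting accumulation
        self.comm_bucket: List[float] = []        # grads awaiting reduction
        self.reduced: List[float] = []            # grads that made it through the reduce

    def add_grad(self, name: str, grad: float) -> None:
        """Stage 1: park an incoming grad in the accumulation bucket."""
        self.accum_bucket[name] = self.accum_bucket.get(name, 0.0) + grad

    def flush_accum_bucket(self) -> None:
        """Move every accumulated grad into the communication bucket."""
        self.comm_bucket.extend(self.accum_bucket.values())
        self.accum_bucket.clear()

    def reduce_comm_bucket(self) -> None:
        """Stand-in for the collective reduce over the bucket contents."""
        self.reduced.extend(self.comm_bucket)
        self.comm_bucket.clear()


reducer = TinyBucketedReducer()
reducer.add_grad("w", 1.0)
reducer.add_grad("b", 2.0)

# Fixed order (mirroring the commit): flush the accumulation bucket first,
# then run the last reduce, so nothing is left behind.
reducer.flush_accum_bucket()
reducer.reduce_comm_bucket()
assert reducer.reduced == [1.0, 2.0] and not reducer.accum_bucket

Calling reduce_comm_bucket() before flush_accum_bucket() would instead leave both grads stranded in comm_bucket with no reduce left to pick them up, which is the failure mode the reordered calls in the diff avoid.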