fix prepare grads in sharded optim (#749)

pull/751/head
ver217 2022-04-13 22:36:11 +08:00 committed by GitHub
parent 097772546e
commit 4b048a8728
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 2 additions and 0 deletions

View File

@@ -288,6 +288,8 @@ class ShardedOptimizerV2(ColossalaiOptimizer):
def _prepare_grads(self):
for group in self.optim.param_groups:
for p in group['params']:
if p.colo_attr.saved_grad.is_null():
continue
p.colo_attr.saved_grad.trans_state(TensorState.COMPUTE)
# FIXME(ver217): p.data here is an empty tensor on CUDA and has no useful information
# If we change p.grad directly