mirror of https://github.com/hpcaitech/ColossalAI
fix prepare grads in sharded optim (#749)
parent 097772546e
commit 4b048a8728
@@ -288,6 +288,8 @@ class ShardedOptimizerV2(ColossalaiOptimizer):
     def _prepare_grads(self):
         for group in self.optim.param_groups:
             for p in group['params']:
+                if p.colo_attr.saved_grad.is_null():
+                    continue
                 p.colo_attr.saved_grad.trans_state(TensorState.COMPUTE)
                 # FIXME(ver217): p.data here is an empty tensor on CUDA and has no useful information
                 # If we change p.grad directly
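
The added guard skips parameters whose saved gradient is null (e.g. frozen parameters, or ones unused in the forward pass) instead of attempting a state transition on an empty tensor. Below is a minimal, self-contained sketch of that behavior; SavedGrad, prepare_grads, and the SimpleNamespace parameters are illustrative stand-ins, not ColossalAI's actual classes.

from enum import Enum
from types import SimpleNamespace

class TensorState(Enum):
    HOLD = 0
    COMPUTE = 1

class SavedGrad:
    """Simplified stand-in for the stateful gradient wrapper in the diff."""

    def __init__(self, payload=None):
        self.payload = payload        # None models a "null" (absent) gradient
        self.state = TensorState.HOLD

    def is_null(self):
        return self.payload is None

    def trans_state(self, state):
        # Transitioning a null tensor is the failure mode the patch guards against.
        if self.is_null():
            raise RuntimeError("cannot transition the state of a null tensor")
        self.state = state

def prepare_grads(param_groups):
    """Mirrors the patched loop: parameters without a produced gradient
    are skipped rather than triggering an invalid state transition."""
    for group in param_groups:
        for p in group['params']:
            if p.colo_attr.saved_grad.is_null():
                continue              # the added guard
            p.colo_attr.saved_grad.trans_state(TensorState.COMPUTE)

# Usage: one param with a grad, one without; only the first transitions.
p_with = SimpleNamespace(colo_attr=SimpleNamespace(saved_grad=SavedGrad(payload=[0.1])))
p_null = SimpleNamespace(colo_attr=SimpleNamespace(saved_grad=SavedGrad()))
prepare_grads([{'params': [p_with, p_null]}])
assert p_with.colo_attr.saved_grad.state is TensorState.COMPUTE
assert p_null.colo_attr.saved_grad.state is TensorState.HOLD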