mirror of https://github.com/InternLM/InternLM
add memory print
parent
918dff7257
commit
363275b500
|
|
@ -316,7 +316,8 @@ class FSTPOverlapSchedulerHook(SchedulerHook):
|
||||||
self._overlap_handler.set_forward_mode(True)
|
self._overlap_handler.set_forward_mode(True)
|
||||||
|
|
||||||
def after_forward(self, scheduler, outputs) -> None:
|
def after_forward(self, scheduler, outputs) -> None:
|
||||||
pass
|
print("after forward allocated memory: ", torch.cuda.memory_allocated() / 1024 / 1024 /1024, flush=True)
|
||||||
|
print("after forward max memory: ", torch.cuda.max_memory_allocated() / 1024 / 1024 / 1024, flush=True)
|
||||||
|
|
||||||
def before_criterion(self, scheduler, outputs, label) -> None:
|
def before_criterion(self, scheduler, outputs, label) -> None:
|
||||||
pass
|
pass
|
||||||
|
|
|
||||||
2
train.py
2
train.py
|
|
@ -255,6 +255,8 @@ def main(args):
|
||||||
# update parameters, and returns (success_update, grad_norm)
|
# update parameters, and returns (success_update, grad_norm)
|
||||||
trainer_result = trainer.step()
|
trainer_result = trainer.step()
|
||||||
assert trainer_result is not None
|
assert trainer_result is not None
|
||||||
|
print("after step: ", torch.cuda.memory_allocated() / 1024 / 1024 /1024, flush=True)
|
||||||
|
print("after step: ", torch.cuda.max_memory_allocated() / 1024 / 1024 / 1024, flush=True)
|
||||||
|
|
||||||
success_update, grad_norm_groups = trainer_result
|
success_update, grad_norm_groups = trainer_result
|
||||||
if success_update: # update parameters successfully
|
if success_update: # update parameters successfully
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue