diff --git a/internlm/model/overlap_handler.py b/internlm/model/overlap_handler.py index d2fef8d..35d8a59 100644 --- a/internlm/model/overlap_handler.py +++ b/internlm/model/overlap_handler.py @@ -316,8 +316,7 @@ class FSTPOverlapSchedulerHook(SchedulerHook): self._overlap_handler.set_forward_mode(True) def after_forward(self, scheduler, outputs) -> None: - print("after forward allocated memory: ", torch.cuda.memory_allocated() / 1024 / 1024 /1024, flush=True) - print("after forward max memory: ", torch.cuda.max_memory_allocated() / 1024 / 1024 / 1024, flush=True) + pass def before_criterion(self, scheduler, outputs, label) -> None: pass diff --git a/train.py b/train.py index e1b8dff..ae86728 100644 --- a/train.py +++ b/train.py @@ -255,8 +255,6 @@ def main(args): # update parameters, and returns (success_update, grad_norm) trainer_result = trainer.step() assert trainer_result is not None - print("after step: ", torch.cuda.memory_allocated() / 1024 / 1024 /1024, flush=True) - print("after step: ", torch.cuda.max_memory_allocated() / 1024 / 1024 / 1024, flush=True) success_update, grad_norm_groups = trainer_result if success_update: # update parameters successfully