diff --git a/internlm/model/overlap_handler.py b/internlm/model/overlap_handler.py
index d2fef8d..35d8a59 100644
--- a/internlm/model/overlap_handler.py
+++ b/internlm/model/overlap_handler.py
@@ -316,8 +316,7 @@ class FSTPOverlapSchedulerHook(SchedulerHook):
             self._overlap_handler.set_forward_mode(True)
 
     def after_forward(self, scheduler, outputs) -> None:
-        print("after forward allocated memory: ", torch.cuda.memory_allocated() / 1024 / 1024 /1024, flush=True)
-        print("after forward max memory: ", torch.cuda.max_memory_allocated() / 1024 / 1024 / 1024, flush=True)
+        pass
 
     def before_criterion(self, scheduler, outputs, label) -> None:
         pass
diff --git a/train.py b/train.py
index e1b8dff..ae86728 100644
--- a/train.py
+++ b/train.py
@@ -255,8 +255,6 @@ def main(args):
             # update parameters, and returns (success_update, grad_norm)
             trainer_result = trainer.step()
             assert trainer_result is not None
-            print("after step: ", torch.cuda.memory_allocated() / 1024 / 1024 /1024, flush=True)
-            print("after step: ", torch.cuda.max_memory_allocated() / 1024 / 1024 / 1024, flush=True)
 
             success_update, grad_norm_groups = trainer_result
             if success_update:  # update parameters successfully