diff --git a/internlm/initialize/launch.py b/internlm/initialize/launch.py
index 8ae8ee0..323715e 100644
--- a/internlm/initialize/launch.py
+++ b/internlm/initialize/launch.py
@@ -2,6 +2,7 @@
 # -*- encoding: utf-8 -*-
 
 import argparse
+import gc
 import os
 from pathlib import Path
 from typing import Dict, Union
@@ -446,6 +447,8 @@ def initialize_distributed_env(
         master_port (str): The master port for distributed training. 8888 by default.
         seed (int, optional): Specified random seed for every process. 1024 by default.
     """
+    # close automatic garbage collection
+    gc.disable()
 
     torch.cuda.empty_cache()
 
diff --git a/internlm/utils/gputest.py b/internlm/utils/gputest.py
index ddb4932..48877b9 100644
--- a/internlm/utils/gputest.py
+++ b/internlm/utils/gputest.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 # -*- encoding: utf-8 -*-
 
+import gc
 import math
 import socket
 
@@ -41,6 +42,8 @@ def empty_cache_and_diag(batch_count, interval=50):
             bench_net()
     # do empty_cache after the bench
     torch.cuda.empty_cache()
+    # do garbage collection
+    gc.collect()
 
 
 def benchmark_forward(
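
The two hunks apply a single pattern: Python's automatic garbage collection is disabled once when the distributed environment is initialized, and a full collection is then triggered explicitly from empty_cache_and_diag, which already runs on a fixed batch interval, so GC pauses land at predictable points instead of in the middle of a training step. Below is a minimal standalone sketch of that pattern, not code from the repository; run_training, train_step, and gc_collect_interval are illustrative names only.

import gc


def train_step(step):
    # stand-in for the real forward/backward/optimizer work
    _ = [bytearray(1024) for _ in range(100)]


def run_training(num_steps, gc_collect_interval=50):
    gc.disable()  # stop the cyclic collector from running automatically
    try:
        for step in range(1, num_steps + 1):
            train_step(step)
            if step % gc_collect_interval == 0:
                gc.collect()  # reclaim cyclic garbage at a known, safe point
    finally:
        gc.enable()  # restore automatic collection when training exits


run_training(num_steps=200)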