diff --git a/internlm/initialize/launch.py b/internlm/initialize/launch.py
index 491e2b0..ddf380a 100644
--- a/internlm/initialize/launch.py
+++ b/internlm/initialize/launch.py
@@ -533,9 +533,6 @@ def initialize_distributed_env(
         seed (int, optional): Specified random seed for every process. 1024 by default.
     """
 
-    # close automatic garbage collection
-    gc.disable()
-
     torch.cuda.empty_cache()
 
     if launcher == "torch":
diff --git a/train.py b/train.py
index 6874f9e..11ce3de 100644
--- a/train.py
+++ b/train.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 # -*- encoding: utf-8 -*-
 
+import gc
 import socket
 import time
 import traceback
@@ -191,6 +192,9 @@ def main(args):
     # transfer the train data loader into train data iterator
     train_iter = iter(train_dl)
 
+    # close automatic garbage collection
+    gc.disable()
+
     with initialize_llm_profile(profiling=args.profiling, start_time=current_time) as prof:
         # start iterating the train data and begin training
         for batch_count in range(train_state.batch_count, total_steps):