From 78400c21b83bd1fd75f17ff071346005c8cd5c9a Mon Sep 17 00:00:00 2001
From: zigzagcai
Date: Tue, 19 Dec 2023 15:29:55 +0800
Subject: [PATCH] move manual gc before train loop starts

---
 internlm/initialize/launch.py | 3 ---
 train.py                      | 4 ++++
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/internlm/initialize/launch.py b/internlm/initialize/launch.py
index 491e2b0..ddf380a 100644
--- a/internlm/initialize/launch.py
+++ b/internlm/initialize/launch.py
@@ -533,9 +533,6 @@ def initialize_distributed_env(
         seed (int, optional): Specified random seed for every process. 1024 by default.
     """
 
-    # close automatic garbage collection
-    gc.disable()
-
     torch.cuda.empty_cache()
 
     if launcher == "torch":
diff --git a/train.py b/train.py
index 6874f9e..11ce3de 100644
--- a/train.py
+++ b/train.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 # -*- encoding: utf-8 -*-
 
+import gc
 import socket
 import time
 import traceback
@@ -191,6 +192,9 @@ def main(args):
     # transfer the train data loader into train data iterator
     train_iter = iter(train_dl)
 
+    # close automatic garbage collection
+    gc.disable()
+
     with initialize_llm_profile(profiling=args.profiling, start_time=current_time) as prof:
         # start iterating the train data and begin training
         for batch_count in range(train_state.batch_count, total_steps):