mirror of https://github.com/InternLM/InternLM
move manual gc before train loop starts
parent
2afeebe5b0
commit
78400c21b8
|
@@ -533,9 +533,6 @@ def initialize_distributed_env(
|
||||||
seed (int, optional): Specified random seed for every process. 1024 by default.
|
seed (int, optional): Specified random seed for every process. 1024 by default.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# close automatic garbage collection
|
|
||||||
gc.disable()
|
|
||||||
|
|
||||||
torch.cuda.empty_cache()
|
torch.cuda.empty_cache()
|
||||||
|
|
||||||
if launcher == "torch":
|
if launcher == "torch":
|
||||||
|
|
4
train.py
4
train.py
|
@@ -1,6 +1,7 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# -*- encoding: utf-8 -*-
|
# -*- encoding: utf-8 -*-
|
||||||
|
|
||||||
|
import gc
|
||||||
import socket
|
import socket
|
||||||
import time
|
import time
|
||||||
import traceback
|
import traceback
|
||||||
|
@@ -191,6 +192,9 @@ def main(args):
|
||||||
# transfer the train data loader into train data iterator
|
# transfer the train data loader into train data iterator
|
||||||
train_iter = iter(train_dl)
|
train_iter = iter(train_dl)
|
||||||
|
|
||||||
|
# close automatic garbage collection
|
||||||
|
gc.disable()
|
||||||
|
|
||||||
with initialize_llm_profile(profiling=args.profiling, start_time=current_time) as prof:
|
with initialize_llm_profile(profiling=args.profiling, start_time=current_time) as prof:
|
||||||
# start iterating the train data and begin training
|
# start iterating the train data and begin training
|
||||||
for batch_count in range(train_state.batch_count, total_steps):
|
for batch_count in range(train_state.batch_count, total_steps):
|
||||||
|
|
Loading…
Reference in New Issue