From f5337f6e02eef6a933722bd1cb33f28beda1bb0e Mon Sep 17 00:00:00 2001 From: jiaxingli <43110891+li126com@users.noreply.github.com> Date: Fri, 22 Sep 2023 13:52:25 +0800 Subject: [PATCH] Feat(PythonGC): Do garbage collection manually (#326) * feat:add gc control * feat:add gc control * feat:add gc control * feat:add gc * re-lint --- internlm/initialize/launch.py | 3 +++ internlm/utils/gputest.py | 3 +++ 2 files changed, 6 insertions(+) diff --git a/internlm/initialize/launch.py b/internlm/initialize/launch.py index 8ae8ee0..323715e 100644 --- a/internlm/initialize/launch.py +++ b/internlm/initialize/launch.py @@ -2,6 +2,7 @@ # -*- encoding: utf-8 -*- import argparse +import gc import os from pathlib import Path from typing import Dict, Union @@ -446,6 +447,8 @@ def initialize_distributed_env( master_port (str): The master port for distributed training. 8888 by default. seed (int, optional): Specified random seed for every process. 1024 by default. """ + # close automatic garbage collection + gc.disable() torch.cuda.empty_cache() diff --git a/internlm/utils/gputest.py b/internlm/utils/gputest.py index ddb4932..48877b9 100644 --- a/internlm/utils/gputest.py +++ b/internlm/utils/gputest.py @@ -1,6 +1,7 @@ #!/usr/bin/env python # -*- encoding: utf-8 -*- +import gc import math import socket @@ -41,6 +42,8 @@ def empty_cache_and_diag(batch_count, interval=50): bench_net() # do empty_cache after the bench torch.cuda.empty_cache() + # do garbage collection + gc.collect() def benchmark_forward(