From d3ca22cf3d71ae273ab368cc72e8969b5222c73c Mon Sep 17 00:00:00 2001 From: lijiaxing Date: Tue, 19 Dec 2023 17:49:49 +0800 Subject: [PATCH] no overlap for save ckpt --- internlm/utils/gputest.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/internlm/utils/gputest.py b/internlm/utils/gputest.py index ec61a04..31ebde3 100644 --- a/internlm/utils/gputest.py +++ b/internlm/utils/gputest.py @@ -36,8 +36,7 @@ def empty_cache_and_diag(batch_count, interval=50): if interval <= 0: interval = 50 - if not gpc.config.hybrid_zero_optimizer.overlap_sync_param: - cuda_memory_analyze(batch_count, batch_count % int(interval) == 0 or batch_count <= 5) + cuda_memory_analyze(batch_count, batch_count % int(interval) == 0 or batch_count <= 5) if batch_count % int(interval) == 0: # there is no need to do diag on the first batch @@ -302,7 +301,6 @@ def warmup_process_group(): def cuda_memory_analyze(step=0, print_mm_suage=False): global n_caching_allocator_flushes - torch.cuda.synchronize() g_rank = gpc.get_global_rank() tp_rank = gpc.get_local_rank(ParallelMode.TENSOR)