From bd809a61f21cf468f7c436b58f36b49d06ff4984 Mon Sep 17 00:00:00 2001
From: huangting4201 <1538303371@qq.com>
Date: Mon, 9 Oct 2023 14:47:10 +0800
Subject: [PATCH] fix(internlm/model): reset dropout_selective_checkpoint=True

---
 internlm/model/modeling_internlm.py | 2 +-
 internlm/utils/model_checkpoint.py  | 7 +------
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/internlm/model/modeling_internlm.py b/internlm/model/modeling_internlm.py
index d9ac340..2856a78 100644
--- a/internlm/model/modeling_internlm.py
+++ b/internlm/model/modeling_internlm.py
@@ -461,7 +461,7 @@ def build_model_with_cfg(
     apply_post_layer_norm=False,  # pylint: disable=W0613
     layer_norm_epsilon=1e-5,
     is_reward=False,
-    dropout_selective_checkpoint=False,
+    dropout_selective_checkpoint=True,
     use_scaled_init: bool = True,
     use_swiglu: bool = True,
     use_flash_attn: bool = True,
diff --git a/internlm/utils/model_checkpoint.py b/internlm/utils/model_checkpoint.py
index a9fea67..d84dd62 100644
--- a/internlm/utils/model_checkpoint.py
+++ b/internlm/utils/model_checkpoint.py
@@ -169,12 +169,7 @@ def get_shard_state_dict(shard_model):
     """
-    # TODO: rank0_only can save memory for non-rank0 gpu, but when tp is enabled, model saving will left some parameters
-    # save_policy = FullStateDictConfig(offload_to_cpu=True, rank0_only=False)
-    # with FSDP.state_dict_type(model, StateDictType.FULL_STATE_DICT, save_policy):
-    #     states = model.state_dict()
-
-    # in this version, FSDP model can only save with sharded shapeLOCAL_STATE_DICT
+    # FSDP model can only save with sharded shape SHARDED_STATE_DICT when use_orig_params=True is set
     with FSDP.state_dict_type(shard_model, StateDictType.SHARDED_STATE_DICT):
         shard_states = shard_model.state_dict()