From bd809a61f21cf468f7c436b58f36b49d06ff4984 Mon Sep 17 00:00:00 2001
From: huangting4201 <1538303371@qq.com>
Date: Mon, 9 Oct 2023 14:47:10 +0800
Subject: [PATCH] fix(internlm/model): reset dropout_selective_checkpoint=True

---
 internlm/model/modeling_internlm.py | 2 +-
 internlm/utils/model_checkpoint.py  | 7 +------
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/internlm/model/modeling_internlm.py b/internlm/model/modeling_internlm.py
index d9ac340..2856a78 100644
--- a/internlm/model/modeling_internlm.py
+++ b/internlm/model/modeling_internlm.py
@@ -461,7 +461,7 @@ def build_model_with_cfg(
     apply_post_layer_norm=False,  # pylint: disable=W0613
     layer_norm_epsilon=1e-5,
     is_reward=False,
-    dropout_selective_checkpoint=False,
+    dropout_selective_checkpoint=True,
     use_scaled_init: bool = True,
     use_swiglu: bool = True,
     use_flash_attn: bool = True,
diff --git a/internlm/utils/model_checkpoint.py b/internlm/utils/model_checkpoint.py
index a9fea67..d84dd62 100644
--- a/internlm/utils/model_checkpoint.py
+++ b/internlm/utils/model_checkpoint.py
@@ -169,12 +169,7 @@ def get_shard_state_dict(shard_model):
     """
-    # TODO: rank0_only can save memory for non-rank0 gpu, but when tp is enabled, model saving will left some parameters
-    # save_policy = FullStateDictConfig(offload_to_cpu=True, rank0_only=False)
-    # with FSDP.state_dict_type(model, StateDictType.FULL_STATE_DICT, save_policy):
-    #     states = model.state_dict()
-
-    # in this version, FSDP model can only save with sharded shapeLOCAL_STATE_DICT
+    # FSDP model can only save with sharded shape SHARDED_STATE_DICT when use_orig_params=True is set
     with FSDP.state_dict_type(shard_model, StateDictType.SHARDED_STATE_DICT):
         shard_states = shard_model.state_dict()