fix(internlm/model): reset dropout_selective_checkpoint=True

pull/293/head
huangting4201 2023-10-09 14:47:10 +08:00
parent 5bca32e4dc
commit bd809a61f2
2 changed files with 2 additions and 7 deletions

@@ -461,7 +461,7 @@ def build_model_with_cfg(
     apply_post_layer_norm=False,  # pylint: disable=W0613
     layer_norm_epsilon=1e-5,
     is_reward=False,
-    dropout_selective_checkpoint=False,
+    dropout_selective_checkpoint=True,
     use_scaled_init: bool = True,
     use_swiglu: bool = True,
     use_flash_attn: bool = True,
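
By its name, dropout_selective_checkpoint opts dropout layers into selective activation checkpointing rather than full recomputation. A minimal sketch of how a training config might carry the flag into build_model_with_cfg, assuming the dict-style configs used in this repo; the dict layout and the call site are illustrative, and the neighboring keys are copied from the signature above:

    model_cfg = dict(
        layer_norm_epsilon=1e-5,
        is_reward=False,
        dropout_selective_checkpoint=True,  # matches the restored default
        use_scaled_init=True,
        use_swiglu=True,
        use_flash_attn=True,
    )
    # hypothetical call site: model = build_model_with_cfg(**model_cfg)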

@@ -169,12 +169,7 @@ def get_shard_state_dict(shard_model):
     """
-    # TODO: rank0_only can save memory for non-rank0 gpu, but when tp is enabled, model saving will left some parameters
-    # save_policy = FullStateDictConfig(offload_to_cpu=True, rank0_only=False)
-    # with FSDP.state_dict_type(model, StateDictType.FULL_STATE_DICT, save_policy):
-    #     states = model.state_dict()
-    # in this version, FSDP model can only save with sharded shape LOCAL_STATE_DICT
+    # FSDP model can only save with sharded shape SHARDED_STATE_DICT when set use_orig_params=True
     with FSDP.state_dict_type(shard_model, StateDictType.SHARDED_STATE_DICT):
         shard_states = shard_model.state_dict()