From 07c98c4a39d0d4c3f753c3144151f68f65c77217 Mon Sep 17 00:00:00 2001
From: Wenwen Qu
Date: Thu, 4 Jan 2024 10:51:56 +0800
Subject: [PATCH] remove suffix for gate key

---
 internlm/utils/model_checkpoint.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/internlm/utils/model_checkpoint.py b/internlm/utils/model_checkpoint.py
index b9326de..0bc7261 100644
--- a/internlm/utils/model_checkpoint.py
+++ b/internlm/utils/model_checkpoint.py
@@ -612,7 +612,7 @@ def try_save_moe_checkpoint(folder, model, tp_rank, pp_rank):
             # get all moe parameters
             moe_state_dict = {}
             for n, p in module.state_dict().items():
-                if "expert" in n and "moe_layer.gate.wg.weight" not in n:
+                if "expert" in n and "moe_layer.gate" not in n:
                     moe_state_dict[n_module + "." + n] = p
             moe_str_prefix = ".moe_layer.experts.experts."
             # Reorder the moe name rank, so that each checkpoint only has one expert
@@ -647,7 +647,7 @@ def get_non_moe_state_dict(full_state_dict):
     Get the state dict of the non-moe layers
     """
     for key in list(full_state_dict.keys()):
-        if "expert" in key and "moe_layer.gate.wg.weight" not in key:
+        if "expert" in key and "moe_layer.gate" not in key:
             full_state_dict.pop(key)
 
     return full_state_dict