From ec88e35306bf11bcd9a24db85d8ae36fcb9212e0 Mon Sep 17 00:00:00 2001
From: YWMditto <862779238@qq.com>
Date: Mon, 6 Nov 2023 20:30:34 +0800
Subject: [PATCH] add rotary config in configuration_internlm.py

---
 .../internlm_model/configuration_internlm.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/tools/transformers/internlm_model/configuration_internlm.py b/tools/transformers/internlm_model/configuration_internlm.py
index 298f913..a76c1b8 100644
--- a/tools/transformers/internlm_model/configuration_internlm.py
+++ b/tools/transformers/internlm_model/configuration_internlm.py
@@ -19,9 +19,8 @@
 # limitations under the License.
 """ InternLM model configuration"""
 
-from transformers.utils import logging
 from transformers.configuration_utils import PretrainedConfig
-
+from transformers.utils import logging
 
 logger = logging.get_logger(__name__)
 
@@ -30,9 +29,9 @@ INTERNLM_PRETRAINED_CONFIG_ARCHIVE_MAP = {}
 
 class InternLMConfig(PretrainedConfig):
     r"""
-    This is the configuration class to store the configuration of a [`InternLMModel`]. It is used to instantiate an InternLM
-    model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
-    defaults will yield a similar configuration to that of the InternLM-7B.
+    This is the configuration class to store the configuration of a [`InternLMModel`]. It is used to instantiate
+    an InternLM model according to the specified arguments, defining the model architecture. Instantiating a
+    configuration with the defaults will yield a similar configuration to that of the InternLM-7B.
 
     Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
     documentation from [`PretrainedConfig`] for more information.
@@ -81,7 +80,7 @@ class InternLMConfig(PretrainedConfig):
     model_type = "internlm"
     _auto_class = "AutoConfig"
 
-    def __init__(
+    def __init__(  # pylint: disable=W0102
         self,
         vocab_size=103168,
         hidden_size=4096,
@@ -98,6 +97,7 @@ class InternLMConfig(PretrainedConfig):
         eos_token_id=2,
         tie_word_embeddings=False,
         bias=True,
+        rotary={"base": 10000, "type": "dynamic"},  # pylint: disable=W0102
         **kwargs,
     ):
         self.vocab_size = vocab_size
@@ -111,6 +111,7 @@ class InternLMConfig(PretrainedConfig):
         self.rms_norm_eps = rms_norm_eps
        self.use_cache = use_cache
         self.bias = bias
+        self.rotary = rotary
         super().__init__(
             pad_token_id=pad_token_id,
             bos_token_id=bos_token_id,
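
Usage sketch (not part of the patch): a minimal example of how the new rotary argument introduced above might be set or overridden. It assumes the patched configuration_internlm.py is on the import path, and that the downstream modeling code reads config.rotary["base"] and config.rotary["type"]; neither assumption is shown by this diff itself.

    # Illustrative sketch only; assumes the patched configuration_internlm.py is importable.
    from configuration_internlm import InternLMConfig

    # Default introduced by this patch.
    config = InternLMConfig()
    print(config.rotary)            # {'base': 10000, 'type': 'dynamic'}

    # Overriding the rotary parameters explicitly (hypothetical values).
    config = InternLMConfig(rotary={"base": 1000000, "type": "dynamic"})
    print(config.rotary["base"])    # 1000000

Note on the pylint markers: W0102 is pylint's dangerous-default-value warning, triggered because the dict default in the signature is mutable. An alternative would be rotary=None with a fallback assigned inside __init__; the patch instead keeps the literal default visible in the signature and silences the warning.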