From 845cccd7568acef7cef7ae60b5e12d47bc34e965 Mon Sep 17 00:00:00 2001 From: YWMditto <862779238@qq.com> Date: Fri, 3 Nov 2023 17:19:37 +0800 Subject: [PATCH] add rope doc --- .../internlm_model/modeling_internlm.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/tools/transformers/internlm_model/modeling_internlm.py b/tools/transformers/internlm_model/modeling_internlm.py index a850b26..e2d52ed 100644 --- a/tools/transformers/internlm_model/modeling_internlm.py +++ b/tools/transformers/internlm_model/modeling_internlm.py @@ -105,6 +105,14 @@ class InternLMRMSNorm(nn.Module): class InternLMRotaryEmbedding(torch.nn.Module): + """Implement InternLM's rotary embedding. + + Args: + dim (int): Dimension of each self-attention head. + max_position_embeddings (int, optional): Model's training length. Defaults to 2048. + base (int, optional): The base used to compute the rotation angle of the rotary position encoding. Defaults to 10000. + device (Any, optional): Running device. Defaults to None. + """ def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None): super().__init__() inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2).float().to(device) / dim)) @@ -137,7 +145,14 @@ class InternLMRotaryEmbedding(torch.nn.Module): class InternLMDynamicNTKScalingRotaryEmbedding(torch.nn.Module): - """Implement dynamic ntk rope. + """Implement InternLM's DynamicNTK extrapolation method, extending the supported context length to 16K. + + Args: + dim (int): Dimension of each self-attention head. + max_position_embeddings (int, optional): Model's training length. Defaults to 2048. + base (int, optional): The base used to compute the rotation angle of the rotary position encoding. Defaults to 10000. + device (Any, optional): Running device. Defaults to None. + scaling_factor (float, optional): NTK method extrapolation coefficient. Defaults to 1.0. 
""" def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None, scaling_factor=1.0):