Mirror of https://github.com/InternLM/InternLM

Commit 73998a0bb7 (parent ac168bd9c1)
Modified: internlm/model/modeling_internlm.py
@@ -123,7 +123,6 @@ class PackedFlashBaseLayer1D(nn.Module):
         self.norm1 = nn.LayerNorm(hidden_size, eps=layer_norm_epsilon)
         self.norm2 = nn.LayerNorm(hidden_size, eps=layer_norm_epsilon)

         # TODO: replace num_experts and epsize with function parameter
         self.num_experts = num_experts
         self.moe_gate_k = moe_gate_k
         self.moe_capacity_factor = moe_capacity_factor
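For orientation: moe_gate_k and moe_capacity_factor are the usual top-k gating hyperparameters, and the sketch below shows the conventional way an expert's token capacity is derived from them. The helper name and exact formula are illustrative assumptions, not code from modeling_internlm.py.

import math

# Illustrative helper (not from this commit): the standard top-k gating
# capacity rule that num_experts, moe_gate_k and moe_capacity_factor feed into.
def expert_capacity(num_tokens: int, num_experts: int, gate_k: int, capacity_factor: float) -> int:
    # Each expert gets an even share of the k routed copies of every token,
    # scaled by the capacity factor; overflow tokens are typically dropped.
    return math.ceil(capacity_factor * gate_k * num_tokens / num_experts)

# Example: 4096 tokens, 4 experts, top-2 gating, factor 1.0 -> 2048 slots per expert.
print(expert_capacity(4096, 4, 2, 1.0))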
@@ -582,7 +581,7 @@ def build_model_with_cfg(
     moe_noisy_gate_policy: str = None,
     moe_drop_tokens: bool = True,
     moe_use_rts: bool = True,
-    moe_use_residual: bool = True,
+    moe_use_residual: bool = False,
 ):
     """
     Builde model with config
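The default flip of moe_use_residual is the only functional change in this hunk. In DeepSpeed-style MoE stacks this flag usually enables a residual-MoE variant in which a small dense MLP branch is mixed with the gated expert output; a schematic sketch under that assumption follows (module and argument names are hypothetical, not from this file).

import torch

# Schematic residual-MoE combination: coef_proj is assumed to project the
# input to two mixing channels, which weight the expert path and the dense path.
def residual_moe_forward(x, moe_layer, residual_mlp, coef_proj):
    moe_out = moe_layer(x)                      # sparsely gated expert output
    dense_out = residual_mlp(x)                 # small dense MLP branch
    coef = torch.softmax(coef_proj(x), dim=-1)  # learned 2-way mixing weights
    return moe_out * coef[..., 0:1] + dense_out * coef[..., 1:2]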
@@ -646,7 +645,6 @@ def build_model_with_cfg(
         use_scaled_init=use_scaled_init,
         use_swiglu=use_swiglu,
         use_flash_attn=use_flash_attn,
         sequence_parallel=sequence_parallel,
         num_experts=num_experts,
         moe_gate_k=moe_gate_k,
         moe_capacity_factor=moe_capacity_factor,
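Taken together, the hunks thread a consistent set of MoE options through build_model_with_cfg. A hypothetical config fragment collecting them could look like the following; the values are illustrative and every non-MoE field the builder requires is omitted.

# Hypothetical MoE-related config fragment mirroring the keyword arguments
# visible in the hunks above; values are illustrative only.
moe_cfg = dict(
    num_experts=4,
    moe_gate_k=2,
    moe_capacity_factor=1.0,
    moe_noisy_gate_policy=None,
    moe_drop_tokens=True,
    moe_use_rts=True,
    moe_use_residual=False,  # this commit flips the default from True to False
)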