From 73998a0bb74d8b436589249db3c50f3375dc9ec4 Mon Sep 17 00:00:00 2001
From: Wenwen Qu
Date: Fri, 11 Aug 2023 11:58:22 +0800
Subject: [PATCH] modified: internlm/model/modeling_internlm.py

---
 internlm/model/modeling_internlm.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/internlm/model/modeling_internlm.py b/internlm/model/modeling_internlm.py
index a8f1973..19d68d7 100644
--- a/internlm/model/modeling_internlm.py
+++ b/internlm/model/modeling_internlm.py
@@ -123,7 +123,6 @@ class PackedFlashBaseLayer1D(nn.Module):
         self.norm1 = nn.LayerNorm(hidden_size, eps=layer_norm_epsilon)
         self.norm2 = nn.LayerNorm(hidden_size, eps=layer_norm_epsilon)
 
-        # TODO: replace num_experts and epsize with function parameter
         self.num_experts = num_experts
         self.moe_gate_k = moe_gate_k
         self.moe_capacity_factor = moe_capacity_factor
@@ -582,7 +581,7 @@ def build_model_with_cfg(
     moe_noisy_gate_policy: str = None,
     moe_drop_tokens: bool = True,
     moe_use_rts: bool = True,
-    moe_use_residual: bool = True,
+    moe_use_residual: bool = False,
 ):
     """
     Builde model with config
@@ -646,7 +645,6 @@ def build_model_with_cfg(
         use_scaled_init=use_scaled_init,
         use_swiglu=use_swiglu,
         use_flash_attn=use_flash_attn,
-        sequence_parallel=sequence_parallel,
         num_experts=num_experts,
         moe_gate_k=moe_gate_k,
         moe_capacity_factor=moe_capacity_factor,
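
The behavioral change in this patch is the default of moe_use_residual flipping from True to False: callers of build_model_with_cfg that relied on the old default now get a non-residual MoE unless they opt in explicitly. Below is a minimal Python sketch of that calling pattern; the stub function and its dict return value are hypothetical stand-ins for the real build_model_with_cfg, used only to illustrate the default flip.

    def build_model_with_cfg_stub(moe_use_residual: bool = False, **kwargs):
        """Hypothetical stand-in for build_model_with_cfg after this patch."""
        return {"moe_use_residual": moe_use_residual, **kwargs}

    # After this patch, omitting the flag yields the new default (False).
    default_cfg = build_model_with_cfg_stub(num_experts=8)
    assert default_cfg["moe_use_residual"] is False

    # Callers who want the old behavior must now request it explicitly.
    residual_cfg = build_model_with_cfg_stub(moe_use_residual=True, num_experts=8)
    assert residual_cfg["moe_use_residual"] is True

Note that the third hunk also drops the sequence_parallel keyword from the layer-construction call inside build_model_with_cfg, so any downstream code passing that argument through this path should be updated accordingly.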