update moe config to fit training on 8 GPUs

pull/548/head
Qu Wenwen 2023-12-18 14:02:33 +08:00
parent c801336732
commit 35778efff3
1 changed file with 1 addition and 1 deletion

View File

@ -141,7 +141,7 @@ model = dict(
layer_norm_epsilon=1e-5,
use_flash_attn=True,
num_chunks=1, # if num_chunks > 1, interleaved pipeline scheduler is used.
num_experts=8,
num_experts=4,
moe_use_residual=False,
moe_gate_k=2,
)