diff --git a/internlm/model/moe.py b/internlm/model/moe.py index 0865097..28e5ae6 100644 --- a/internlm/model/moe.py +++ b/internlm/model/moe.py @@ -53,6 +53,7 @@ class MoE(torch.nn.Module): device=None, dtype=None, ): + super().__init__() assert (