diff --git a/internlm/core/context/parallel_context.py b/internlm/core/context/parallel_context.py index 10cbb03..72ebcd3 100644 --- a/internlm/core/context/parallel_context.py +++ b/internlm/core/context/parallel_context.py @@ -143,7 +143,7 @@ class ParallelContext(metaclass=SingletonMeta): self.pipeline_parallel_size = 1 self.tensor_parallel_size = 1 self.zero1_parallel_size = -1 - self.expert_parallel_size = -1 + self.expert_parallel_size = 1 self.num_processes_on_current_node = -1 self.virtual_pipeline_parallel_size = None self.virtual_pipeline_parallel_rank = None diff --git a/internlm/initialize/launch.py b/internlm/initialize/launch.py index 014278e..d257364 100644 --- a/internlm/initialize/launch.py +++ b/internlm/initialize/launch.py @@ -60,6 +60,9 @@ def args_sanity_check(): if "tensor" not in gpc.config.parallel: gpc.config.parallel._add_item("tensor", 1) + if "expert" not in gpc.config.parallel: + gpc.config.parallel._add_item("expert", 1) + # processing the data config in gpc data = gpc.config.data