diff --git a/internlm/core/context/parallel_context.py b/internlm/core/context/parallel_context.py index 7f3e415..da6a0d7 100644 --- a/internlm/core/context/parallel_context.py +++ b/internlm/core/context/parallel_context.py @@ -568,7 +568,8 @@ class ParallelContext(metaclass=SingletonMeta): # during model construction), this is because the random state will be different in different tensor parallel # device of the same data parallel group. The underlying reason is that the device of tp_rank = 0 will perform # additional random operations during the RowParallelLinear module building process. - set_mode(ParallelMode.DUMMY) + # set_mode(ParallelMode.DUMMY) + set_mode(ParallelMode.TENSOR) seeds = get_seeds() seed_str = ", ".join([f"{k}: {v}" for k, v in seeds.items()])