mirror of https://github.com/InternLM/InternLM
fix moe and zero1 check in args_sanity_check
parent
93bb5c2760
commit
816ecf8e04
|
@ -150,7 +150,7 @@ pipeline parallel (dict):
|
|||
tensor parallel: tensor parallel size, usually the number of GPUs per node.
|
||||
"""
|
||||
parallel = dict(
|
||||
zero1=-1,
|
||||
zero1=dict(size=-1, fsdp=False),
|
||||
tensor=2,
|
||||
pipeline=dict(size=1, interleaved_overlap=True),
|
||||
sequence_parallel=False,
|
||||
|
@ -168,4 +168,4 @@ monitor = dict(
|
|||
),
|
||||
)
|
||||
|
||||
model_type = "INTERNLM_MoE"
|
||||
model_type = "INTERNLM_MoE"
|
||||
|
|
|
@ -349,7 +349,7 @@ def args_sanity_check():
|
|||
assert (
|
||||
not optim_ckpt.overlap_sync_grad & optim_ckpt.overlap_sync_param
|
||||
), "not support overlap and moe at the same time"
|
||||
assert gpc.config.parallel.zero1 == -1, "moe only support zero1, set zero1=-1 can fix this"
|
||||
assert gpc.config.parallel.zero1.size == -1, "moe only support zero1, set zero1=dict(size=-1,...) can fix this"
|
||||
|
||||
|
||||
def launch(
|
||||
|
|
Loading…
Reference in New Issue