mirror of https://github.com/InternLM/InternLM
fix moe and zero1 check in args_sanity_check
parent
93bb5c2760
commit
816ecf8e04
|
@ -150,7 +150,7 @@ pipeline parallel (dict):
|
||||||
tensor parallel: tensor parallel size, usually the number of GPUs per node.
|
tensor parallel: tensor parallel size, usually the number of GPUs per node.
|
||||||
"""
|
"""
|
||||||
parallel = dict(
|
parallel = dict(
|
||||||
zero1=-1,
|
zero1=dict(size=-1, fsdp=False),
|
||||||
tensor=2,
|
tensor=2,
|
||||||
pipeline=dict(size=1, interleaved_overlap=True),
|
pipeline=dict(size=1, interleaved_overlap=True),
|
||||||
sequence_parallel=False,
|
sequence_parallel=False,
|
||||||
|
@ -168,4 +168,4 @@ monitor = dict(
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
model_type = "INTERNLM_MoE"
|
model_type = "INTERNLM_MoE"
|
||||||
|
|
|
@ -349,7 +349,7 @@ def args_sanity_check():
|
||||||
assert (
|
assert (
|
||||||
not optim_ckpt.overlap_sync_grad & optim_ckpt.overlap_sync_param
|
not optim_ckpt.overlap_sync_grad & optim_ckpt.overlap_sync_param
|
||||||
), "not support overlap and moe at the same time"
|
), "not support overlap and moe at the same time"
|
||||||
assert gpc.config.parallel.zero1 == -1, "moe only support zero1, set zero1=-1 can fix this"
|
assert gpc.config.parallel.zero1.size == -1, "moe only support zero1, set zero1=dict(size=-1,...) can fix this"
|
||||||
|
|
||||||
|
|
||||||
def launch(
|
def launch(
|
||||||
|
|
Loading…
Reference in New Issue