mirror of https://github.com/hpcaitech/ColossalAI
[fix] debug zbv llama test;
parent
52dcc73313
commit
90939b77e0
|
@ -756,11 +756,9 @@ def run_with_hybridplugin(test_config):
|
||||||
@parameterize(
|
@parameterize(
|
||||||
"config",
|
"config",
|
||||||
[
|
[
|
||||||
# TODO:ERR in second iter
|
|
||||||
(0, 1, 4, 1, 1),
|
(0, 1, 4, 1, 1),
|
||||||
(1, 2, 2, 1, 1),
|
(1, 2, 2, 1, 1),
|
||||||
(1, 1, 2, 2, 1),
|
(1, 1, 2, 2, 1),
|
||||||
# Pass
|
|
||||||
(1, 2, 1, 2, 1),
|
(1, 2, 1, 2, 1),
|
||||||
(1, 2, 1, 1, 2),
|
(1, 2, 1, 1, 2),
|
||||||
],
|
],
|
||||||
|
|
|
@ -277,32 +277,33 @@ def check_forward_backward(model_fn, data_gen_fn, output_transform_fn, loss_fn,
|
||||||
"precision": "fp16",
|
"precision": "fp16",
|
||||||
"initial_scale": 1,
|
"initial_scale": 1,
|
||||||
},
|
},
|
||||||
{
|
# TODO: assert layer error
|
||||||
"tp_size": 2,
|
# {
|
||||||
"pp_size": 2,
|
# "tp_size": 2,
|
||||||
"pp_style": "zbv",
|
# "pp_size": 2,
|
||||||
"num_model_chunks": 2,
|
# "pp_style": "zbv",
|
||||||
"num_microbatches": 4,
|
# "num_model_chunks": 2,
|
||||||
"enable_all_optimization": False,
|
# "num_microbatches": 4,
|
||||||
"precision": "fp16",
|
# "enable_all_optimization": False,
|
||||||
"zero_stage": 0,
|
# "precision": "fp16",
|
||||||
"initial_scale": 1,
|
# "zero_stage": 0,
|
||||||
"enable_gradient_checkpointing": True,
|
# "initial_scale": 1,
|
||||||
"parallel_output": False,
|
# "enable_gradient_checkpointing": True,
|
||||||
},
|
# "parallel_output": False,
|
||||||
{
|
# },
|
||||||
"tp_size": 2,
|
# {
|
||||||
"pp_size": 2,
|
# "tp_size": 2,
|
||||||
"pp_style": "zbv",
|
# "pp_size": 2,
|
||||||
"num_model_chunks": 2,
|
# "pp_style": "zbv",
|
||||||
"num_microbatches": 4,
|
# "num_model_chunks": 2,
|
||||||
"enable_all_optimization": False,
|
# "num_microbatches": 4,
|
||||||
"precision": "fp16",
|
# "enable_all_optimization": False,
|
||||||
"zero_stage": 1,
|
# "precision": "fp16",
|
||||||
"initial_scale": 1,
|
# "zero_stage": 1,
|
||||||
"enable_gradient_checkpointing": True,
|
# "initial_scale": 1,
|
||||||
"parallel_output": False,
|
# "enable_gradient_checkpointing": True,
|
||||||
},
|
# "parallel_output": False,
|
||||||
|
# },
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def run_llama_test(test_config):
|
def run_llama_test(test_config):
|
||||||
|
|
Loading…
Reference in New Issue