{
  "architectures": [
    "OpenMoeForCausalLM"
  ],
  "intermediate_size": 2048,
  "hidden_size": 768,
  "num_hidden_layers": 12,
  "head_dim": 64,
  "num_attention_heads": 12,
  "dropout_rate": 0.0,
  "layer_norm_epsilon": 1e-06,
  "vocab_size": 256384,
  "hidden_act": "swiglu",
  "num_experts": 16,
  "topk": 2,
  "capacity_factor_train": 1.25,
  "capacity_factor_eval": 2.0,
  "min_capacity": 4,
  "noisy_policy": null,
  "drop_tks": true,
  "expert_parallel": null,
  "gated": true,
  "moe_layer_interval": 4
}
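For reference, a minimal sketch of reading this config in Python. The field names come from the file above; the file path is an assumption, and the interpretation of "moe_layer_interval" (every N-th transformer block is an MoE layer, as in the OpenMoE design) is likewise an assumption rather than something stated in the file itself. OpenMoeForCausalLM is a custom model class in the ColossalAI examples, not part of stock transformers, so the config is loaded here with the standard json module.

import json

# Path is an assumption; point it at wherever this config.json lives.
with open("config.json") as f:
    cfg = json.load(f)

# Sanity check implied by the fields above: per-head dim times head count
# should equal the model width (12 * 64 = 768 = hidden_size).
assert cfg["num_attention_heads"] * cfg["head_dim"] == cfg["hidden_size"]

# Assuming every `moe_layer_interval`-th block is an MoE layer (as in the
# OpenMoE architecture), 12 layers with interval 4 gives 3 MoE layers.
num_moe_layers = cfg["num_hidden_layers"] // cfg["moe_layer_interval"]
print(f"{num_moe_layers} MoE layers, {cfg['num_experts']} experts each, "
      f"top-{cfg['topk']} routing")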