mirror of https://github.com/hpcaitech/ColossalAI
59 lines
1.1 KiB
JSON
59 lines
1.1 KiB
JSON
{
    "model": [
        {
            "name": "model1"
        },
        {
            "name": "model2"
        }
    ],
    "dataset": [
        {
            "name": "mmlu",
            "metrics": [
                "first_token_accuracy",
                "single_choice_accuracy",
                "perplexity",
                "ppl_score",
                "ppl_score_over_choices"
            ]
        },
        {
            "name": "cmmlu",
            "metrics": [
                "first_token_accuracy",
                "single_choice_accuracy",
                "perplexity",
                "ppl_score",
                "ppl_score_over_choices"
            ]
        },
        {
            "name": "agieval",
            "metrics": [
                "first_token_accuracy",
                "single_choice_accuracy",
                "multi_choice_accuracy",
                "math_equivalence",
                "perplexity",
                "ppl_score_over_choices",
                "ppl_score"
            ]
        },
        {
            "name": "gaokaobench",
            "metrics": [
                "first_token_accuracy",
                "single_choice_accuracy",
                "multi_choice_accuracy",
                "math_equivalence",
                "rouge_score",
                "rouge_zh_score",
                "perplexity",
                "ppl_score_over_choices",
                "ppl_score"
            ]
        }
    ]
}