ColossalAI/applications/ColossalEval/examples/dataset_evaluation/config/evaluation/config.json

{
    "model": [
        {
            "name": "model1"
        },
        {
            "name": "model2"
        }
    ],
    "dataset": [
        {
            "name": "mmlu",
            "metrics": [
                "first_token_accuracy",
                "single_choice_accuracy",
                "perplexity",
                "ppl_score",
                "ppl_score_over_choices"
            ]
        },
        {
            "name": "cmmlu",
            "metrics": [
                "first_token_accuracy",
                "single_choice_accuracy",
                "perplexity",
                "ppl_score",
                "ppl_score_over_choices"
            ]
        },
        {
            "name": "agieval",
            "metrics": [
                "first_token_accuracy",
                "single_choice_accuracy",
                "multi_choice_accuracy",
                "math_equivalence",
                "perplexity",
                "ppl_score_over_choices",
                "ppl_score"
            ]
        },
        {
            "name": "gaokaobench",
            "metrics": [
                "first_token_accuracy",
                "single_choice_accuracy",
                "multi_choice_accuracy",
                "math_equivalence",
                "rouge_score",
                "rouge_zh_score",
                "perplexity",
                "ppl_score_over_choices",
                "ppl_score"
            ]
        }
    ]
}
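
For reference, a minimal sketch of reading this config with Python's standard json module and listing the metrics requested for each dataset. This is an illustrative snippet, not ColossalEval's own loading code, and the path "config.json" is an assumption about where the file above is saved.

# Illustrative only: inspect the evaluation config with the standard library.
import json

# Assumed path; adjust to wherever this config file actually lives.
with open("config.json", "r", encoding="utf-8") as f:
    config = json.load(f)

# Model names compared in the evaluation.
models = [m["name"] for m in config["model"]]
print("models:", ", ".join(models))

# Metrics computed per dataset.
for dataset in config["dataset"]:
    print(f'{dataset["name"]}: {", ".join(dataset["metrics"])}')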