ColossalAI/applications/Chat/evaluate/config/config_en.json

{
    "language": "en",
    "path_for_UniEval": {
        "summarization": "path to unieval-sum",
        "dialogue": "path to unieval-dialog",
        "data2text": "path to unieval-sum"
    },
    "category": {
        "brainstorming": {
            "GPT": [
                "language organization",
                "relevance",
                "creativity",
                "practicality",
                "reasonableness"
            ],
            "Metrics": [
                "Distinct"
            ],
            "UniEval": [
                "summarization-fluency",
                "data2text-naturalness",
                "data2text-informativeness"
            ]
        },
        "chat": {
            "GPT": [
                "language organization",
                "naturalness",
                "engagingness",
                "fidelity"
            ],
            "Metrics": [
                "Distinct"
            ],
            "UniEval": [
                "summarization-fluency",
                "dialogue-naturalness",
                "dialogue-coherence",
                "dialogue-understandability",
                "data2text-naturalness",
                "data2text-informativeness"
            ]
        },
        "classification": {
            "GPT": [
                "relevance",
                "correctness"
            ],
            "Metrics": [
                "Precision",
                "Recall",
                "F1 score",
                "CHRF"
            ],
            "UniEval": [
                "summarization-fluency",
                "data2text-naturalness",
                "data2text-informativeness"
            ]
        },
        "closed_qa": {
            "GPT": [
                "relevance",
                "correctness"
            ],
            "Metrics": [
                "BLEU",
                "ROUGE",
                "BERTScore",
                "CHRF"
            ],
            "UniEval": [
                "summarization-fluency",
                "data2text-naturalness",
                "data2text-informativeness"
            ]
        },
        "extraction": {
            "GPT": [
                "relevance",
                "correctness"
            ],
            "Metrics": [
                "Precision",
                "Recall",
                "F1 score",
                "CHRF"
            ],
            "UniEval": [
                "summarization-fluency",
                "data2text-naturalness",
                "data2text-informativeness"
            ]
        },
        "generation": {
            "GPT": [
                "language organization",
                "relevance",
                "diversity"
            ],
            "Metrics": [
                "BLEU",
                "ROUGE",
                "BERTScore"
            ],
            "UniEval": [
                "summarization-fluency",
                "data2text-naturalness",
                "data2text-informativeness"
            ]
        },
        "logical_reasoning": {
            "GPT": [
                "correctness",
                "relevance",
                "reasonableness"
            ],
            "Metrics": [
                "BLEU",
                "ROUGE",
                "BERTScore",
                "CHRF"
            ],
            "UniEval": []
        },
        "open_qa": {
            "GPT": [
                "language organization",
                "relevance",
                "correctness"
            ],
            "Metrics": [
                "Distinct"
            ],
            "UniEval": [
                "summarization-fluency",
                "data2text-naturalness",
                "data2text-informativeness"
            ]
        },
        "rewriting": {
            "GPT": [
                "language organization",
                "relevance",
                "correctness"
            ],
            "Metrics": [
                "BLEU",
                "ROUGE",
                "BERTScore"
            ],
            "UniEval": [
                "summarization-fluency",
                "data2text-naturalness",
                "data2text-informativeness"
            ]
        },
        "roleplay": {
            "GPT": [
                "language organization",
                "relevance",
                "fidelity",
                "creativity"
            ],
            "Metrics": [
                "Distinct"
            ],
            "UniEval": [
                "summarization-fluency",
                "data2text-naturalness",
                "data2text-informativeness"
            ]
        },
        "summarization": {
            "GPT": [
                "language organization",
                "relevance",
                "correctness",
                "conciseness"
            ],
            "Metrics": [
                "BLEU",
                "ROUGE",
                "BERTScore",
                "CHRF"
            ],
            "UniEval": []
        },
        "Finance": {
            "GPT": [
                "relevance",
                "correctness"
            ],
            "Metrics": [],
            "UniEval": []
        },
        "Law": {
            "GPT": [
                "relevance",
                "correctness"
            ],
            "Metrics": [],
            "UniEval": []
        },
        "Education": {
            "GPT": [
                "relevance",
                "correctness"
            ],
            "Metrics": [],
            "UniEval": []
        },
        "Medical": {
            "GPT": [
                "relevance",
                "correctness"
            ],
            "Metrics": [],
            "UniEval": []
        },
        "STEM": {
            "GPT": [
                "relevance",
                "correctness"
            ],
            "Metrics": [],
            "UniEval": []
        },
        "SocialScience": {
            "GPT": [
                "relevance",
                "correctness"
            ],
            "Metrics": [],
            "UniEval": []
        },
        "Humanity": {
            "GPT": [
                "relevance",
                "correctness"
            ],
            "Metrics": [],
            "UniEval": []
        },
        "Other": {
            "GPT": [
                "relevance",
                "correctness"
            ],
            "Metrics": [],
            "UniEval": []
        },
        "ethics": {
            "GPT": [
                "relevance",
                "correctness"
            ],
            "Metrics": [],
            "UniEval": []
        }
    }
}
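
The config above maps each evaluation category to three groups of criteria: GPT-based scoring dimensions ("GPT"), automatic metrics ("Metrics"), and UniEval dimensions ("UniEval"). Each UniEval entry follows a "task-dimension" pattern (e.g. "dialogue-coherence"), and the task prefix matches a key in "path_for_UniEval" that points to the corresponding UniEval checkpoint. The following is a minimal, hypothetical sketch of how such a config could be loaded and the UniEval dimensions grouped per task; the helper names (load_config, unieval_tasks_for) are illustrative assumptions and not the repository's actual evaluation API.

# A minimal sketch (not the repository's eval script) of consuming config_en.json:
# load the JSON, pick one category, and group its "task-dimension" UniEval entries
# by task so each task can be resolved to a model path via "path_for_UniEval".
import json
from collections import defaultdict


def load_config(path: str) -> dict:
    with open(path, encoding="utf-8") as f:
        return json.load(f)


def unieval_tasks_for(config: dict, category: str) -> dict:
    """Group entries such as "dialogue-coherence" into {"dialogue": ["coherence", ...]}."""
    tasks = defaultdict(list)
    for entry in config["category"][category]["UniEval"]:
        task, dimension = entry.split("-", 1)
        tasks[task].append(dimension)
    return dict(tasks)


if __name__ == "__main__":
    config = load_config("config_en.json")
    spec = config["category"]["chat"]
    print("GPT criteria:", spec["GPT"])
    print("Automatic metrics:", spec["Metrics"])
    for task, dims in unieval_tasks_for(config, "chat").items():
        model_path = config["path_for_UniEval"][task]
        print(f"UniEval task {task!r} ({model_path}): dimensions {dims}")

For the "chat" category this would report the Distinct metric, the four GPT criteria, and three UniEval tasks (summarization, dialogue, data2text), each resolved to the placeholder path given under "path_for_UniEval".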