2023-06-05 13:24:21 +00:00
|
|
|
{
|
|
|
|
"language": "en",
|
2023-06-08 09:38:47 +00:00
|
|
|
"path_for_UniEval": {
|
|
|
|
"summarization": "path to unieval-sum",
|
|
|
|
"dialogue": "path to unieval-dialog",
|
|
|
|
"data2text": "path to unieval-sum"
|
|
|
|
},
|
2023-06-05 13:24:21 +00:00
|
|
|
"category": {
|
|
|
|
"brainstorming": {
|
|
|
|
"GPT": [
|
|
|
|
"language organization",
|
|
|
|
"relevance",
|
|
|
|
"creativity",
|
|
|
|
"practicality",
|
2023-06-13 07:12:29 +00:00
|
|
|
"reasonableness"
|
2023-06-05 13:24:21 +00:00
|
|
|
],
|
|
|
|
"Metrics": [
|
|
|
|
"Distinct"
|
2023-06-08 09:38:47 +00:00
|
|
|
],
|
|
|
|
"UniEval": [
|
|
|
|
"summarization-fluency",
|
|
|
|
"data2text-naturalness",
|
|
|
|
"data2text-informativeness"
|
2023-06-05 13:24:21 +00:00
|
|
|
]
|
|
|
|
},
|
|
|
|
"chat": {
|
|
|
|
"GPT": [
|
|
|
|
"language organization",
|
|
|
|
"naturalness",
|
|
|
|
"engagingness",
|
2023-08-21 06:30:25 +00:00
|
|
|
"fidelity"
|
2023-06-05 13:24:21 +00:00
|
|
|
],
|
|
|
|
"Metrics": [
|
|
|
|
"Distinct"
|
2023-06-08 09:38:47 +00:00
|
|
|
],
|
|
|
|
"UniEval": [
|
|
|
|
"summarization-fluency",
|
|
|
|
"dialogue-naturalness",
|
|
|
|
"dialogue-coherence",
|
|
|
|
"dialogue-understandability",
|
|
|
|
"data2text-naturalness",
|
|
|
|
"data2text-informativeness"
|
2023-06-05 13:24:21 +00:00
|
|
|
]
|
|
|
|
},
|
|
|
|
"classification": {
|
|
|
|
"GPT": [
|
|
|
|
"relevance",
|
|
|
|
"correctness"
|
|
|
|
],
|
|
|
|
"Metrics": [
|
|
|
|
"Precision",
|
|
|
|
"Recall",
|
2023-06-08 09:38:47 +00:00
|
|
|
"F1 score",
|
|
|
|
"CHRF"
|
|
|
|
],
|
|
|
|
"UniEval": [
|
|
|
|
"summarization-fluency",
|
|
|
|
"data2text-naturalness",
|
|
|
|
"data2text-informativeness"
|
2023-06-05 13:24:21 +00:00
|
|
|
]
|
|
|
|
},
|
|
|
|
"closed_qa": {
|
|
|
|
"GPT": [
|
|
|
|
"relevance",
|
|
|
|
"correctness"
|
|
|
|
],
|
|
|
|
"Metrics": [
|
|
|
|
"BLEU",
|
|
|
|
"ROUGE",
|
2023-06-08 09:38:47 +00:00
|
|
|
"BERTScore",
|
|
|
|
"CHRF"
|
|
|
|
],
|
|
|
|
"UniEval": [
|
|
|
|
"summarization-fluency",
|
|
|
|
"data2text-naturalness",
|
|
|
|
"data2text-informativeness"
|
2023-06-05 13:24:21 +00:00
|
|
|
]
|
|
|
|
},
|
|
|
|
"extraction": {
|
|
|
|
"GPT": [
|
|
|
|
"relevance",
|
|
|
|
"correctness"
|
|
|
|
],
|
|
|
|
"Metrics": [
|
|
|
|
"Precision",
|
|
|
|
"Recall",
|
2023-06-08 09:38:47 +00:00
|
|
|
"F1 score",
|
|
|
|
"CHRF"
|
|
|
|
],
|
|
|
|
"UniEval": [
|
|
|
|
"summarization-fluency",
|
|
|
|
"data2text-naturalness",
|
|
|
|
"data2text-informativeness"
|
2023-06-05 13:24:21 +00:00
|
|
|
]
|
|
|
|
},
|
|
|
|
"generation": {
|
|
|
|
"GPT": [
|
|
|
|
"language organization",
|
|
|
|
"relevance",
|
|
|
|
"diversity"
|
|
|
|
],
|
|
|
|
"Metrics": [
|
|
|
|
"BLEU",
|
|
|
|
"ROUGE",
|
|
|
|
"BERTScore"
|
2023-06-08 09:38:47 +00:00
|
|
|
],
|
|
|
|
"UniEval": [
|
|
|
|
"summarization-fluency",
|
|
|
|
"data2text-naturalness",
|
|
|
|
"data2text-informativeness"
|
2023-06-05 13:24:21 +00:00
|
|
|
]
|
|
|
|
},
|
2023-08-21 06:30:25 +00:00
|
|
|
"logical_reasoning": {
|
|
|
|
"GPT": [
|
|
|
|
"correctness",
|
|
|
|
"relevance",
|
|
|
|
"reasonableness"
|
|
|
|
],
|
|
|
|
"Metrics": [
|
|
|
|
"BLEU",
|
|
|
|
"ROUGE",
|
|
|
|
"BERTScore",
|
|
|
|
"CHRF"
|
|
|
|
],
|
|
|
|
"UniEval": [
|
|
|
|
]
|
|
|
|
},
|
2023-06-05 13:24:21 +00:00
|
|
|
"open_qa": {
|
|
|
|
"GPT": [
|
|
|
|
"language organization",
|
|
|
|
"relevance",
|
|
|
|
"correctness"
|
|
|
|
],
|
|
|
|
"Metrics": [
|
|
|
|
"Distinct"
|
2023-06-08 09:38:47 +00:00
|
|
|
],
|
|
|
|
"UniEval": [
|
|
|
|
"summarization-fluency",
|
|
|
|
"data2text-naturalness",
|
|
|
|
"data2text-informativeness"
|
2023-06-05 13:24:21 +00:00
|
|
|
]
|
|
|
|
},
|
|
|
|
"rewriting": {
|
|
|
|
"GPT": [
|
|
|
|
"language organization",
|
|
|
|
"relevance",
|
|
|
|
"correctness"
|
|
|
|
],
|
|
|
|
"Metrics": [
|
|
|
|
"BLEU",
|
|
|
|
"ROUGE",
|
|
|
|
"BERTScore"
|
2023-06-08 09:38:47 +00:00
|
|
|
],
|
|
|
|
"UniEval": [
|
|
|
|
"summarization-fluency",
|
|
|
|
"data2text-naturalness",
|
|
|
|
"data2text-informativeness"
|
2023-06-05 13:24:21 +00:00
|
|
|
]
|
|
|
|
},
|
|
|
|
"roleplay": {
|
|
|
|
"GPT": [
|
|
|
|
"language organization",
|
|
|
|
"relevance",
|
|
|
|
"fidelity",
|
|
|
|
"creativity"
|
|
|
|
],
|
|
|
|
"Metrics": [
|
|
|
|
"Distinct"
|
2023-06-08 09:38:47 +00:00
|
|
|
],
|
|
|
|
"UniEval": [
|
|
|
|
"summarization-fluency",
|
|
|
|
"data2text-naturalness",
|
|
|
|
"data2text-informativeness"
|
2023-06-05 13:24:21 +00:00
|
|
|
]
|
|
|
|
},
|
|
|
|
"summarization": {
|
|
|
|
"GPT": [
|
|
|
|
"language organization",
|
|
|
|
"relevance",
|
|
|
|
"correctness",
|
|
|
|
"conciseness"
|
|
|
|
],
|
|
|
|
"Metrics": [
|
|
|
|
"BLEU",
|
|
|
|
"ROUGE",
|
2023-06-08 09:38:47 +00:00
|
|
|
"BERTScore",
|
|
|
|
"CHRF"
|
|
|
|
],
|
|
|
|
"UniEval": [
|
2023-08-21 06:30:25 +00:00
|
|
|
]
|
|
|
|
},
|
|
|
|
"Finance": {
|
|
|
|
"GPT": [
|
|
|
|
"relevance",
|
|
|
|
"correctness"
|
|
|
|
],
|
|
|
|
"Metrics": [
|
|
|
|
],
|
|
|
|
"UniEval": [
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"Law": {
|
|
|
|
"GPT": [
|
|
|
|
"relevance",
|
|
|
|
"correctness"
|
|
|
|
],
|
|
|
|
"Metrics": [
|
|
|
|
],
|
|
|
|
"UniEval": [
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"Education": {
|
|
|
|
"GPT": [
|
|
|
|
"relevance",
|
|
|
|
"correctness"
|
|
|
|
],
|
|
|
|
"Metrics": [
|
|
|
|
],
|
|
|
|
"UniEval": [
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"Medical": {
|
|
|
|
"GPT": [
|
|
|
|
"relevance",
|
|
|
|
"correctness"
|
|
|
|
],
|
|
|
|
"Metrics": [
|
|
|
|
],
|
|
|
|
"UniEval": [
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"STEM": {
|
|
|
|
"GPT": [
|
|
|
|
"relevance",
|
|
|
|
"correctness"
|
|
|
|
],
|
|
|
|
"Metrics": [
|
|
|
|
],
|
|
|
|
"UniEval": [
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"SocialScience": {
|
|
|
|
"GPT": [
|
|
|
|
"relevance",
|
|
|
|
"correctness"
|
|
|
|
],
|
|
|
|
"Metrics": [
|
|
|
|
],
|
|
|
|
"UniEval": [
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"Humanity": {
|
|
|
|
"GPT": [
|
|
|
|
"relevance",
|
|
|
|
"correctness"
|
|
|
|
],
|
|
|
|
"Metrics": [
|
|
|
|
],
|
|
|
|
"UniEval": [
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"Other": {
|
|
|
|
"GPT": [
|
|
|
|
"relevance",
|
|
|
|
"correctness"
|
|
|
|
],
|
|
|
|
"Metrics": [
|
|
|
|
],
|
|
|
|
"UniEval": [
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"ethics": {
|
|
|
|
"GPT": [
|
|
|
|
"relevance",
|
|
|
|
"correctness"
|
|
|
|
],
|
|
|
|
"Metrics": [
|
|
|
|
],
|
|
|
|
"UniEval": [
|
2023-06-05 13:24:21 +00:00
|
|
|
]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|