{ "language": "en", "path_for_UniEval": { "summarization": "path to unieval-sum", "dialogue": "path to unieval-dialog", "data2text": "path to unieval-sum" }, "category": { "brainstorming": { "GPT": [ "language organization", "relevance", "creativity", "practicality", "reasonableness" ], "Metrics": [ "Distinct" ], "UniEval": [ "summarization-fluency", "data2text-naturalness", "data2text-informativeness" ] }, "chat": { "GPT": [ "language organization", "naturalness", "engagingness", "fidelity" ], "Metrics": [ "Distinct" ], "UniEval": [ "summarization-fluency", "dialogue-naturalness", "dialogue-coherence", "dialogue-understandability", "data2text-naturalness", "data2text-informativeness" ] }, "classification": { "GPT": [ "relevance", "correctness" ], "Metrics": [ "Precision", "Recall", "F1 score", "CHRF" ], "UniEval": [ "summarization-fluency", "data2text-naturalness", "data2text-informativeness" ] }, "closed_qa": { "GPT": [ "relevance", "correctness" ], "Metrics": [ "BLEU", "ROUGE", "BERTScore", "CHRF" ], "UniEval": [ "summarization-fluency", "data2text-naturalness", "data2text-informativeness" ] }, "extraction": { "GPT": [ "relevance", "correctness" ], "Metrics": [ "Precision", "Recall", "F1 score", "CHRF" ], "UniEval": [ "summarization-fluency", "data2text-naturalness", "data2text-informativeness" ] }, "generation": { "GPT": [ "language organization", "relevance", "diversity" ], "Metrics": [ "BLEU", "ROUGE", "BERTScore" ], "UniEval": [ "summarization-fluency", "data2text-naturalness", "data2text-informativeness" ] }, "logical_reasoning": { "GPT": [ "correctness", "relevance", "reasonableness" ], "Metrics": [ "BLEU", "ROUGE", "BERTScore", "CHRF" ], "UniEval": [ ] }, "open_qa": { "GPT": [ "language organization", "relevance", "correctness" ], "Metrics": [ "Distinct" ], "UniEval": [ "summarization-fluency", "data2text-naturalness", "data2text-informativeness" ] }, "rewriting": { "GPT": [ "language organization", "relevance", "correctness" ], "Metrics": [ "BLEU", "ROUGE", "BERTScore" ], "UniEval": [ "summarization-fluency", "data2text-naturalness", "data2text-informativeness" ] }, "roleplay": { "GPT": [ "language organization", "relevance", "fidelity", "creativity" ], "Metrics": [ "Distinct" ], "UniEval": [ "summarization-fluency", "data2text-naturalness", "data2text-informativeness" ] }, "summarization": { "GPT": [ "language organization", "relevance", "correctness", "conciseness" ], "Metrics": [ "BLEU", "ROUGE", "BERTScore", "CHRF" ], "UniEval": [ ] }, "Finance": { "GPT": [ "relevance", "correctness" ], "Metrics": [ ], "UniEval": [ ] }, "Law": { "GPT": [ "relevance", "correctness" ], "Metrics": [ ], "UniEval": [ ] }, "Education": { "GPT": [ "relevance", "correctness" ], "Metrics": [ ], "UniEval": [ ] }, "Medical": { "GPT": [ "relevance", "correctness" ], "Metrics": [ ], "UniEval": [ ] }, "STEM": { "GPT": [ "relevance", "correctness" ], "Metrics": [ ], "UniEval": [ ] }, "SocialScience": { "GPT": [ "relevance", "correctness" ], "Metrics": [ ], "UniEval": [ ] }, "Humanity": { "GPT": [ "relevance", "correctness" ], "Metrics": [ ], "UniEval": [ ] }, "Other": { "GPT": [ "relevance", "correctness" ], "Metrics": [ ], "UniEval": [ ] }, "ethics": { "GPT": [ "relevance", "correctness" ], "Metrics": [ ], "UniEval": [ ] } } }