{
  "language": "cn",
  "category": {
    "brainstorming": {
      "GPT": [
        "language organization",
        "relevance",
        "creativity",
        "practicality",
        "reasonableness"
      ],
      "Metrics": [
        "Distinct"
      ]
    },
    "chat": {
      "GPT": [
        "language organization",
        "naturalness",
        "engagingness",
        "fidelity"
      ],
      "Metrics": [
        "Distinct"
      ]
    },
    "classification": {
      "GPT": [
        "relevance",
        "correctness"
      ],
      "Metrics": [
        "Precision",
        "Recall",
        "F1 score",
        "CHRF"
      ]
    },
    "closed_qa": {
      "GPT": [
        "relevance",
        "correctness"
      ],
      "Metrics": [
        "BLEU",
        "ROUGE",
        "BERTScore",
        "CHRF"
      ]
    },
    "extraction": {
      "GPT": [
        "relevance",
        "correctness"
      ],
      "Metrics": [
        "Precision",
        "Recall",
        "F1 score",
        "CHRF"
      ]
    },
    "generation": {
      "GPT": [
        "language organization",
        "relevance",
        "diversity"
      ],
      "Metrics": [
        "BLEU",
        "ROUGE",
        "BERTScore"
      ]
    },
    "logical_reasoning": {
      "GPT": [
        "correctness",
        "relevance",
        "reasonableness"
      ],
      "Metrics": [
        "BLEU",
        "ROUGE",
        "BERTScore",
        "CHRF"
      ]
    },
    "open_qa": {
      "GPT": [
        "language organization",
        "relevance",
        "correctness"
      ],
      "Metrics": [
        "Distinct"
      ]
    },
    "rewriting": {
      "GPT": [
        "language organization",
        "relevance",
        "correctness"
      ],
      "Metrics": [
        "BLEU",
        "ROUGE",
        "BERTScore"
      ]
    },
    "roleplay": {
      "GPT": [
        "language organization",
        "relevance",
        "fidelity",
        "creativity"
      ],
      "Metrics": [
        "Distinct"
      ]
    },
    "summarization": {
      "GPT": [
        "language organization",
        "relevance",
        "correctness",
        "conciseness"
      ],
      "Metrics": []
    },
    "Finance": {
      "GPT": [
        "relevance",
        "correctness"
      ],
      "Metrics": []
    },
    "Law": {
      "GPT": [
        "relevance",
        "correctness"
      ],
      "Metrics": []
    },
    "Education": {
      "GPT": [
        "relevance",
        "correctness"
      ],
      "Metrics": []
    },
    "Medical": {
      "GPT": [
        "relevance",
        "correctness"
      ],
      "Metrics": []
    },
    "STEM": {
      "GPT": [
        "relevance",
        "correctness"
      ],
      "Metrics": []
    },
    "SocialScience": {
      "GPT": [
        "relevance",
        "correctness"
      ],
      "Metrics": []
    },
    "Humanity": {
      "GPT": [
        "relevance",
        "correctness"
      ],
      "Metrics": []
    },
    "Other": {
      "GPT": [
        "relevance",
        "correctness"
      ],
      "Metrics": []
    },
    "ethics": {
      "GPT": [
        "relevance",
        "correctness"
      ],
      "Metrics": []
    }
  }
}