ColossalAI/applications/ColossalEval/colossal_eval/dataset/ceval.py

import copy
import csv
import os
from typing import Dict, List

from colossalai.logging import DistributedLogger

from .base import BaseDataset

ceval_subject_mapping = {
    "computer_network": ["Computer Network", "计算机网络", "STEM"],
    "operating_system": ["Operating System", "操作系统", "STEM"],
    "computer_architecture": ["Computer Architecture", "计算机组成", "STEM"],
    "college_programming": ["College Programming", "大学编程", "STEM"],
    "college_physics": ["College Physics", "大学物理", "STEM"],
    "college_chemistry": ["College Chemistry", "大学化学", "STEM"],
    "advanced_mathematics": ["Advanced Mathematics", "高等数学", "STEM"],
    "probability_and_statistics": ["Probability and Statistics", "概率统计", "STEM"],
    "discrete_mathematics": ["Discrete Mathematics", "离散数学", "STEM"],
    "electrical_engineer": ["Electrical Engineer", "注册电气工程师", "STEM"],
    "metrology_engineer": ["Metrology Engineer", "注册计量师", "STEM"],
    "high_school_mathematics": ["High School Mathematics", "高中数学", "STEM"],
    "high_school_physics": ["High School Physics", "高中物理", "STEM"],
    "high_school_chemistry": ["High School Chemistry", "高中化学", "STEM"],
    "high_school_biology": ["High School Biology", "高中生物", "STEM"],
    "middle_school_mathematics": ["Middle School Mathematics", "初中数学", "STEM"],
    "middle_school_biology": ["Middle School Biology", "初中生物", "STEM"],
    "middle_school_physics": ["Middle School Physics", "初中物理", "STEM"],
    "middle_school_chemistry": ["Middle School Chemistry", "初中化学", "STEM"],
    "veterinary_medicine": ["Veterinary Medicine", "兽医学", "STEM"],
    "college_economics": ["College Economics", "大学经济学", "Social Science"],
    "business_administration": ["Business Administration", "工商管理", "Social Science"],
    "marxism": ["Marxism", "马克思主义基本原理", "Social Science"],
    "mao_zedong_thought": ["Mao Zedong Thought", "毛泽东思想和中国特色社会主义理论体系概论", "Social Science"],
    "education_science": ["Education Science", "教育学", "Social Science"],
    "teacher_qualification": ["Teacher Qualification", "教师资格", "Social Science"],
    "high_school_politics": ["High School Politics", "高中政治", "Social Science"],
    "high_school_geography": ["High School Geography", "高中地理", "Social Science"],
    "middle_school_politics": ["Middle School Politics", "初中政治", "Social Science"],
    "middle_school_geography": ["Middle School Geography", "初中地理", "Social Science"],
    "modern_chinese_history": ["Modern Chinese History", "近代史纲要", "Humanities"],
    "ideological_and_moral_cultivation": ["Ideological and Moral Cultivation", "思想道德修养与法律基础", "Humanities"],
    "logic": ["Logic", "逻辑学", "Humanities"],
    "law": ["Law", "法学", "Humanities"],
    "chinese_language_and_literature": ["Chinese Language and Literature", "中国语言文学", "Humanities"],
    "art_studies": ["Art Studies", "艺术学", "Humanities"],
    "professional_tour_guide": ["Professional Tour Guide", "导游资格", "Humanities"],
    "legal_professional": ["Legal Professional", "法律职业资格", "Humanities"],
    "high_school_chinese": ["High School Chinese", "高中语文", "Humanities"],
    "high_school_history": ["High School History", "高中历史", "Humanities"],
    "middle_school_history": ["Middle School History", "初中历史", "Humanities"],
    "civil_servant": ["Civil Servant", "公务员", "Other"],
    "sports_science": ["Sports Science", "体育学", "Other"],
    "plant_protection": ["Plant Protection", "植物保护", "Other"],
    "basic_medicine": ["Basic Medicine", "基础医学", "Other"],
    "clinical_medicine": ["Clinical Medicine", "临床医学", "Other"],
    "urban_and_rural_planner": ["Urban and Rural Planner", "注册城乡规划师", "Other"],
    "accountant": ["Accountant", "注册会计师", "Other"],
    "fire_engineer": ["Fire Engineer", "注册消防工程师", "Other"],
    "environmental_impact_assessment_engineer": [
        "Environmental Impact Assessment Engineer",
        "环境影响评价工程师",
        "Other",
    ],
    "tax_accountant": ["Tax Accountant", "税务师", "Other"],
    "physician": ["Physician", "医师资格", "Other"],
}

default_inference_kwargs = {
    "calculate_loss": False,
    "all_classes": ["A", "B", "C", "D"],
    "language": "Chinese",
    "calculate_overall_loss": False,
    "max_new_tokens": 32,
}


def get_few_shot_data(data: List[Dict], subject):
    few_shot_data = [f"以下是中国关于{subject}考试的单项选择题，请选出其中的正确答案。"]
    for i in data:
        few_shot_data.append(i["input"] + i["target"])
    return few_shot_data


class CEvalDataset(BaseDataset):
    """
    Dataset class for CEval dataset.
    Data source: https://huggingface.co/datasets/ceval/ceval-exam
    This dataset class will convert the original dataset into the inference dataset.
    """

    @staticmethod
    def load(path: str, logger: DistributedLogger, few_shot: bool, *args, **kwargs) -> List[Dict]:
        dataset = {"dev": {}, "test": {}}
        for split in ["dev", "test"]:
            files = os.listdir(os.path.join(path, split))
            files.sort()

            for file in files:
                subject = file[0 : -len(f"_{split}.csv")]
                subject = ceval_subject_mapping[subject][1]

                file_dir = os.path.join(path, split, file)

                dataset[split][subject] = {"data": []}

                # It's been tested that each data sample in one subcategory have same inference arguments.
                dataset[split][subject]["inference_kwargs"] = copy.deepcopy(default_inference_kwargs)

                if split == "test" and few_shot:
                    dataset[split][subject]["inference_kwargs"]["few_shot_data"] = get_few_shot_data(
                        dataset["dev"][subject]["data"], subject
                    )

                with open(file_dir, encoding="utf-8") as f:
                    reader = csv.reader(f)
                    _ = next(reader)
                    for row in reader:
                        # Dev split have answer and explanation so len(row) is 8
                        # But test split doesn't contain answer and explanation, so len(row) is 6
                        assert len(row) >= 6
                        choices = f"A. {row[2]}\nB. {row[3]}\nC. {row[4]}\nD. {row[5]}"
                        data_sample = {
                            "dataset": "ceval",
                            "split": split,
                            "category": subject,
                            "instruction": f"以下是中国关于{subject}考试的单项选择题，请选出其中的正确答案。",
                            "input": f"题目：{row[1]}\n{choices}\n答案：",
                            "output": "",
                            "target": row[6] if split == "dev" else "",
                            "id": int(row[0]),
                        }

                        dataset[split][subject]["data"].append(data_sample)

        return dataset
[feature] ColossalEval: Evaluation Pipeline for LLMs (#4786) * Add ColossalEval * Delete evaluate in Chat --------- Co-authored-by: Xu Yuanchen <yuanchen.xu00@gmail.com> Co-authored-by: Tong Li <tong.li352711588@gmail.com> 1 year ago			`import copy`
			`import csv`
			`import os`
			`from typing import Dict, List`

			`from colossalai.logging import DistributedLogger`

			`from .base import BaseDataset`

			`ceval_subject_mapping = {`
			`"computer_network": ["Computer Network", "计算机网络", "STEM"],`
			`"operating_system": ["Operating System", "操作系统", "STEM"],`
			`"computer_architecture": ["Computer Architecture", "计算机组成", "STEM"],`
			`"college_programming": ["College Programming", "大学编程", "STEM"],`
			`"college_physics": ["College Physics", "大学物理", "STEM"],`
			`"college_chemistry": ["College Chemistry", "大学化学", "STEM"],`
			`"advanced_mathematics": ["Advanced Mathematics", "高等数学", "STEM"],`
			`"probability_and_statistics": ["Probability and Statistics", "概率统计", "STEM"],`
			`"discrete_mathematics": ["Discrete Mathematics", "离散数学", "STEM"],`
			`"electrical_engineer": ["Electrical Engineer", "注册电气工程师", "STEM"],`
			`"metrology_engineer": ["Metrology Engineer", "注册计量师", "STEM"],`
			`"high_school_mathematics": ["High School Mathematics", "高中数学", "STEM"],`
			`"high_school_physics": ["High School Physics", "高中物理", "STEM"],`
			`"high_school_chemistry": ["High School Chemistry", "高中化学", "STEM"],`
			`"high_school_biology": ["High School Biology", "高中生物", "STEM"],`
			`"middle_school_mathematics": ["Middle School Mathematics", "初中数学", "STEM"],`
			`"middle_school_biology": ["Middle School Biology", "初中生物", "STEM"],`
			`"middle_school_physics": ["Middle School Physics", "初中物理", "STEM"],`
			`"middle_school_chemistry": ["Middle School Chemistry", "初中化学", "STEM"],`
			`"veterinary_medicine": ["Veterinary Medicine", "兽医学", "STEM"],`
			`"college_economics": ["College Economics", "大学经济学", "Social Science"],`
			`"business_administration": ["Business Administration", "工商管理", "Social Science"],`
			`"marxism": ["Marxism", "马克思主义基本原理", "Social Science"],`
			`"mao_zedong_thought": ["Mao Zedong Thought", "毛泽东思想和中国特色社会主义理论体系概论", "Social Science"],`
			`"education_science": ["Education Science", "教育学", "Social Science"],`
			`"teacher_qualification": ["Teacher Qualification", "教师资格", "Social Science"],`
			`"high_school_politics": ["High School Politics", "高中政治", "Social Science"],`
			`"high_school_geography": ["High School Geography", "高中地理", "Social Science"],`
			`"middle_school_politics": ["Middle School Politics", "初中政治", "Social Science"],`
			`"middle_school_geography": ["Middle School Geography", "初中地理", "Social Science"],`
			`"modern_chinese_history": ["Modern Chinese History", "近代史纲要", "Humanities"],`
			`"ideological_and_moral_cultivation": ["Ideological and Moral Cultivation", "思想道德修养与法律基础", "Humanities"],`
			`"logic": ["Logic", "逻辑学", "Humanities"],`
			`"law": ["Law", "法学", "Humanities"],`
			`"chinese_language_and_literature": ["Chinese Language and Literature", "中国语言文学", "Humanities"],`
			`"art_studies": ["Art Studies", "艺术学", "Humanities"],`
			`"professional_tour_guide": ["Professional Tour Guide", "导游资格", "Humanities"],`
			`"legal_professional": ["Legal Professional", "法律职业资格", "Humanities"],`
			`"high_school_chinese": ["High School Chinese", "高中语文", "Humanities"],`
			`"high_school_history": ["High School History", "高中历史", "Humanities"],`
			`"middle_school_history": ["Middle School History", "初中历史", "Humanities"],`
			`"civil_servant": ["Civil Servant", "公务员", "Other"],`
			`"sports_science": ["Sports Science", "体育学", "Other"],`
			`"plant_protection": ["Plant Protection", "植物保护", "Other"],`
			`"basic_medicine": ["Basic Medicine", "基础医学", "Other"],`
			`"clinical_medicine": ["Clinical Medicine", "临床医学", "Other"],`
			`"urban_and_rural_planner": ["Urban and Rural Planner", "注册城乡规划师", "Other"],`
			`"accountant": ["Accountant", "注册会计师", "Other"],`
			`"fire_engineer": ["Fire Engineer", "注册消防工程师", "Other"],`
[pre-commit.ci] pre-commit autoupdate (#5572) * [pre-commit.ci] pre-commit autoupdate updates: - [github.com/PyCQA/autoflake: v2.2.1 → v2.3.1](https://github.com/PyCQA/autoflake/compare/v2.2.1...v2.3.1) - [github.com/pycqa/isort: 5.12.0 → 5.13.2](https://github.com/pycqa/isort/compare/5.12.0...5.13.2) - [github.com/psf/black-pre-commit-mirror: 23.9.1 → 24.4.2](https://github.com/psf/black-pre-commit-mirror/compare/23.9.1...24.4.2) - [github.com/pre-commit/mirrors-clang-format: v13.0.1 → v18.1.7](https://github.com/pre-commit/mirrors-clang-format/compare/v13.0.1...v18.1.7) - [github.com/pre-commit/pre-commit-hooks: v4.3.0 → v4.6.0](https://github.com/pre-commit/pre-commit-hooks/compare/v4.3.0...v4.6.0) * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> 5 months ago			`"environmental_impact_assessment_engineer": [`
			`"Environmental Impact Assessment Engineer",`
			`"环境影响评价工程师",`
			`"Other",`
			`],`
[feature] ColossalEval: Evaluation Pipeline for LLMs (#4786) * Add ColossalEval * Delete evaluate in Chat --------- Co-authored-by: Xu Yuanchen <yuanchen.xu00@gmail.com> Co-authored-by: Tong Li <tong.li352711588@gmail.com> 1 year ago			`"tax_accountant": ["Tax Accountant", "税务师", "Other"],`
			`"physician": ["Physician", "医师资格", "Other"],`
			`}`

			`default_inference_kwargs = {`
			`"calculate_loss": False,`
			`"all_classes": ["A", "B", "C", "D"],`
			`"language": "Chinese",`
[ColossalEval] support for vllm (#6056) * support vllm * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * modify vllm and update readme * run pre-commit * remove dupilicated lines and refine code * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update param name * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * refine code * update readme * refine code * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> 2 months ago			`"calculate_overall_loss": False,`
[feature] ColossalEval: Evaluation Pipeline for LLMs (#4786) * Add ColossalEval * Delete evaluate in Chat --------- Co-authored-by: Xu Yuanchen <yuanchen.xu00@gmail.com> Co-authored-by: Tong Li <tong.li352711588@gmail.com> 1 year ago			`"max_new_tokens": 32,`
			`}`


[ColossalEval] Support GSM, Data Leakage Evaluation and Tensor Parallel (#5169) * Support GSM, Data Leakage Evaluation and Tensor Parallel * remove redundant code and update inference.py in examples/gpt_evaluation --------- Co-authored-by: Xu Yuanchen <yuanchen.xu00@gmail.com> 12 months ago			`def get_few_shot_data(data: List[Dict], subject):`
			`few_shot_data = [f"以下是中国关于{subject}考试的单项选择题，请选出其中的正确答案。"]`
[feature] ColossalEval: Evaluation Pipeline for LLMs (#4786) * Add ColossalEval * Delete evaluate in Chat --------- Co-authored-by: Xu Yuanchen <yuanchen.xu00@gmail.com> Co-authored-by: Tong Li <tong.li352711588@gmail.com> 1 year ago			`for i in data:`
			`few_shot_data.append(i["input"] + i["target"])`
			`return few_shot_data`


			`class CEvalDataset(BaseDataset):`
			`"""`
			`Dataset class for CEval dataset.`
			`Data source: https://huggingface.co/datasets/ceval/ceval-exam`
			`This dataset class will convert the original dataset into the inference dataset.`
			`"""`

			`@staticmethod`
[feat] Dist Loader for Eval (#5950) * support auto distributed data loader * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * support auto distributed data loader * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix tp error * remove unused parameters * remove unused * update inference * update docs * update inference --------- Co-authored-by: Michelle <qianranma8@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> 4 months ago			`def load(path: str, logger: DistributedLogger, few_shot: bool, args, *kwargs) -> List[Dict]:`
[feature] ColossalEval: Evaluation Pipeline for LLMs (#4786) * Add ColossalEval * Delete evaluate in Chat --------- Co-authored-by: Xu Yuanchen <yuanchen.xu00@gmail.com> Co-authored-by: Tong Li <tong.li352711588@gmail.com> 1 year ago			`dataset = {"dev": {}, "test": {}}`
			`for split in ["dev", "test"]:`
			`files = os.listdir(os.path.join(path, split))`
			`files.sort()`

			`for file in files:`
			`subject = file[0 : -len(f"_{split}.csv")]`
			`subject = ceval_subject_mapping[subject][1]`

			`file_dir = os.path.join(path, split, file)`

			`dataset[split][subject] = {"data": []}`

			`# It's been tested that each data sample in one subcategory have same inference arguments.`
			`dataset[split][subject]["inference_kwargs"] = copy.deepcopy(default_inference_kwargs)`

			`if split == "test" and few_shot:`
			`dataset[split][subject]["inference_kwargs"]["few_shot_data"] = get_few_shot_data(`
[ColossalEval] Support GSM, Data Leakage Evaluation and Tensor Parallel (#5169) * Support GSM, Data Leakage Evaluation and Tensor Parallel * remove redundant code and update inference.py in examples/gpt_evaluation --------- Co-authored-by: Xu Yuanchen <yuanchen.xu00@gmail.com> 12 months ago			`dataset["dev"][subject]["data"], subject`
[feature] ColossalEval: Evaluation Pipeline for LLMs (#4786) * Add ColossalEval * Delete evaluate in Chat --------- Co-authored-by: Xu Yuanchen <yuanchen.xu00@gmail.com> Co-authored-by: Tong Li <tong.li352711588@gmail.com> 1 year ago			`)`

			`with open(file_dir, encoding="utf-8") as f:`
			`reader = csv.reader(f)`
			`_ = next(reader)`
			`for row in reader:`
			`# Dev split have answer and explanation so len(row) is 8`
			`# But test split doesn't contain answer and explanation, so len(row) is 6`
			`assert len(row) >= 6`
			`choices = f"A. {row[2]}\nB. {row[3]}\nC. {row[4]}\nD. {row[5]}"`
			`data_sample = {`
			`"dataset": "ceval",`
			`"split": split,`
			`"category": subject,`
			`"instruction": f"以下是中国关于{subject}考试的单项选择题，请选出其中的正确答案。",`
			`"input": f"题目：{row[1]}\n{choices}\n答案：",`
			`"output": "",`
			`"target": row[6] if split == "dev" else "",`
			`"id": int(row[0]),`
			`}`

			`dataset[split][subject]["data"].append(data_sample)`

			`return dataset`