ColossalAI/applications/ColossalEval/examples/dataset_evaluation/eval_dataset.py

74 lines
2.7 KiB
Python

import argparse
import os
import tabulate
from colossal_eval.evaluate.dataset_evaluator import DatasetEvaluator
from colossal_eval.utils import jdump, jload
def main(args):
config = jload(args.config)
evaluation_results = {dataset["name"]: {} for dataset in config["dataset"]}
evaluation_results_table = {dataset["name"]: {} for dataset in config["dataset"]}
evaluator = DatasetEvaluator()
for dataset_parameter in config["dataset"]:
dataset_name = dataset_parameter["name"]
metrics = dataset_parameter["metrics"]
results_metric_model = {metric: {model["name"]: None for model in config["model"]} for metric in metrics}
for model in config["model"]:
model_name = model["name"]
data = jload(
os.path.join(args.inference_results_path, model_name, f"{dataset_name}_inference_results.json")
)
results = evaluator.get_evaluation_results(data, dataset_name, model_name, metrics)
for metric, score in results.items():
results_metric_model[metric][model_name] = score["ALL"]
evaluation_results[dataset_name][model_name] = results
evaluation_results_table[dataset_name] = results_metric_model
table = []
header = ["dataset", "metric"] + [model["name"] for model in config["model"]]
table.append(header)
for dataset_parameter in config["dataset"]:
dataset_name = dataset_parameter["name"]
metrics = dataset_parameter["metrics"]
for metric, model_results in evaluation_results_table[dataset_name].items():
row = [dataset_name]
for model, score in model_results.items():
if len(row) == 1:
row.extend([metric, "{:.02f}".format(score)])
else:
row.append("{:.02f}".format(score))
table.append(row)
table = tabulate.tabulate(table, headers="firstrow")
print(table)
os.makedirs(args.evaluation_results_save_path, exist_ok=True)
with open(os.path.join(args.evaluation_results_save_path, "evaluation_results_table.txt"), "w") as file:
file.write(table)
jdump(evaluation_results, os.path.join(args.evaluation_results_save_path, "evaluation_results.json"))
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="ColossalEval evaluation process.")
parser.add_argument("--config", type=str, default=None, required=True, help="path to config file")
parser.add_argument("--inference_results_path", type=str, default=None, help="path to inference results")
parser.add_argument(
"--evaluation_results_save_path", type=str, default=None, help="path to save evaluation results"
)
args = parser.parse_args()
main(args)