mirror of https://github.com/hpcaitech/ColossalAI
parent 79718fae04
commit 3ff60d13b0
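In short, the commit does two things: in DatasetEvaluator it renames the softmax-based bookkeeping (softmaxs, softmax_over_choices) to logits/logits_over_choices and collapses a multi-line zip(...) loop header into one line, and in the HuggingFace model wrappers it moves the torch_dtype default into an else branch so that eval() only ever runs on a user-supplied string.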
@@ -58,12 +58,12 @@ class DatasetEvaluator(object):
         [sample["output"] for sample in self.data[category]["data"]]

         flag = False
-        softmaxs = []
+        logits = []
         for i, sample in enumerate(self.data[category]["data"]):
-            if np.any(np.isnan(np.array(list(sample["softmax_over_choices"].values())))):
+            if np.any(np.isnan(np.array(list(sample["logits_over_choices"].values())))):
                 if not flag:
                     print(
-                        f"NaN in the softmax, switch to exact match for category {category} in dataset {self.dataset_name} in model {self.model_name}."
+                        f"NaN in the logits, switch to exact match for category {category} in dataset {self.dataset_name} in model {self.model_name}."
                     )
                     flag = True
                 score = 0
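A note on the guard above: logits_over_choices maps each answer choice to a logit, and a NaN in any entry means the per-choice scores are unusable for that sample. A minimal sketch of the check (the helper name is mine, not the repo's):

    import numpy as np

    def has_nan_choice_logits(logits_over_choices: dict) -> bool:
        # True when any per-choice logit is NaN, which triggers the
        # exact-match fallback announced by the print() in the hunk above.
        return bool(np.any(np.isnan(np.array(list(logits_over_choices.values())))))

    print(has_nan_choice_logits({"A": 1.2, "B": float("nan")}))  # True
    print(has_nan_choice_logits({"A": 1.2, "B": -0.4}))          # False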
@@ -79,13 +79,13 @@ class DatasetEvaluator(object):
                         score,
                         metric_helper.accuracy_by_options(sample["input"], sample["output"], ref),
                     )
-                softmaxs.append(references[i] if score == 1 else -1)
+                logits.append(references[i] if score == 1 else -1)
             else:
-                softmaxs.append(np.argmax(np.array(list(sample["softmax_over_choices"].values()))))
+                logits.append(np.argmax(np.array(list(sample["logits_over_choices"].values()))))

         references = np.array(references)
-        softmaxs = np.array(softmaxs)
-        scores = np.sum(references == softmaxs) / len(self.data[category]["data"]) * 100
+        logits = np.array(logits)
+        scores = np.sum(references == logits) / len(self.data[category]["data"]) * 100

         self.evaluation_results[metric][category] = (scores, len(self.data[category]["data"]))
         self.evaluation_results[metric]["ALL"] += scores * weight
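Taken together, the two hunks above score label-based metrics like this: for each sample, take the argmax over the choice logits; if any logit is NaN, fall back to exact matching and record the reference label on a hit or -1 (which can never equal a valid label index) on a miss. A condensed sketch, assuming a single string target per sample and with a plain string comparison standing in for metric_helper.accuracy_by_options:

    import numpy as np

    def label_accuracy(samples: list, references: list) -> float:
        # Sketch of the scoring above; sample keys follow the diff, but the
        # exact-match comparison is a simplification of accuracy_by_options.
        predicted = []
        for i, sample in enumerate(samples):
            choice_logits = np.array(list(sample["logits_over_choices"].values()))
            if np.any(np.isnan(choice_logits)):
                hit = sample["output"].strip() == sample["target"].strip()
                predicted.append(references[i] if hit else -1)
            else:
                predicted.append(int(np.argmax(choice_logits)))
        return float(np.sum(np.array(references) == np.array(predicted)) / len(samples) * 100)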
@@ -105,12 +105,12 @@ class DatasetEvaluator(object):
         predictions = [sample["output"] for sample in self.data[category]["data"]]

         flag = False
-        softmaxs = []
+        logits = []
         for i, sample in enumerate(self.data[category]["data"]):
-            if np.any(np.isnan(np.array(list(sample["softmax_over_choices"].values())))):
+            if np.any(np.isnan(np.array(list(sample["logits_over_choices"].values())))):
                 if not flag:
                     print(
-                        f"NaN in the softmax, switch to exact match for category {category} in dataset {self.dataset_name} in model {self.model_name}."
+                        f"NaN in the logits, switch to exact match for category {category} in dataset {self.dataset_name} in model {self.model_name}."
                     )
                     flag = True
                 score = 0
@@ -121,16 +121,14 @@ class DatasetEvaluator(object):
                         sample["output"], ref, all_classes=self.data[category]["inference_kwargs"]["all_classes"]
                     ),
                 )
-                softmaxs.append(references[i] if score == 1 else -1)
+                logits.append(references[i] if score == 1 else -1)
             else:
-                softmaxs.append(np.argmax(np.array(list(sample["softmax_over_choices"].values()))))
+                logits.append(np.argmax(np.array(list(sample["logits_over_choices"].values()))))

         metric_method = eval("metric_helper." + metric)

         total_score = 0.0
-        for prediction, reference, references_label, softmax in zip(
-            predictions, references, references_labels, softmaxs
-        ):
+        for prediction, reference, references_label, softmax in zip(predictions, references, references_labels, logits):
             score = 0.0

             for ref in reference:
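The last hunk also shows the shape of the combined-metric loop: each prediction may have several acceptable references, and the best score among them counts toward the total. A sketch under that assumption (combined_metric and metric_fn are stand-ins for the resolved metric_helper method, not the repo's API):

    def combined_metric(predictions: list, references: list, metric_fn) -> float:
        # Average of per-sample scores, where each sample keeps the best
        # score over all of its acceptable references.
        total_score = 0.0
        for prediction, reference in zip(predictions, references):
            score = 0.0
            for ref in reference:  # several valid answers per sample
                score = max(score, metric_fn(prediction, ref))
            total_score += score
        return total_score / len(predictions)

    # Usage with a toy exact-match metric:
    exact = lambda pred, ref: 1.0 if pred == ref else 0.0
    print(combined_metric(["a", "b"], [["a", "x"], ["y"]], exact))  # 0.5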
@@ -116,10 +116,10 @@ class HuggingFaceModel(BaseModel):
             shard_config: Shard config for tensor parallel.

         """
-        model_kwargs.setdefault("torch_dtype", torch.float16)
-
         if "torch_dtype" in model_kwargs:
             model_kwargs["torch_dtype"] = eval(model_kwargs["torch_dtype"])
+        else:
+            model_kwargs.setdefault("torch_dtype", torch.float16)

         if "config" in model_kwargs:
             model_kwargs["config"] = AutoConfig.from_pretrained(model_kwargs["config"])
@@ -586,11 +586,10 @@ class HuggingFaceCausalLM(HuggingFaceModel):
             shard_config: Shard config for tensor parallel.

         """
-
-        model_kwargs.setdefault("torch_dtype", torch.float16)
-
         if "torch_dtype" in model_kwargs:
             model_kwargs["torch_dtype"] = eval(model_kwargs["torch_dtype"])
+        else:
+            model_kwargs.setdefault("torch_dtype", torch.float16)

         if "config" in model_kwargs:
             model_kwargs["config"] = AutoConfig.from_pretrained(model_kwargs["config"])
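The two dtype hunks fix an ordering bug, not a cosmetic one: with setdefault running first, "torch_dtype" was always present afterwards, so whenever the caller omitted the key, eval() was applied to the torch.float16 object itself rather than a string, raising a TypeError. The fix only evaluates user-supplied strings and applies the default otherwise. A standalone sketch of the corrected logic (the function name and the config-file framing are mine):

    import torch

    def resolve_torch_dtype(model_kwargs: dict) -> None:
        if "torch_dtype" in model_kwargs:
            # Caller passed a string such as "torch.bfloat16" (e.g. from a config file).
            model_kwargs["torch_dtype"] = eval(model_kwargs["torch_dtype"])
        else:
            # No user value: default to fp16. Under the old ordering this
            # default was inserted first and then fed to eval(), which
            # fails on a non-string argument.
            model_kwargs.setdefault("torch_dtype", torch.float16)

    kwargs = {"torch_dtype": "torch.bfloat16"}
    resolve_torch_dtype(kwargs)
    assert kwargs["torch_dtype"] is torch.bfloat16

    kwargs = {}
    resolve_torch_dtype(kwargs)  # old code raised TypeError here
    assert kwargs["torch_dtype"] is torch.float16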