mirror of https://github.com/hpcaitech/ColossalAI
113 lines
4.0 KiB
Python
113 lines
4.0 KiB
Python
|
from typing import Callable
|
||
|
|
||
|
from torch.utils.data import Dataset
|
||
|
from tqdm import tqdm
|
||
|
|
||
|
from .utils import is_rank_0
|
||
|
|
||
|
|
||
|
# Dahoas/rm-static
|
||
|
class RmStaticDataset(Dataset):
    """
    Paired (chosen, rejected) dataset for reward model training.

    Every sample of ``dataset`` is read through the keys ``'prompt'``,
    ``'chosen'`` and ``'rejected'``. The prompt is prepended to both
    responses, the end token is appended, and each resulting text is
    tokenized eagerly at construction time.

    Args:
        dataset: iterable of mappings with 'prompt', 'chosen' and 'rejected' entries
        tokenizer: tokenizer for reward model; it is called as
            ``tokenizer(text, max_length=..., padding="max_length",
            truncation=True, return_tensors="pt")`` and its result is indexed
            by 'input_ids' and 'attention_mask'
        max_length: max length of input, forwarded to the tokenizer
        special_token: special token at the end of sentence; when None,
            ``tokenizer.eos_token`` is used instead
    """

    def __init__(self, dataset, tokenizer: Callable, max_length: int, special_token=None) -> None:
        super().__init__()
        self.chosen = []
        self.reject = []
        # An explicit special token takes precedence over the tokenizer's EOS token.
        self.end_token = tokenizer.eos_token if special_token is None else special_token

        for data in tqdm(dataset, disable=not is_rank_0()):
            prompt = data['prompt']
            # Chosen is encoded and stored before the rejected text is even read,
            # so both lists grow in the same order as the samples.
            self.chosen.append(self._encode_text(tokenizer, prompt + data['chosen'], max_length))
            self.reject.append(self._encode_text(tokenizer, prompt + data['rejected'], max_length))

    def _encode_text(self, tokenizer, text, max_length):
        """Append the end token to ``text``, tokenize it and keep ids + mask only."""
        if self.end_token is None:
            # Without this guard the concatenation below fails with an opaque
            # "can only concatenate str" TypeError.
            raise ValueError("No end token available: the tokenizer has no eos_token; "
                             "pass `special_token` explicitly.")
        tokens = tokenizer(text + self.end_token,
                           max_length=max_length,
                           padding="max_length",
                           truncation=True,
                           return_tensors="pt")
        return {
            "input_ids": tokens['input_ids'],
            "attention_mask": tokens['attention_mask'],
        }

    def __len__(self):
        """Return the number of (chosen, rejected) pairs."""
        return len(self.chosen)

    def __getitem__(self, idx):
        """Return (chosen_ids, chosen_mask, reject_ids, reject_mask) for pair ``idx``."""
        chosen = self.chosen[idx]
        reject = self.reject[idx]
        return chosen["input_ids"], chosen["attention_mask"], reject["input_ids"], reject["attention_mask"]
|
||
|
|
||
|
|
||
|
# Anthropic/hh-rlhf
|
||
|
class HhRlhfDataset(Dataset):
    """
    Paired (chosen, rejected) dataset for reward model training.

    Every sample of ``dataset`` is read through the keys ``'chosen'`` and
    ``'rejected'``. The end token is appended to each text, and the result
    is tokenized eagerly at construction time.

    Args:
        dataset: iterable of mappings with 'chosen' and 'rejected' entries
        tokenizer: tokenizer for reward model; it is called as
            ``tokenizer(text, max_length=..., padding="max_length",
            truncation=True, return_tensors="pt")`` and its result is indexed
            by 'input_ids' and 'attention_mask'
        max_length: max length of input, forwarded to the tokenizer
        special_token: special token at the end of sentence; when None,
            ``tokenizer.eos_token`` is used instead
    """

    def __init__(self, dataset, tokenizer: Callable, max_length: int, special_token=None) -> None:
        super().__init__()
        self.chosen = []
        self.reject = []
        # An explicit special token takes precedence over the tokenizer's EOS token.
        self.end_token = tokenizer.eos_token if special_token is None else special_token

        for data in tqdm(dataset, disable=not is_rank_0()):
            # Chosen is encoded and stored before the rejected text is even read,
            # so both lists grow in the same order as the samples.
            self.chosen.append(self._encode_text(tokenizer, data['chosen'], max_length))
            self.reject.append(self._encode_text(tokenizer, data['rejected'], max_length))

    def _encode_text(self, tokenizer, text, max_length):
        """Append the end token to ``text``, tokenize it and keep ids + mask only."""
        if self.end_token is None:
            # Without this guard the concatenation below fails with an opaque
            # "can only concatenate str" TypeError.
            raise ValueError("No end token available: the tokenizer has no eos_token; "
                             "pass `special_token` explicitly.")
        tokens = tokenizer(text + self.end_token,
                           max_length=max_length,
                           padding="max_length",
                           truncation=True,
                           return_tensors="pt")
        return {
            "input_ids": tokens['input_ids'],
            "attention_mask": tokens['attention_mask'],
        }

    def __len__(self):
        """Return the number of (chosen, rejected) pairs."""
        return len(self.chosen)

    def __getitem__(self, idx):
        """Return (chosen_ids, chosen_mask, reject_ids, reject_mask) for pair ``idx``."""
        chosen = self.chosen[idx]
        reject = self.reject[idx]
        return chosen["input_ids"], chosen["attention_mask"], reject["input_ids"], reject["attention_mask"]
|