ColossalAI/applications/Chat/coati/dataset/reward_dataset.py

from typing import Callable

from torch.utils.data import Dataset
from tqdm import tqdm

from .utils import is_rank_0


# Dahoas/rm-static
class RmStaticDataset(Dataset):
    """
    Dataset for reward model

    Every record of ``dataset`` is read through the keys ``"prompt"``,
    ``"chosen"`` and ``"rejected"``. The chosen text is built as
    ``prompt + chosen + end_token`` and the rejected text as
    ``prompt + rejected + end_token``; both lists are tokenized eagerly in
    ``__init__`` and kept on the instance as ``self.chosen`` / ``self.reject``.

    Args:
        dataset: dataset for reward model (iterated exactly once)
        tokenizer: tokenizer for reward model
        max_length: max length of input
        special_token: special token at the end of sentence;
            ``tokenizer.eos_token`` is used when this is ``None``
    """

    def __init__(self, dataset, tokenizer: Callable, max_length: int, special_token=None) -> None:
        super().__init__()
        self.end_token = tokenizer.eos_token \
            if special_token is None else special_token

        # Walk the dataset a single time: a second pass would see nothing if
        # `dataset` is a one-shot iterable, and would otherwise just repeat
        # the per-row access work.
        chosen, reject = self._collect_texts(tqdm(dataset, disable=not is_rank_0()), self.end_token)

        self.chosen = self._tokenize(tokenizer, chosen, max_length)
        self.reject = self._tokenize(tokenizer, reject, max_length)

    @staticmethod
    def _collect_texts(records, end_token):
        """Build the chosen and rejected text lists from one pass over ``records``."""
        chosen = []
        reject = []
        for data in records:
            prompt = data["prompt"]
            chosen.append(prompt + data["chosen"] + end_token)
            reject.append(prompt + data["rejected"] + end_token)
        return chosen, reject

    @staticmethod
    def _tokenize(tokenizer: Callable, texts, max_length: int) -> dict:
        """Call ``tokenizer`` on ``texts`` and keep only ``input_ids`` and ``attention_mask``."""
        encoded = tokenizer(texts,
                            max_length=max_length,
                            padding="max_length",
                            truncation=True,
                            return_tensors="pt")
        return {
            "input_ids": encoded["input_ids"],
            "attention_mask": encoded["attention_mask"]
        }

    def __len__(self):
        # The number of samples is the first dimension of the chosen ids.
        return self.chosen["input_ids"].shape[0]

    def __getitem__(self, idx):
        """Return ``(chosen_ids, chosen_mask, reject_ids, reject_mask)`` at ``idx``."""
        return self.chosen["input_ids"][idx], self.chosen["attention_mask"][idx], \
            self.reject["input_ids"][idx], self.reject["attention_mask"][idx]


# Anthropic/hh-rlhf
class HhRlhfDataset(Dataset):
    """
    Dataset for reward model

    Every record of ``dataset`` is read through the keys ``"chosen"`` and
    ``"rejected"``. Each text gets ``end_token`` appended; both lists are
    tokenized eagerly in ``__init__`` and kept on the instance as
    ``self.chosen`` / ``self.reject``.

    Args:
        dataset: dataset for reward model (iterated exactly once)
        tokenizer: tokenizer for reward model
        max_length: max length of input
        special_token: special token at the end of sentence;
            ``tokenizer.eos_token`` is used when this is ``None``
    """

    def __init__(self, dataset, tokenizer: Callable, max_length: int, special_token=None) -> None:
        super().__init__()
        self.end_token = tokenizer.eos_token \
            if special_token is None else special_token

        # Walk the dataset a single time: a second pass would see nothing if
        # `dataset` is a one-shot iterable, and would otherwise just repeat
        # the per-row access work.
        chosen, reject = self._collect_texts(tqdm(dataset, disable=not is_rank_0()), self.end_token)

        self.chosen = self._tokenize(tokenizer, chosen, max_length)
        self.reject = self._tokenize(tokenizer, reject, max_length)

    @staticmethod
    def _collect_texts(records, end_token):
        """Build the chosen and rejected text lists from one pass over ``records``."""
        chosen = []
        reject = []
        for data in records:
            chosen.append(data["chosen"] + end_token)
            reject.append(data["rejected"] + end_token)
        return chosen, reject

    @staticmethod
    def _tokenize(tokenizer: Callable, texts, max_length: int) -> dict:
        """Call ``tokenizer`` on ``texts`` and keep only ``input_ids`` and ``attention_mask``."""
        encoded = tokenizer(texts,
                            max_length=max_length,
                            padding="max_length",
                            truncation=True,
                            return_tensors="pt")
        return {
            "input_ids": encoded["input_ids"],
            "attention_mask": encoded["attention_mask"]
        }

    def __len__(self):
        # The number of samples is the first dimension of the chosen ids.
        return self.chosen["input_ids"].shape[0]

    def __getitem__(self, idx):
        """Return ``(chosen_ids, chosen_mask, reject_ids, reject_mask)`` at ``idx``."""
        return self.chosen["input_ids"][idx], self.chosen["attention_mask"][idx], \
            self.reject["input_ids"][idx], self.reject["attention_mask"][idx]
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00			`from typing import Callable`

			`from torch.utils.data import Dataset`
			`from tqdm import tqdm`

			`from .utils import is_rank_0`


[NFC]fix typo colossalai/auto_parallel nn utils etc. (#3779) * fix typo colossalai/autochunk auto_parallel amp * fix typo colossalai/auto_parallel nn utils etc. 2023-05-23 07:28:20 +00:00			`# Dahoas/rm-static`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00			`class RmStaticDataset(Dataset):`
			`"""`
			`Dataset for reward model`

			`Args:`
			`dataset: dataset for reward model`
			`tokenizer: tokenizer for reward model`
			`max_length: max length of input`
			`special_token: special token at the end of sentence`
			`"""`

			`def __init__(self, dataset, tokenizer: Callable, max_length: int, special_token=None) -> None:`
			`super().__init__()`
[chat] fix bugs and add unit tests (#4213) * style: rename replay buffer Experience replay is typically for off policy algorithms. Use this name in PPO maybe misleading. * fix: fix wrong zero2 default arg * test: update experience tests * style: rename zero_pad fn * fix: defer init in CycledDataLoader * test: add benchmark test * style: rename internal fn of generation * style: rename internal fn of lora * fix: remove unused loss fn * fix: remove unused utils fn * refactor: remove generate_with_actor fn * fix: fix type annotation * test: add models tests * fix: skip llama due to long execution time * style: modify dataset * style: apply formatter * perf: update reward dataset * fix: fix wrong IGNORE_INDEX in sft dataset * fix: remove DataCollatorForSupervisedDataset * test: add dataset tests * style: apply formatter * style: rename test_ci to test_train * feat: add llama in inference * test: add inference tests * test: change test scripts directory * fix: update ci * fix: fix typo * fix: skip llama due to oom * fix: fix file mod * style: apply formatter * refactor: remove duplicated llama_gptq * style: apply formatter * to: update rm test * feat: add tokenizer arg * feat: add download model script * test: update train tests * fix: modify gemini load and save pretrained * test: update checkpoint io test * to: modify nproc_per_node * fix: do not remove existing dir * fix: modify save path * test: add random choice * fix: fix sft path * fix: enlarge nproc_per_node to avoid oom * fix: add num_retry * fix: make lora config of rm and critic consistent * fix: add warning about lora weights * fix: skip some gpt2 tests * fix: remove grad ckpt in rm and critic due to errors * refactor: directly use Actor in train_sft * test: add more arguments * fix: disable grad ckpt when using lora * fix: fix save_pretrained and related tests * test: enable zero2 tests * revert: remove useless fn * style: polish code * test: modify test args 2023-08-02 02:17:36 +00:00			`self.end_token = 
tokenizer.eos_token \`
			`if special_token is None else special_token`

			`chosen = [`
			`data["prompt"] + data["chosen"] + self.end_token`
			`for data in tqdm(dataset, disable=not is_rank_0())`
			`]`
			`chosen_token = tokenizer(chosen,`
			`max_length=max_length,`
			`padding="max_length",`
			`truncation=True,`
			`return_tensors="pt")`
			`self.chosen = {`
			`"input_ids": chosen_token["input_ids"],`
			`"attention_mask": chosen_token["attention_mask"]`
			`}`

			`reject = [`
			`data["prompt"] + data["rejected"] + self.end_token`
			`for data in tqdm(dataset, disable=not is_rank_0())`
			`]`
			`reject_token = tokenizer(reject,`
			`max_length=max_length,`
			`padding="max_length",`
			`truncation=True,`
			`return_tensors="pt")`
			`self.reject = {`
			`"input_ids": reject_token["input_ids"],`
			`"attention_mask": reject_token["attention_mask"]`
			`}`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00
			`def __len__(self):`
[chat] fix bugs and add unit tests (#4213) * style: rename replay buffer Experience replay is typically for off policy algorithms. Use this name in PPO maybe misleading. * fix: fix wrong zero2 default arg * test: update experience tests * style: rename zero_pad fn * fix: defer init in CycledDataLoader * test: add benchmark test * style: rename internal fn of generation * style: rename internal fn of lora * fix: remove unused loss fn * fix: remove unused utils fn * refactor: remove generate_with_actor fn * fix: fix type annotation * test: add models tests * fix: skip llama due to long execution time * style: modify dataset * style: apply formatter * perf: update reward dataset * fix: fix wrong IGNORE_INDEX in sft dataset * fix: remove DataCollatorForSupervisedDataset * test: add dataset tests * style: apply formatter * style: rename test_ci to test_train * feat: add llama in inference * test: add inference tests * test: change test scripts directory * fix: update ci * fix: fix typo * fix: skip llama due to oom * fix: fix file mod * style: apply formatter * refactor: remove duplicated llama_gptq * style: apply formatter * to: update rm test * feat: add tokenizer arg * feat: add download model script * test: update train tests * fix: modify gemini load and save pretrained * test: update checkpoint io test * to: modify nproc_per_node * fix: do not remove existing dir * fix: modify save path * test: add random choice * fix: fix sft path * fix: enlarge nproc_per_node to avoid oom * fix: add num_retry * fix: make lora config of rm and critic consistent * fix: add warning about lora weights * fix: skip some gpt2 tests * fix: remove grad ckpt in rm and critic due to errors * refactor: directly use Actor in train_sft * test: add more arguments * fix: disable grad ckpt when using lora * fix: fix save_pretrained and related tests * test: enable zero2 tests * revert: remove useless fn * style: polish code * test: modify test args 2023-08-02 02:17:36 +00:00			`length = 
self.chosen["input_ids"].shape[0]`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00			`return length`

			`def __getitem__(self, idx):`
[chat] fix bugs and add unit tests (#4213) * style: rename replay buffer Experience replay is typically for off policy algorithms. Use this name in PPO maybe misleading. * fix: fix wrong zero2 default arg * test: update experience tests * style: rename zero_pad fn * fix: defer init in CycledDataLoader * test: add benchmark test * style: rename internal fn of generation * style: rename internal fn of lora * fix: remove unused loss fn * fix: remove unused utils fn * refactor: remove generate_with_actor fn * fix: fix type annotation * test: add models tests * fix: skip llama due to long execution time * style: modify dataset * style: apply formatter * perf: update reward dataset * fix: fix wrong IGNORE_INDEX in sft dataset * fix: remove DataCollatorForSupervisedDataset * test: add dataset tests * style: apply formatter * style: rename test_ci to test_train * feat: add llama in inference * test: add inference tests * test: change test scripts directory * fix: update ci * fix: fix typo * fix: skip llama due to oom * fix: fix file mod * style: apply formatter * refactor: remove duplicated llama_gptq * style: apply formatter * to: update rm test * feat: add tokenizer arg * feat: add download model script * test: update train tests * fix: modify gemini load and save pretrained * test: update checkpoint io test * to: modify nproc_per_node * fix: do not remove existing dir * fix: modify save path * test: add random choice * fix: fix sft path * fix: enlarge nproc_per_node to avoid oom * fix: add num_retry * fix: make lora config of rm and critic consistent * fix: add warning about lora weights * fix: skip some gpt2 tests * fix: remove grad ckpt in rm and critic due to errors * refactor: directly use Actor in train_sft * test: add more arguments * fix: disable grad ckpt when using lora * fix: fix save_pretrained and related tests * test: enable zero2 tests * revert: remove useless fn * style: polish code * test: modify test args 2023-08-02 02:17:36 +00:00			`return 
self.chosen["input_ids"][idx], self.chosen["attention_mask"][idx], \`
			`self.reject["input_ids"][idx], self.reject["attention_mask"][idx]`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00

			`# Anthropic/hh-rlhf`
			`class HhRlhfDataset(Dataset):`
			`"""`
			`Dataset for reward model`

			`Args:`
			`dataset: dataset for reward model`
			`tokenizer: tokenizer for reward model`
			`max_length: max length of input`
			`special_token: special token at the end of sentence`
			`"""`

			`def __init__(self, dataset, tokenizer: Callable, max_length: int, special_token=None) -> None:`
			`super().__init__()`
[chat] fix bugs and add unit tests (#4213) * style: rename replay buffer Experience replay is typically for off policy algorithms. Use this name in PPO maybe misleading. * fix: fix wrong zero2 default arg * test: update experience tests * style: rename zero_pad fn * fix: defer init in CycledDataLoader * test: add benchmark test * style: rename internal fn of generation * style: rename internal fn of lora * fix: remove unused loss fn * fix: remove unused utils fn * refactor: remove generate_with_actor fn * fix: fix type annotation * test: add models tests * fix: skip llama due to long execution time * style: modify dataset * style: apply formatter * perf: update reward dataset * fix: fix wrong IGNORE_INDEX in sft dataset * fix: remove DataCollatorForSupervisedDataset * test: add dataset tests * style: apply formatter * style: rename test_ci to test_train * feat: add llama in inference * test: add inference tests * test: change test scripts directory * fix: update ci * fix: fix typo * fix: skip llama due to oom * fix: fix file mod * style: apply formatter * refactor: remove duplicated llama_gptq * style: apply formatter * to: update rm test * feat: add tokenizer arg * feat: add download model script * test: update train tests * fix: modify gemini load and save pretrained * test: update checkpoint io test * to: modify nproc_per_node * fix: do not remove existing dir * fix: modify save path * test: add random choice * fix: fix sft path * fix: enlarge nproc_per_node to avoid oom * fix: add num_retry * fix: make lora config of rm and critic consistent * fix: add warning about lora weights * fix: skip some gpt2 tests * fix: remove grad ckpt in rm and critic due to errors * refactor: directly use Actor in train_sft * test: add more arguments * fix: disable grad ckpt when using lora * fix: fix save_pretrained and related tests * test: enable zero2 tests * revert: remove useless fn * style: polish code * test: modify test args 2023-08-02 02:17:36 +00:00			`self.end_token = 
tokenizer.eos_token \`
			`if special_token is None else special_token`

			`chosen = [`
			`data["chosen"] + self.end_token`
			`for data in tqdm(dataset, disable=not is_rank_0())`
			`]`
			`chosen_token = tokenizer(chosen,`
			`max_length=max_length,`
			`padding="max_length",`
			`truncation=True,`
			`return_tensors="pt")`
			`self.chosen = {`
			`"input_ids": chosen_token["input_ids"],`
			`"attention_mask": chosen_token["attention_mask"]`
			`}`

			`reject = [`
			`data["rejected"] + self.end_token`
			`for data in tqdm(dataset, disable=not is_rank_0())`
			`]`
			`reject_token = tokenizer(reject,`
			`max_length=max_length,`
			`padding="max_length",`
			`truncation=True,`
			`return_tensors="pt")`
			`self.reject = {`
			`"input_ids": reject_token["input_ids"],`
			`"attention_mask": reject_token["attention_mask"]`
			`}`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00
			`def __len__(self):`
[chat] fix bugs and add unit tests (#4213) * style: rename replay buffer Experience replay is typically for off policy algorithms. Use this name in PPO maybe misleading. * fix: fix wrong zero2 default arg * test: update experience tests * style: rename zero_pad fn * fix: defer init in CycledDataLoader * test: add benchmark test * style: rename internal fn of generation * style: rename internal fn of lora * fix: remove unused loss fn * fix: remove unused utils fn * refactor: remove generate_with_actor fn * fix: fix type annotation * test: add models tests * fix: skip llama due to long execution time * style: modify dataset * style: apply formatter * perf: update reward dataset * fix: fix wrong IGNORE_INDEX in sft dataset * fix: remove DataCollatorForSupervisedDataset * test: add dataset tests * style: apply formatter * style: rename test_ci to test_train * feat: add llama in inference * test: add inference tests * test: change test scripts directory * fix: update ci * fix: fix typo * fix: skip llama due to oom * fix: fix file mod * style: apply formatter * refactor: remove duplicated llama_gptq * style: apply formatter * to: update rm test * feat: add tokenizer arg * feat: add download model script * test: update train tests * fix: modify gemini load and save pretrained * test: update checkpoint io test * to: modify nproc_per_node * fix: do not remove existing dir * fix: modify save path * test: add random choice * fix: fix sft path * fix: enlarge nproc_per_node to avoid oom * fix: add num_retry * fix: make lora config of rm and critic consistent * fix: add warning about lora weights * fix: skip some gpt2 tests * fix: remove grad ckpt in rm and critic due to errors * refactor: directly use Actor in train_sft * test: add more arguments * fix: disable grad ckpt when using lora * fix: fix save_pretrained and related tests * test: enable zero2 tests * revert: remove useless fn * style: polish code * test: modify test args 2023-08-02 02:17:36 +00:00			`length = 
self.chosen["input_ids"].shape[0]`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00			`return length`

			`def __getitem__(self, idx):`
[chat] fix bugs and add unit tests (#4213) * style: rename replay buffer Experience replay is typically for off policy algorithms. Use this name in PPO maybe misleading. * fix: fix wrong zero2 default arg * test: update experience tests * style: rename zero_pad fn * fix: defer init in CycledDataLoader * test: add benchmark test * style: rename internal fn of generation * style: rename internal fn of lora * fix: remove unused loss fn * fix: remove unused utils fn * refactor: remove generate_with_actor fn * fix: fix type annotation * test: add models tests * fix: skip llama due to long execution time * style: modify dataset * style: apply formatter * perf: update reward dataset * fix: fix wrong IGNORE_INDEX in sft dataset * fix: remove DataCollatorForSupervisedDataset * test: add dataset tests * style: apply formatter * style: rename test_ci to test_train * feat: add llama in inference * test: add inference tests * test: change test scripts directory * fix: update ci * fix: fix typo * fix: skip llama due to oom * fix: fix file mod * style: apply formatter * refactor: remove duplicated llama_gptq * style: apply formatter * to: update rm test * feat: add tokenizer arg * feat: add download model script * test: update train tests * fix: modify gemini load and save pretrained * test: update checkpoint io test * to: modify nproc_per_node * fix: do not remove existing dir * fix: modify save path * test: add random choice * fix: fix sft path * fix: enlarge nproc_per_node to avoid oom * fix: add num_retry * fix: make lora config of rm and critic consistent * fix: add warning about lora weights * fix: skip some gpt2 tests * fix: remove grad ckpt in rm and critic due to errors * refactor: directly use Actor in train_sft * test: add more arguments * fix: disable grad ckpt when using lora * fix: fix save_pretrained and related tests * test: enable zero2 tests * revert: remove useless fn * style: polish code * test: modify test args 2023-08-02 02:17:36 +00:00			`return 
self.chosen["input_ids"][idx], self.chosen["attention_mask"][idx], \`
			`self.reject["input_ids"][idx], self.reject["attention_mask"][idx]`