ColossalAI/applications/Chat/coati/dataset/reward_dataset.py

from typing import Callable

from torch.utils.data import Dataset
from tqdm import tqdm

from .utils import is_rank_0


# Dahoas/rm-static
class RmStaticDataset(Dataset):
    """
    Dataset for reward model

    Args:
        dataset: dataset for reward model
        tokenizer: tokenizer for reward model
        max_length: max length of input
        special_token: special token at the end of sentence
    """

    def __init__(self, dataset, tokenizer: Callable, max_length: int, special_token=None) -> None:
        super().__init__()
        self.end_token = tokenizer.eos_token if special_token is None else special_token

        chosen = [data["prompt"] + data["chosen"] + self.end_token for data in tqdm(dataset, disable=not is_rank_0())]
        chosen_token = tokenizer(
            chosen, max_length=max_length, padding="max_length", truncation=True, return_tensors="pt"
        )
        self.chosen = {"input_ids": chosen_token["input_ids"], "attention_mask": chosen_token["attention_mask"]}

        reject = [data["prompt"] + data["rejected"] + self.end_token for data in tqdm(dataset, disable=not is_rank_0())]
        reject_token = tokenizer(
            reject, max_length=max_length, padding="max_length", truncation=True, return_tensors="pt"
        )
        self.reject = {"input_ids": reject_token["input_ids"], "attention_mask": reject_token["attention_mask"]}

    def __len__(self):
        length = self.chosen["input_ids"].shape[0]
        return length

    def __getitem__(self, idx):
        return (
            self.chosen["input_ids"][idx],
            self.chosen["attention_mask"][idx],
            self.reject["input_ids"][idx],
            self.reject["attention_mask"][idx],
        )
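
    # Minimal usage sketch (illustrative, not part of the original module): build the
    # Dahoas/rm-static reward dataset with a Hugging Face tokenizer. The tokenizer
    # checkpoint, split, and max_length below are assumptions for demonstration only.
    #
    #   from datasets import load_dataset
    #   from transformers import AutoTokenizer
    #
    #   tokenizer = AutoTokenizer.from_pretrained("gpt2")
    #   tokenizer.pad_token = tokenizer.eos_token  # GPT-2 ships without a pad token
    #   raw = load_dataset("Dahoas/rm-static", split="train")
    #   train_set = RmStaticDataset(raw, tokenizer, max_length=512)
    #   chosen_ids, chosen_mask, reject_ids, reject_mask = train_set[0]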


# Anthropic/hh-rlhf
class HhRlhfDataset(Dataset):
    """
    Dataset for reward model

    Args:
        dataset: dataset for reward model
        tokenizer: tokenizer for reward model
        max_length: max length of input
        special_token: special token at the end of sentence
    """

    def __init__(self, dataset, tokenizer: Callable, max_length: int, special_token=None) -> None:
        super().__init__()
        self.end_token = tokenizer.eos_token if special_token is None else special_token

        chosen = [data["chosen"] + self.end_token for data in tqdm(dataset, disable=not is_rank_0())]
        chosen_token = tokenizer(
            chosen, max_length=max_length, padding="max_length", truncation=True, return_tensors="pt"
        )
        self.chosen = {"input_ids": chosen_token["input_ids"], "attention_mask": chosen_token["attention_mask"]}

        reject = [data["rejected"] + self.end_token for data in tqdm(dataset, disable=not is_rank_0())]
        reject_token = tokenizer(
            reject, max_length=max_length, padding="max_length", truncation=True, return_tensors="pt"
        )
        self.reject = {"input_ids": reject_token["input_ids"], "attention_mask": reject_token["attention_mask"]}

    def __len__(self):
        length = self.chosen["input_ids"].shape[0]
        return length

    def __getitem__(self, idx):
        return (
            self.chosen["input_ids"][idx],
            self.chosen["attention_mask"][idx],
            self.reject["input_ids"][idx],
            self.reject["attention_mask"][idx],
        )
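

# Minimal end-to-end sketch (assumed setup, not the repository's training script):
# wrap HhRlhfDataset in a DataLoader and inspect one batch of chosen/rejected pairs.
# The tokenizer checkpoint, split slice, max_length, and batch size are illustrative
# assumptions; Anthropic/hh-rlhf provides full "chosen"/"rejected" dialogues with no
# separate prompt field, which is why this class skips the prompt concatenation.
if __name__ == "__main__":
    from datasets import load_dataset
    from torch.utils.data import DataLoader
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    tokenizer.pad_token = tokenizer.eos_token  # GPT-2 has no pad token by default

    raw = load_dataset("Anthropic/hh-rlhf", split="train[:256]")
    dataset = HhRlhfDataset(raw, tokenizer, max_length=512)
    loader = DataLoader(dataset, batch_size=4, shuffle=True)

    chosen_ids, chosen_mask, reject_ids, reject_mask = next(iter(loader))
    print(chosen_ids.shape, reject_ids.shape)  # e.g. torch.Size([4, 512]) for both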