ColossalAI/applications/Chat/coati/experience_buffer/naive.py

import random
from typing import List

import torch
from coati.experience_maker.base import Experience

from .base import ExperienceBuffer
from .utils import BufferItem, make_experience_batch, split_experience_batch


class NaiveExperienceBuffer(ExperienceBuffer):
    """Naive experience buffer class. It stores experience.

    Appended experience is passed through ``split_experience_batch`` and the
    resulting ``BufferItem`` entries are kept in a plain list. When ``limit``
    is positive, the entries at the front of the list (the oldest ones) are
    dropped once the list grows past ``limit``.

    Args:
        sample_batch_size (int): Batch size when sampling.
        limit (int, optional): Limit of number of experience samples. A number <= 0 means unlimited. Defaults to 0.
        cpu_offload (bool, optional): Whether to move experience to cpu when it is appended and back to the
            target device when it is sampled. Defaults to True.
    """

    def __init__(self, sample_batch_size: int, limit: int = 0, cpu_offload: bool = True) -> None:
        # NOTE(review): `self.sample_batch_size` and `self.limit`, read below, are presumably
        # set by the base-class constructor - confirm against ExperienceBuffer.
        super().__init__(sample_batch_size, limit)
        self.cpu_offload = cpu_offload
        # Sampled batches are moved to this device when cpu_offload is on. Fall back to
        # the CPU on hosts without CUDA so the buffer can still be constructed there.
        if torch.cuda.is_available():
            self.target_device = torch.device(f"cuda:{torch.cuda.current_device()}")
        else:
            self.target_device = torch.device("cpu")
        # TODO(ver217): add prefetch
        self.items: List[BufferItem] = []

    @torch.no_grad()
    def append(self, experience: Experience) -> None:
        """Split ``experience`` and add the resulting items to the buffer.

        When ``cpu_offload`` is on, ``to_device`` is called on the passed-in
        ``experience`` object itself before it is split.

        Args:
            experience (Experience): Experience to store.
        """
        if self.cpu_offload:
            experience.to_device(torch.device("cpu"))
        self.items.extend(split_experience_batch(experience))
        # Keep only the newest `limit` items; a non-positive limit means unbounded.
        if self.limit > 0 and len(self.items) > self.limit:
            self.items = self.items[-self.limit :]

    def clear(self) -> None:
        """Remove every stored item from the buffer."""
        self.items.clear()

    @torch.no_grad()
    def sample(self) -> Experience:
        """Draw ``sample_batch_size`` distinct items at random and batch them.

        Returns:
            Experience: Result of ``make_experience_batch`` on the drawn items,
            moved to ``target_device`` when ``cpu_offload`` is on.

        Raises:
            ValueError: If the buffer holds fewer than ``sample_batch_size`` items.
        """
        available = len(self.items)
        if self.sample_batch_size > available:
            # random.sample would raise ValueError here as well; raise it ourselves
            # so the message states both sizes.
            raise ValueError(
                f"Cannot sample {self.sample_batch_size} items from a buffer holding only {available}"
            )
        picked = random.sample(self.items, self.sample_batch_size)
        experience = make_experience_batch(picked)
        if self.cpu_offload:
            experience.to_device(self.target_device)
        return experience

    def __len__(self) -> int:
        """Return the number of items currently stored."""
        return len(self.items)

    def __getitem__(self, idx: int) -> BufferItem:
        """Return the stored item at position ``idx``."""
        return self.items[idx]

    def collate_fn(self, batch) -> Experience:
        """Merge ``batch`` into a single ``Experience`` via ``make_experience_batch``.

        NOTE(review): together with ``__len__``/``__getitem__`` this presumably lets
        the buffer back a data loader - confirm against callers. Unlike ``sample``,
        no device transfer is performed here.
        """
        return make_experience_batch(batch)
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00			`import random`
			`from typing import List`

			`import torch`
			`from coati.experience_maker.base import Experience`

[chat] fix bugs and add unit tests (#4213) * style: rename replay buffer Experience replay is typically for off policy algorithms. Use this name in PPO maybe misleading. * fix: fix wrong zero2 default arg * test: update experience tests * style: rename zero_pad fn * fix: defer init in CycledDataLoader * test: add benchmark test * style: rename internal fn of generation * style: rename internal fn of lora * fix: remove unused loss fn * fix: remove unused utils fn * refactor: remove generate_with_actor fn * fix: fix type annotation * test: add models tests * fix: skip llama due to long execution time * style: modify dataset * style: apply formatter * perf: update reward dataset * fix: fix wrong IGNORE_INDEX in sft dataset * fix: remove DataCollatorForSupervisedDataset * test: add dataset tests * style: apply formatter * style: rename test_ci to test_train * feat: add llama in inference * test: add inference tests * test: change test scripts directory * fix: update ci * fix: fix typo * fix: skip llama due to oom * fix: fix file mod * style: apply formatter * refactor: remove duplicated llama_gptq * style: apply formatter * to: update rm test * feat: add tokenizer arg * feat: add download model script * test: update train tests * fix: modify gemini load and save pretrained * test: update checkpoint io test * to: modify nproc_per_node * fix: do not remove existing dir * fix: modify save path * test: add random choice * fix: fix sft path * fix: enlarge nproc_per_node to avoid oom * fix: add num_retry * fix: make lora config of rm and critic consistent * fix: add warning about lora weights * fix: skip some gpt2 tests * fix: remove grad ckpt in rm and critic due to errors * refactor: directly use Actor in train_sft * test: add more arguments * fix: disable grad ckpt when using lora * fix: fix save_pretrained and related tests * test: enable zero2 tests * revert: remove useless fn * style: polish code * test: modify test args 2023-08-02 02:17:36 +00:00			`from .base import 
ExperienceBuffer`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00			`from .utils import BufferItem, make_experience_batch, split_experience_batch`


[chat] fix bugs and add unit tests (#4213) * style: rename replay buffer Experience replay is typically for off policy algorithms. Use this name in PPO maybe misleading. * fix: fix wrong zero2 default arg * test: update experience tests * style: rename zero_pad fn * fix: defer init in CycledDataLoader * test: add benchmark test * style: rename internal fn of generation * style: rename internal fn of lora * fix: remove unused loss fn * fix: remove unused utils fn * refactor: remove generate_with_actor fn * fix: fix type annotation * test: add models tests * fix: skip llama due to long execution time * style: modify dataset * style: apply formatter * perf: update reward dataset * fix: fix wrong IGNORE_INDEX in sft dataset * fix: remove DataCollatorForSupervisedDataset * test: add dataset tests * style: apply formatter * style: rename test_ci to test_train * feat: add llama in inference * test: add inference tests * test: change test scripts directory * fix: update ci * fix: fix typo * fix: skip llama due to oom * fix: fix file mod * style: apply formatter * refactor: remove duplicated llama_gptq * style: apply formatter * to: update rm test * feat: add tokenizer arg * feat: add download model script * test: update train tests * fix: modify gemini load and save pretrained * test: update checkpoint io test * to: modify nproc_per_node * fix: do not remove existing dir * fix: modify save path * test: add random choice * fix: fix sft path * fix: enlarge nproc_per_node to avoid oom * fix: add num_retry * fix: make lora config of rm and critic consistent * fix: add warning about lora weights * fix: skip some gpt2 tests * fix: remove grad ckpt in rm and critic due to errors * refactor: directly use Actor in train_sft * test: add more arguments * fix: disable grad ckpt when using lora * fix: fix save_pretrained and related tests * test: enable zero2 tests * revert: remove useless fn * style: polish code * test: modify test args 2023-08-02 02:17:36 +00:00			`class 
NaiveExperienceBuffer(ExperienceBuffer):`
			`"""Naive experience buffer class. It stores experience.`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00
[misc] update pre-commit and run all files (#4752) * [misc] update pre-commit * [misc] run pre-commit * [misc] remove useless configuration files * [misc] ignore cuda for clang-format 2023-09-19 06:20:26 +00:00			`Args:`
			`sample_batch_size (int): Batch size when sampling.`
			`limit (int, optional): Limit of number of experience samples. A number <= 0 means unlimited. Defaults to 0.`
			`cpu_offload (bool, optional): Whether to offload experience to cpu when sampling. Defaults to True.`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00			`"""`

			`def __init__(self, sample_batch_size: int, limit: int = 0, cpu_offload: bool = True) -> None:`
			`super().__init__(sample_batch_size, limit)`
			`self.cpu_offload = cpu_offload`
[misc] update pre-commit and run all files (#4752) * [misc] update pre-commit * [misc] run pre-commit * [misc] remove useless configuration files * [misc] ignore cuda for clang-format 2023-09-19 06:20:26 +00:00			`self.target_device = torch.device(f"cuda:{torch.cuda.current_device()}")`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00			`# TODO(ver217): add prefetch`
			`self.items: List[BufferItem] = []`

			`@torch.no_grad()`
			`def append(self, experience: Experience) -> None:`
			`if self.cpu_offload:`
[misc] update pre-commit and run all files (#4752) * [misc] update pre-commit * [misc] run pre-commit * [misc] remove useless configuration files * [misc] ignore cuda for clang-format 2023-09-19 06:20:26 +00:00			`experience.to_device(torch.device("cpu"))`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00			`items = split_experience_batch(experience)`
			`self.items.extend(items)`
			`if self.limit > 0:`
			`samples_to_remove = len(self.items) - self.limit`
			`if samples_to_remove > 0:`
			`self.items = self.items[samples_to_remove:]`

			`def clear(self) -> None:`
			`self.items.clear()`

			`@torch.no_grad()`
			`def sample(self) -> Experience:`
			`items = random.sample(self.items, self.sample_batch_size)`
			`experience = make_experience_batch(items)`
			`if self.cpu_offload:`
			`experience.to_device(self.target_device)`
			`return experience`

			`def __len__(self) -> int:`
			`return len(self.items)`

			`def __getitem__(self, idx: int) -> BufferItem:`
			`return self.items[idx]`

			`def collate_fn(self, batch) -> Experience:`
			`experience = make_experience_batch(batch)`
			`return experience`