import copy
import os

import pytest
import torch
import torch.distributed as dist
from coati.experience_buffer import NaiveExperienceBuffer
from coati.experience_maker import NaiveExperienceMaker
from coati.models.base import RewardModel
from coati.models.gpt import GPTActor, GPTCritic
from coati.trainer.ppo import _set_default_generate_kwargs
from coati.trainer.strategies import DDPStrategy, GeminiStrategy
from coati.trainer.strategies.colossalai import LowLevelZeroStrategy
from transformers.models.gpt2.configuration_gpt2 import GPT2Config

from colossalai.testing import rerun_if_address_is_in_use, spawn
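

# Tiny GPT-2 configuration shared by all models in this test, to keep it fast.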
GPT_CONFIG = GPT2Config(n_embd=128, n_layer=4, n_head=4)
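

# Random batch of GPT-2-range token ids (vocab size 50257) with a full attention mask, on CUDA.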
def get_data(batch_size: int, seq_len: int = 10) -> dict:
    input_ids = torch.randint(0, 50257, (batch_size, seq_len), device="cuda")
    attention_mask = torch.ones_like(input_ids)
    return dict(input_ids=input_ids, attention_mask=attention_mask)
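

# All-gather `tensor` across ranks and return True only if every rank holds an identical copy.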
def gather_and_equal(tensor: torch.Tensor) -> bool:
    world_size = dist.get_world_size()
    outputs = [torch.empty_like(tensor) for _ in range(world_size)]
    dist.all_gather(outputs, tensor.contiguous())
    for t in outputs[1:]:
        if not torch.equal(outputs[0], t):
            return False
    return True
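

# Build actor/critic/reward/initial models under the given strategy, generate
# experience, and check that the buffered data is consistent across ranks.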
def make_and_consume_experience(strategy):
    EXPERIENCE_BATCH_SIZE = 4
    SAMPLE_BATCH_SIZE = 2

    if strategy == "ddp":
        strategy = DDPStrategy()
    elif strategy == "colossalai-zero2":
        strategy = LowLevelZeroStrategy()
    elif strategy == "colossalai-gemini":
        strategy = GeminiStrategy(placement_policy="static")
    else:
        raise ValueError(f'Unsupported strategy "{strategy}"')
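
    # Create the models inside the strategy's init context (presumably so the
    # strategy can control parameter initialization and placement).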
    with strategy.model_init_context():
        actor = GPTActor(config=GPT_CONFIG).cuda()
        critic = GPTCritic(config=GPT_CONFIG).cuda()

        initial_model = GPTActor(config=GPT_CONFIG).cuda()
        reward_model = RewardModel(model=copy.deepcopy(critic.model)).cuda()
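
    # Hand the models to the strategy so it can wrap/shard them (DDP, ZeRO-2, or Gemini).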
    actor, critic, initial_model, reward_model = strategy.prepare(actor, critic, initial_model, reward_model)
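
    # Minimal tokenizer stand-in with just the attributes the experience maker accesses.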
    class MockTokenizer:
        def __init__(self):
            self.padding_side = "left"
            self.eos_token_id = 0
            self.pad_token_id = 0

    tokenizer = MockTokenizer()
    experience_maker = NaiveExperienceMaker(actor, critic, reward_model, initial_model, tokenizer)
    data_buffer = NaiveExperienceBuffer(SAMPLE_BATCH_SIZE, cpu_offload=False)
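
    # Sampling settings for generation; _set_default_generate_kwargs is expected to
    # fill in any strategy/actor-specific defaults.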
    generate_kwargs = dict(do_sample=True, max_length=16)
    generate_kwargs = _set_default_generate_kwargs(strategy, generate_kwargs, actor)

    # experience of all ranks should be the same
    for _ in range(2):
        data = get_data(EXPERIENCE_BATCH_SIZE)
        assert gather_and_equal(data["input_ids"])
        assert gather_and_equal(data["attention_mask"])
        experience = experience_maker.make_experience(**data, do_sample=True, max_length=16)
        assert gather_and_equal(experience.sequences)
        assert gather_and_equal(experience.action_log_probs)
        assert gather_and_equal(experience.values)
        assert gather_and_equal(experience.reward)
        assert gather_and_equal(experience.advantages)
        assert gather_and_equal(experience.action_mask)
        assert gather_and_equal(experience.attention_mask)
        data_buffer.append(experience)

    # data buffer's data should be the same
    buffer_size = torch.tensor([len(data_buffer)], device="cuda")
    assert gather_and_equal(buffer_size)
    for item in data_buffer.items:
        assert gather_and_equal(item.sequences)
        assert gather_and_equal(item.action_log_probs)
        assert gather_and_equal(item.values)
        assert gather_and_equal(item.reward)
        assert gather_and_equal(item.advantages)
        assert gather_and_equal(item.action_mask)
        assert gather_and_equal(item.attention_mask)

    # dataloader of each rank should have the same size and different batch
    dataloader = strategy.setup_dataloader(data_buffer)
    dataloader_size = torch.tensor([len(dataloader)], device="cuda")
    assert gather_and_equal(dataloader_size)
    for experience in dataloader:
        assert not gather_and_equal(experience.sequences)
        assert not gather_and_equal(experience.action_log_probs)
        assert not gather_and_equal(experience.values)
        assert not gather_and_equal(experience.reward)
        assert not gather_and_equal(experience.advantages)
        # action mask and attention mask may be same
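

# Worker entry point: export the torch.distributed rendezvous variables for this rank,
# then run the test body.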
def run_dist(rank, world_size, port, strategy):
    os.environ["RANK"] = str(rank)
    os.environ["LOCAL_RANK"] = str(rank)
    os.environ["WORLD_SIZE"] = str(world_size)
    os.environ["MASTER_ADDR"] = "localhost"
    os.environ["MASTER_PORT"] = str(port)
    make_and_consume_experience(strategy)
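

# Spawn `world_size` worker processes via colossalai.testing.spawn and run the
# distributed check once per strategy.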
@pytest.mark.dist
@pytest.mark.parametrize("world_size", [2])
@pytest.mark.parametrize("strategy", ["ddp", "colossalai-zero2", "colossalai-gemini"])
@rerun_if_address_is_in_use()
def test_experience(world_size, strategy):
    spawn(run_dist, world_size, strategy=strategy)


if __name__ == "__main__":
    test_experience(2, "colossalai-zero2")