import copy
from typing import Any, Callable, Dict, Tuple

import pytest
import torch
import torch.nn as nn
from coati.models.base import Actor, Critic, RewardModel, get_base_model
from coati.models.bloom import BLOOMRM, BLOOMActor, BLOOMCritic
from coati.models.chatglm import ChatGLMActor
from coati.models.chatglm.chatglm_tokenizer import ChatGLMTokenizer
from coati.models.generation import generate
from coati.models.gpt import GPTRM, GPTActor, GPTCritic
from coati.models.llama import LlamaActor
from coati.models.lora import LoraLinear, convert_to_lora_module
from coati.models.loss import GPTLMLoss, LogExpLoss, LogSigLoss, PolicyLoss, ValueLoss
from coati.models.opt import OPTRM, OPTActor, OPTCritic
from coati.models.utils import calc_action_log_probs, masked_mean


@pytest.mark.parametrize("batch_size", [4])
@pytest.mark.parametrize("seq_len", [32])
@pytest.mark.parametrize(
    "actor_maker",
    [
        lambda: BLOOMActor(),
        lambda: GPTActor(),
        # HACK: skip llama due to long execution time
        # lambda: LlamaActor(),
        lambda: OPTActor(),
    ],
)
@pytest.mark.parametrize(
    "generate_kwargs",
    [
        {
            "max_length": 64,
            "use_cache": True,
            "do_sample": True,
            "temperature": 1.0,
            "top_k": 50,
        }
    ],
)
def test_generation(actor_maker: Callable[[], Actor], batch_size: int, seq_len: int, generate_kwargs: Dict[str, Any]):
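    """Smoke-test sampling: `generate` should return sequences padded to `max_length`.

    Runs on GPU (the actor and inputs are moved to CUDA), using a minimal mock
    tokenizer that supplies only the attributes `generate` reads.
    """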
    class MockTokenizer:
        def __init__(self):
            self.padding_side = "left"
            self.eos_token_id = 0
            self.pad_token_id = 0

    actor = actor_maker()
    input_ids = torch.randint(0, 100, (batch_size, seq_len)).cuda()
    tokenizer = MockTokenizer()
    sequences = generate(actor.cuda(), input_ids, tokenizer, **generate_kwargs)
    assert sequences.shape == (batch_size, generate_kwargs["max_length"])


def test_utils():
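    """Shape and value checks for `masked_mean` and `calc_action_log_probs`."""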
    fn_input = {"tensor": torch.ones((10,)), "mask": torch.randint(0, 2, (10,))}
    # Guard against the (rare) all-zero random mask, which would make the masked
    # mean ill-defined and the test flaky.
    fn_input["mask"][0] = 1
    fn_output = masked_mean(dim=0, **fn_input)
    assert fn_output.dim() == 0
    assert torch.allclose(fn_output, torch.tensor(1.0))

    batch_size = 4
    seq_len = 32
    num_labels = 10
    num_actions = 2
    fn_input = {
        "logits": torch.randn((batch_size, seq_len, num_labels)),
        "sequences": torch.randint(0, num_labels, (batch_size, seq_len)),
        "num_actions": num_actions,
    }
    fn_output = calc_action_log_probs(**fn_input)
    assert fn_output.shape == (batch_size, num_actions)


@pytest.mark.parametrize("lora_rank", [4])
@pytest.mark.parametrize("num_dim", [32])
@pytest.mark.parametrize("num_layers", [4])
def test_lora(lora_rank: int, num_dim: int, num_layers: int):
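    """LoRA conversion should freeze base weights; only lora_A/lora_B may train."""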
    model = nn.ModuleList([nn.Linear(num_dim, num_dim) for _ in range(num_layers)])
    lora_model = convert_to_lora_module(model, lora_rank)
    assert isinstance(lora_model, nn.ModuleList)
    for i in range(num_layers):
        assert isinstance(lora_model[i], LoraLinear)
        assert lora_model[i].lora_A.shape == (lora_rank, num_dim)
        assert lora_model[i].lora_B.shape == (num_dim, lora_rank)

    old_model = copy.deepcopy(lora_model)
    for i in range(num_layers):
        assert isinstance(lora_model[i], LoraLinear)
        assert torch.allclose(old_model[i].weight, lora_model[i].weight)
        assert torch.allclose(old_model[i].bias, lora_model[i].bias)
        assert torch.allclose(old_model[i].lora_B @ old_model[i].lora_A, lora_model[i].lora_B @ lora_model[i].lora_A)
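
    # One optimizer step on a dummy batch: the frozen base weight and bias must
    # stay identical, while the LoRA product B @ A must change.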
    optimizer = torch.optim.Adam(lora_model.parameters())
    x = torch.randn(8, num_dim)
    for i in range(num_layers):
        x = lora_model[i](x)
    loss = x.sum()
    loss.backward()
    optimizer.step()
    for i in range(num_layers):
        assert isinstance(lora_model[i], LoraLinear)
        assert torch.allclose(old_model[i].weight, lora_model[i].weight)
        assert torch.allclose(old_model[i].bias, lora_model[i].bias)
        assert not torch.allclose(
            old_model[i].lora_B @ old_model[i].lora_A, lora_model[i].lora_B @ lora_model[i].lora_A
        )


@pytest.mark.parametrize("batch_size", [8])
@pytest.mark.parametrize("seq_len", [128])
@pytest.mark.parametrize(
    "models_maker",
    [
        lambda: (BLOOMActor(), BLOOMCritic(), BLOOMRM()),
        lambda: (GPTActor(), GPTCritic(), GPTRM()),
        # HACK: skip llama due to long execution time
        # lambda: (LlamaActor(), LlamaCritic(), LlamaRM()),
        lambda: (OPTActor(), OPTCritic(), OPTRM()),
        lambda: (ChatGLMActor(), None, None),
    ],
)
@torch.no_grad()
def test_models(models_maker: Callable[[], Tuple[Actor, Critic, RewardModel]], batch_size: int, seq_len: int):
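    """Shape-check actor, critic, and reward-model forward passes (gradients disabled)."""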
    actor_input = {
        "input_ids": torch.randint(0, 100, (batch_size, seq_len)),
        "attention_mask": torch.randint(0, 2, (batch_size, seq_len)),
    }
    critic_input = {
        "sequences": torch.randint(0, 100, (batch_size, seq_len)),
        "attention_mask": torch.randint(0, 2, (batch_size, seq_len)),
    }
    rm_input = {
        "sequences": torch.randint(0, 100, (batch_size, seq_len)),
        "attention_mask": torch.randint(0, 2, (batch_size, seq_len)),
    }

    actor, critic, rm = models_maker()
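    # ChatGLM takes different inputs: splice its [gMASK] and <bos> token ids into
    # the prompt and use a 4-D attention mask.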
    if isinstance(actor, ChatGLMActor):
        actor = actor.float()
        tokenizer = ChatGLMTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
        chatglm_special_token = torch.tensor([tokenizer.gmask_token_id, tokenizer.bos_token_id]).repeat(batch_size, 1)
        actor_input = {
            "input_ids": torch.cat(
                (
                    torch.randint(0, 100, (batch_size, seq_len // 2)),
                    chatglm_special_token,
                    torch.randint(0, 100, (batch_size, seq_len // 2 - 2)),
                ),
                dim=1,
            ),
            "attention_mask": torch.randint(0, 2, (batch_size, 1, seq_len, seq_len)),
        }
    assert isinstance(actor, Actor)
    get_base_model(actor)
    actor_output = actor(**actor_input)
    assert actor_output.logits.shape[:2] == (batch_size, seq_len)

    if critic is not None:
        assert isinstance(critic, Critic)
        get_base_model(critic)
        critic_output = critic(**critic_input)
        assert critic_output.shape == (batch_size,)

    if rm is not None:
        assert isinstance(rm, RewardModel)
        get_base_model(rm)
        rm_output = rm(**rm_input)
        assert rm_output.shape == (batch_size,)


@pytest.mark.parametrize("batch_size", [16])
@pytest.mark.parametrize("seq_len", [128])
@pytest.mark.parametrize("num_labels", [100])
def test_loss(batch_size: int, seq_len: int, num_labels: int):
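    """Each loss should accept its expected input shapes without raising."""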
    loss = GPTLMLoss()
    loss_input = {
        "logits": torch.randn(batch_size, seq_len, num_labels),
        "labels": torch.randint(0, num_labels, (batch_size, seq_len)),
    }
    loss(**loss_input)
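
    # The PPO losses (PolicyLoss, ValueLoss) and the ranking losses (LogSigLoss,
    # LogExpLoss) below all take per-sample scalars of shape (batch_size,).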
    loss = PolicyLoss()
    loss_input = {
        "log_probs": torch.randn((batch_size,)),
        "old_log_probs": torch.randn((batch_size,)),
        "advantages": torch.randn((batch_size,)),
    }
    loss(**loss_input)

    loss = ValueLoss()
    loss_input = {
        "values": torch.randn((batch_size,)),
        "old_values": torch.randn((batch_size,)),
        "reward": torch.randn((batch_size,)),
    }
    loss(**loss_input)

    loss = LogSigLoss()
    loss_input = {
        "chosen_reward": torch.randn((batch_size,)),
        "reject_reward": torch.randn((batch_size,)),
    }
    loss(**loss_input)

    loss = LogExpLoss()
    loss_input = {
        "chosen_reward": torch.randn((batch_size,)),
        "reject_reward": torch.randn((batch_size,)),
    }
    loss(**loss_input)


if __name__ == "__main__":
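    # Smoke-test entry point: run a representative subset directly, without pytest.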
    generate_kwargs = dict(max_length=40, use_cache=True, do_sample=True, temperature=1.0, top_k=50)
    test_generation(lambda: LlamaActor(), batch_size=4, seq_len=32, generate_kwargs=generate_kwargs)

    test_utils()

    test_lora(lora_rank=2, num_dim=8, num_layers=2)

    test_models(models_maker=lambda: (BLOOMActor(), BLOOMCritic(), BLOOMRM()), batch_size=8, seq_len=128)

    test_loss(batch_size=8, seq_len=128, num_labels=100)