# ColossalAI/applications/ColossalChat/tests/test_lora.py

import torch
import torch.nn as nn
import torch.optim as optim
from coati.models import convert_to_lora_module
from coati.models.lora import LoraConfig, LoraEmbedding, LoraLinear
from torch.utils.data import DataLoader, TensorDataset


class SimpleNN(nn.Module):
    """Small two-layer perceptron: ``Linear -> ReLU -> Linear``.

    Args:
        input_size: Number of features in each input row.
        hidden_size: Number of units produced by the first linear layer.
        num_classes: Number of values produced by the second linear layer.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # Sub-modules are registered in the order fc1, relu, fc2.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Apply ``fc1``, then ``relu``, then ``fc2`` to ``x`` and return the result."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)


def test_overfit():
    """Train a LoRA-converted ``SimpleNN`` on one random batch and check the result.

    A single batch of random inputs and random labels is fitted for a fixed
    number of epochs with Adam. The test then asserts that:

    * more than 95% of the training samples are classified correctly, and
    * ``model.fc1.weight`` after training still matches the copy taken before
      ``convert_to_lora_module`` was applied (presumably because the base
      weight is expected to stay frozen under LoRA - confirm against
      ``coati.models.lora``).
    """
    input_size = 1000
    hidden_size = 200
    num_classes = 5
    batch_size = 64
    learning_rate = 0.01
    num_epochs = 200

    # Synthesized dataset
    X = torch.randn(batch_size, input_size)
    Y = torch.randint(0, num_classes, (batch_size,))

    # Convert to DataLoader
    dataset = TensorDataset(X, Y)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    # Build and convert model; snapshot fc1's weight before the conversion
    model = SimpleNN(input_size, hidden_size, num_classes)
    weight_to_compare = model.fc1.weight.detach().clone()
    model = convert_to_lora_module(model, lora_config=LoraConfig(r=32))

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Train the model
    for _ in range(num_epochs):
        for inputs, labels in loader:
            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Check if model has overfitted. No gradients are needed for this forward
    # pass, so skip building the autograd graph.
    with torch.no_grad():
        outputs = model(X)
    predicted = outputs.argmax(dim=1)
    # Count against the full label tensor rather than the last batch that
    # happened to leak out of the training loop.
    total = Y.size(0)
    correct = (predicted == Y).sum().item()
    assert correct / total > 0.95
    # Compare absolute differences: a signed sum lets positive and negative
    # changes cancel out and passes for any net-negative drift.
    assert (weight_to_compare - model.fc1.weight).abs().sum() < 0.01


def test_lora_linear_accuracy():
    """Check that a freshly built ``LoraLinear`` reproduces ``nn.Linear``'s output.

    The comparison is made twice, in this order: once with
    ``lora_initialization_method="PiSSA"`` and once with the constructor's
    default initialization. Both must match the plain linear output within
    ``atol=1e-5`` / ``rtol=1e-05``.
    """
    base_weight = torch.randn(10, 5)
    dense = nn.Linear(5, 10)
    dense.weight.data = base_weight
    sample = torch.randn(10, 5)
    reference = dense(sample)

    # First entry: lora linear Pissa; second entry: lora linear (defaults)
    for init_kwargs in ({"lora_initialization_method": "PiSSA"}, {}):
        # Point the layer back at the reference weight before each construction
        dense.weight.data = base_weight
        wrapped = LoraLinear(dense.weight, dense.bias, r=2, **init_kwargs)
        candidate = wrapped(sample)
        assert torch.allclose(reference, candidate, atol=1e-5, rtol=1e-05)


def test_lora_embedding_accuracy():
    """Check that a freshly built ``LoraEmbedding`` reproduces ``nn.Embedding``'s output.

    The comparison is made twice, in this order: once with
    ``lora_initialization_method="PiSSA"`` and once with the constructor's
    default initialization. Both must match the plain embedding lookup within
    ``atol=1e-5`` / ``rtol=1e-05``.
    """
    base_weight = torch.randn(10, 5)
    table = nn.Embedding(10, 5)
    table.weight.data = base_weight
    indices = torch.randint(0, 10, (10,))
    reference = table(indices)

    # First entry: lora embedding Pissa; second entry: lora embedding (defaults)
    for init_kwargs in ({"lora_initialization_method": "PiSSA"}, {}):
        # Point the table back at the reference weight before each construction
        table.weight.data = base_weight
        wrapped = LoraEmbedding(table.weight, r=2, num_embeddings=10, embedding_dim=5, **init_kwargs)
        candidate = wrapped(indices)
        assert torch.allclose(reference, candidate, atol=1e-5, rtol=1e-05)


if __name__ == "__main__":
    # When executed as a script, run each test in the order it is defined above.
    for run_test in (test_overfit, test_lora_linear_accuracy, test_lora_embedding_accuracy):
        run_test()
[ColossalChat] Update RLHF V2 (#5286) * Add dpo. Fix sft, ppo, lora. Refactor all * fix and tested ppo * 2 nd round refactor * add ci tests * fix ci * fix ci * fix readme, style * fix readme style * fix style, fix benchmark * reproduce benchmark result, remove useless files * rename to ColossalChat * use new image * fix ci workflow * fix ci * use local model/tokenizer for ci tests * fix ci * fix ci * fix ci * fix ci timeout * fix rm progress bar. fix ci timeout * fix ci * fix ci typo * remove 3d plugin from ci temporary * test environment * cannot save optimizer * support chat template * fix readme * fix path * test ci locally * restore build_or_pr * fix ci data path * fix benchmark * fix ci, move ci tests to 3080, disable fast tokenizer * move ci to 85 * support flash attention 2 * add all-in-one data preparation script. Fix colossal-llama2-chat chat template * add hardware requirements * move ci test data * fix save_model, add unwrap * fix missing bos * fix missing bos; support grad accumulation with gemini * fix ci * fix ci * fix ci * fix llama2 chat template config * debug sft * debug sft * fix colossalai version requirement * fix ci * add sanity check to prevent NaN loss * fix requirements * add dummy data generation script * add dummy data generation script * add dummy data generation script * add dummy data generation script * update readme * update readme * update readme and ignore * fix logger bug * support parallel_output * modify data preparation logic * fix tokenization * update lr * fix inference * run pre-commit --------- Co-authored-by: Tong Li <tong.li352711588@gmail.com> 8 months ago			`import torch`
			`import torch.nn as nn`
			`import torch.optim as optim`
			`from coati.models import convert_to_lora_module`
[Chat] Fix lora (#5946) * fix merging * remove filepath * fix style 4 months ago			`from coati.models.lora import LoraConfig, LoraEmbedding, LoraLinear`
[ColossalChat] Update RLHF V2 (#5286) * Add dpo. Fix sft, ppo, lora. Refactor all * fix and tested ppo * 2 nd round refactor * add ci tests * fix ci * fix ci * fix readme, style * fix readme style * fix style, fix benchmark * reproduce benchmark result, remove useless files * rename to ColossalChat * use new image * fix ci workflow * fix ci * use local model/tokenizer for ci tests * fix ci * fix ci * fix ci * fix ci timeout * fix rm progress bar. fix ci timeout * fix ci * fix ci typo * remove 3d plugin from ci temporary * test environment * cannot save optimizer * support chat template * fix readme * fix path * test ci locally * restore build_or_pr * fix ci data path * fix benchmark * fix ci, move ci tests to 3080, disable fast tokenizer * move ci to 85 * support flash attention 2 * add all-in-one data preparation script. Fix colossal-llama2-chat chat template * add hardware requirements * move ci test data * fix save_model, add unwrap * fix missing bos * fix missing bos; support grad accumulation with gemini * fix ci * fix ci * fix ci * fix llama2 chat template config * debug sft * debug sft * fix colossalai version requirement * fix ci * add sanity check to prevent NaN loss * fix requirements * add dummy data generation script * add dummy data generation script * add dummy data generation script * add dummy data generation script * update readme * update readme * update readme and ignore * fix logger bug * support parallel_output * modify data preparation logic * fix tokenization * update lr * fix inference * run pre-commit --------- Co-authored-by: Tong Li <tong.li352711588@gmail.com> 8 months ago			`from torch.utils.data import DataLoader, TensorDataset`


			`class SimpleNN(nn.Module):`
			`def __init__(self, input_size, hidden_size, num_classes):`
			`super(SimpleNN, self).__init__()`
			`self.fc1 = nn.Linear(input_size, hidden_size)`
			`self.relu = nn.ReLU()`
			`self.fc2 = nn.Linear(hidden_size, num_classes)`

			`def forward(self, x):`
			`out = self.fc1(x)`
			`out = self.relu(out)`
			`out = self.fc2(out)`
			`return out`


			`def test_overfit():`
			`input_size = 1000`
			`hidden_size = 200`
			`num_classes = 5`
			`batch_size = 64`
			`learning_rate = 0.01`
			`num_epochs = 200`

			`# Synthesized dataset`
			`X = torch.randn(batch_size, input_size)`
			`Y = torch.randint(0, num_classes, (batch_size,))`

			`# Convert to DataLoader`
			`dataset = TensorDataset(X, Y)`
			`loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)`

			`# Build and convert model`
			`model = SimpleNN(input_size, hidden_size, num_classes)`
			`weight_to_compare = model.fc1.weight.detach().clone()`
[Chat] Fix lora (#5946) * fix merging * remove filepath * fix style 4 months ago			`model = convert_to_lora_module(model, lora_config=LoraConfig(r=32))`
[ColossalChat] Update RLHF V2 (#5286) * Add dpo. Fix sft, ppo, lora. Refactor all * fix and tested ppo * 2 nd round refactor * add ci tests * fix ci * fix ci * fix readme, style * fix readme style * fix style, fix benchmark * reproduce benchmark result, remove useless files * rename to ColossalChat * use new image * fix ci workflow * fix ci * use local model/tokenizer for ci tests * fix ci * fix ci * fix ci * fix ci timeout * fix rm progress bar. fix ci timeout * fix ci * fix ci typo * remove 3d plugin from ci temporary * test environment * cannot save optimizer * support chat template * fix readme * fix path * test ci locally * restore build_or_pr * fix ci data path * fix benchmark * fix ci, move ci tests to 3080, disable fast tokenizer * move ci to 85 * support flash attention 2 * add all-in-one data preparation script. Fix colossal-llama2-chat chat template * add hardware requirements * move ci test data * fix save_model, add unwrap * fix missing bos * fix missing bos; support grad accumulation with gemini * fix ci * fix ci * fix ci * fix llama2 chat template config * debug sft * debug sft * fix colossalai version requirement * fix ci * add sanity check to prevent NaN loss * fix requirements * add dummy data generation script * add dummy data generation script * add dummy data generation script * add dummy data generation script * update readme * update readme * update readme and ignore * fix logger bug * support parallel_output * modify data preparation logic * fix tokenization * update lr * fix inference * run pre-commit --------- Co-authored-by: Tong Li <tong.li352711588@gmail.com> 8 months ago
			`# Loss and optimizer`
			`criterion = nn.CrossEntropyLoss()`
			`optimizer = optim.Adam(model.parameters(), lr=learning_rate)`

			`# Train the model`
			`for _ in range(num_epochs):`
			`for i, (inputs, labels) in enumerate(loader):`
			`# Forward pass`
			`outputs = model(inputs)`
			`loss = criterion(outputs, labels)`
			`# Backward and optimize`
			`optimizer.zero_grad()`
			`loss.backward()`
			`optimizer.step()`

			`# Check if model has overfitted`
			`outputs = model(X)`
			`_, predicted = torch.max(outputs.data, 1)`
			`total = labels.size(0)`
			`correct = (predicted == Y).sum().item()`
[ColossalChat] Add PP support (#6001) * support pp training * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update rm * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * refactor * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update test case * fix * change to 4 * fix eval * test * add pp * hotfix * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * support pp training * update rm * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * refactor * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update test case * fix * change to 4 * fix eval * test * add pp * hotfix * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update * skip pp eval * update all reduce * update sft * update ignore * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update no cache * add eval * remove fi * remove debug * remove parentheses to avoid warning * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Revert "add eval" This reverts commit 3ab2f6fa329b6d12959fb3c668d278b4b225c5f0. * add all reduce --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> 3 months ago			`assert correct / total > 0.95`
[ColossalChat] Update RLHF V2 (#5286) * Add dpo. Fix sft, ppo, lora. Refactor all * fix and tested ppo * 2 nd round refactor * add ci tests * fix ci * fix ci * fix readme, style * fix readme style * fix style, fix benchmark * reproduce benchmark result, remove useless files * rename to ColossalChat * use new image * fix ci workflow * fix ci * use local model/tokenizer for ci tests * fix ci * fix ci * fix ci * fix ci timeout * fix rm progress bar. fix ci timeout * fix ci * fix ci typo * remove 3d plugin from ci temporary * test environment * cannot save optimizer * support chat template * fix readme * fix path * test ci locally * restore build_or_pr * fix ci data path * fix benchmark * fix ci, move ci tests to 3080, disable fast tokenizer * move ci to 85 * support flash attention 2 * add all-in-one data preparation script. Fix colossal-llama2-chat chat template * add hardware requirements * move ci test data * fix save_model, add unwrap * fix missing bos * fix missing bos; support grad accumulation with gemini * fix ci * fix ci * fix ci * fix llama2 chat template config * debug sft * debug sft * fix colossalai version requirement * fix ci * add sanity check to prevent NaN loss * fix requirements * add dummy data generation script * add dummy data generation script * add dummy data generation script * add dummy data generation script * update readme * update readme * update readme and ignore * fix logger bug * support parallel_output * modify data preparation logic * fix tokenization * update lr * fix inference * run pre-commit --------- Co-authored-by: Tong Li <tong.li352711588@gmail.com> 8 months ago			`assert (weight_to_compare - model.fc1.weight).sum() < 0.01`


[Chat] Fix lora (#5946) * fix merging * remove filepath * fix style 4 months ago			`def test_lora_linear_accuracy():`

			`weight = torch.randn(10, 5)`
			`linear = nn.Linear(5, 10)`
			`linear.weight.data = weight`
			`x = torch.randn(10, 5)`
			`out_linear = linear(x)`

			`# lora linear Pissa`
			`linear.weight.data = weight`
			`lora_linear = LoraLinear(linear.weight, linear.bias, r=2, lora_initialization_method="PiSSA")`
			`out_lora = lora_linear(x)`
			`assert torch.allclose(out_linear, out_lora, atol=1e-5, rtol=1e-05)`

			`# lora linear`
			`linear.weight.data = weight`
			`lora_linear = LoraLinear(linear.weight, linear.bias, r=2)`
			`out_lora = lora_linear(x)`
			`assert torch.allclose(out_linear, out_lora, atol=1e-5, rtol=1e-05)`


			`def test_lora_embedding_accuracy():`
			`weight = torch.randn(10, 5)`
			`embedding = nn.Embedding(10, 5)`
			`embedding.weight.data = weight`
			`x = torch.randint(0, 10, (10,))`
			`out_embedding = embedding(x)`

			`# lora embedding Pissa`
			`embedding.weight.data = weight`
			`lora_embedding = LoraEmbedding(`
			`embedding.weight, r=2, lora_initialization_method="PiSSA", num_embeddings=10, embedding_dim=5`
			`)`
			`out_lora = lora_embedding(x)`
			`assert torch.allclose(out_embedding, out_lora, atol=1e-5, rtol=1e-05)`

			`# lora embedding`
			`embedding.weight.data = weight`
			`lora_embedding = LoraEmbedding(embedding.weight, r=2, num_embeddings=10, embedding_dim=5)`
			`out_lora = lora_embedding(x)`
			`assert torch.allclose(out_embedding, out_lora, atol=1e-5, rtol=1e-05)`


[ColossalChat] Update RLHF V2 (#5286) * Add dpo. Fix sft, ppo, lora. Refactor all * fix and tested ppo * 2 nd round refactor * add ci tests * fix ci * fix ci * fix readme, style * fix readme style * fix style, fix benchmark * reproduce benchmark result, remove useless files * rename to ColossalChat * use new image * fix ci workflow * fix ci * use local model/tokenizer for ci tests * fix ci * fix ci * fix ci * fix ci timeout * fix rm progress bar. fix ci timeout * fix ci * fix ci typo * remove 3d plugin from ci temporary * test environment * cannot save optimizer * support chat template * fix readme * fix path * test ci locally * restore build_or_pr * fix ci data path * fix benchmark * fix ci, move ci tests to 3080, disable fast tokenizer * move ci to 85 * support flash attention 2 * add all-in-one data preparation script. Fix colossal-llama2-chat chat template * add hardware requirements * move ci test data * fix save_model, add unwrap * fix missing bos * fix missing bos; support grad accumulation with gemini * fix ci * fix ci * fix ci * fix llama2 chat template config * debug sft * debug sft * fix colossalai version requirement * fix ci * add sanity check to prevent NaN loss * fix requirements * add dummy data generation script * add dummy data generation script * add dummy data generation script * add dummy data generation script * update readme * update readme * update readme and ignore * fix logger bug * support parallel_output * modify data preparation logic * fix tokenization * update lr * fix inference * run pre-commit --------- Co-authored-by: Tong Li <tong.li352711588@gmail.com> 8 months ago			`if __name__ == "__main__":`
			`test_overfit()`
[Chat] Fix lora (#5946) * fix merging * remove filepath * fix style 4 months ago			`test_lora_linear_accuracy()`
			`test_lora_embedding_accuracy()`