ColossalAI/applications/Chat/coati/trainer/sft.py

import math
import time
from typing import List, Optional

import torch
import torch.distributed as dist
import wandb
from torch.optim import Optimizer
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformers.tokenization_utils_base import PreTrainedTokenizerBase
from transformers.trainer import get_scheduler

from .base import Trainer
from .callbacks import Callback
from .strategies import ColossalAIStrategy, Strategy
from .utils import is_rank_0, to_device


class SFTTrainer(Trainer):
    """
    Trainer for supervised fine-tuning (SFT) of a language model.

    Runs a training loop with gradient accumulation and a cosine learning-rate
    schedule (warmup over the first 3% of the optimizer steps). When an
    evaluation dataloader is given, an evaluation loss is logged after every
    epoch.

    Every batch yielded by the dataloaders must provide the keys
    ``"input_ids"``, ``"attention_mask"`` and ``"labels"``.

    Args:
        model (torch.nn.Module): the model to train
        strategy (Strategy): the strategy to use for training
        optim (Optimizer): the optimizer to use for training
        train_dataloader (DataLoader): the dataloader to use for training
        eval_dataloader (DataLoader, optional): the dataloader to use for evaluation
        max_epochs (int, defaults to 2): the number of epochs to train
        accumulation_steps (int, defaults to 8): the number of batches whose gradients
            are accumulated before each optimizer step
        callbacks (List[Callback], optional): the callbacks to call during training process

    Raises:
        ValueError: if ``accumulation_steps`` is smaller than 1, or if it is larger
            than 1 while using stage 3 of ``ColossalAIStrategy``.
    """

    # Per-batch training losses at or above this value are reported as abnormal.
    ABNORMAL_LOSS_THRESHOLD = 2.5

    def __init__(
        self,
        model,
        strategy: Strategy,
        optim: Optimizer,
        train_dataloader: DataLoader,
        eval_dataloader: Optional[DataLoader] = None,
        max_epochs: int = 2,
        accumulation_steps: int = 8,
        callbacks: Optional[List[Callback]] = None,
    ) -> None:
        if accumulation_steps < 1:
            # Fail early with a clear message instead of a ZeroDivisionError below.
            raise ValueError(f"accumulation_steps must be >= 1, got {accumulation_steps}")
        if accumulation_steps > 1 and isinstance(strategy, ColossalAIStrategy) and strategy.stage == 3:
            raise ValueError("Accumulation steps are not supported in stage 3 of ColossalAI")
        # A fresh list per instance: a mutable default would be shared by every trainer.
        super().__init__(strategy, max_epochs, callbacks=callbacks if callbacks is not None else [])
        self.train_dataloader = train_dataloader
        self.eval_dataloader = eval_dataloader
        self.model = model
        self.optimizer = optim

        self.accumulation_steps = accumulation_steps
        # Only full accumulation windows trigger an optimizer/scheduler step.
        num_update_steps_per_epoch = len(train_dataloader) // self.accumulation_steps
        max_steps = math.ceil(self.max_epochs * num_update_steps_per_epoch)

        self.scheduler = get_scheduler("cosine",
                                       self.optimizer,
                                       num_warmup_steps=math.ceil(max_steps * 0.03),
                                       num_training_steps=max_steps)

    def fit(self, logger, use_wandb: bool = False):
        """
        Train the model for ``max_epochs`` epochs, evaluating after each epoch.

        Args:
            logger: object providing ``warning`` and ``info`` methods
            use_wandb (bool, defaults to False): whether to report loss, learning rate,
                epoch and batch id to Weights & Biases after every optimizer step
        """
        if use_wandb:
            wandb.init(project="Coati", name=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
            wandb.watch(self.model)

        # Running sum of the already-scaled losses of the current accumulation window.
        total_loss = 0
        steps_per_epoch = len(self.train_dataloader) // self.accumulation_steps
        step_bar = tqdm(range(steps_per_epoch * self.max_epochs), desc='steps', disable=not is_rank_0())

        for epoch in range(self.max_epochs):
            # train
            self.model.train()
            # NOTE(review): when len(train_dataloader) is not a multiple of
            # accumulation_steps, the trailing batches of an epoch never reach the
            # step below; their gradients and running loss carry over into the first
            # window of the next epoch. Confirm whether they should be dropped.
            for batch_id, batch in enumerate(self.train_dataloader):
                batch = to_device(batch, torch.cuda.current_device())
                outputs = self.model(batch["input_ids"], attention_mask=batch["attention_mask"], labels=batch["labels"])

                loss = outputs.loss

                # Rank check first so the other ranks skip the tensor comparison.
                if is_rank_0() and loss >= self.ABNORMAL_LOSS_THRESHOLD:
                    logger.warning(f"batch_id:{batch_id}, abnormal loss: {loss}")

                # Scale so that the accumulated gradient is the mean over the window.
                loss = loss / self.accumulation_steps

                self.strategy.backward(loss, self.model, self.optimizer)

                total_loss += loss.item()

                # gradient accumulation
                if (batch_id + 1) % self.accumulation_steps == 0:
                    self.strategy.optimizer_step(self.optimizer)
                    self.optimizer.zero_grad()
                    self.scheduler.step()
                    if is_rank_0() and use_wandb:
                        wandb.log({
                            # total_loss sums losses that were already divided by
                            # accumulation_steps, so it is the window mean as-is;
                            # dividing again would under-report the loss.
                            "loss": total_loss,
                            "lr": self.scheduler.get_last_lr()[0],
                            "epoch": epoch,
                            "batch_id": batch_id
                        })
                    total_loss = 0
                    step_bar.update()

            # eval
            if self.eval_dataloader is not None:
                self._log_eval_loss(epoch, logger)

        step_bar.close()

    def _log_eval_loss(self, epoch: int, logger) -> None:
        """Compute the mean loss over ``eval_dataloader`` and log it on rank 0."""
        self.model.eval()
        loss_sum = 0.0
        num_seen = 0
        with torch.no_grad():
            for batch in self.eval_dataloader:
                batch = to_device(batch, torch.cuda.current_device())
                outputs = self.model(batch["input_ids"],
                                     attention_mask=batch["attention_mask"],
                                     labels=batch["labels"])
                batch_size = batch["input_ids"].size(0)
                # outputs.loss is assumed to be a per-batch mean (TODO confirm for
                # the model in use); weight it by the batch size so that dividing
                # by the sample count below yields a mean instead of mean / batch_size.
                loss_sum += outputs.loss.item() * batch_size
                num_seen += batch_size

        if not is_rank_0():
            return
        if num_seen == 0:
            # Empty eval dataloader: nothing to average, avoid dividing by zero.
            logger.warning(f'Eval Epoch {epoch}/{self.max_epochs} skipped: no evaluation samples')
            return
        loss_mean = loss_sum / num_seen
        logger.info(f'Eval Epoch {epoch}/{self.max_epochs} loss {loss_mean}')
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00			`import math`
			`import time`
[chat] refactor trainer (#3648) * [chat] ppo trainer remove useless args * [chat] update examples * [chat] update benchmark * [chat] update examples * [chat] fix sft training with wandb * [chat] polish docstr 2023-04-26 10:11:49 +00:00			`from typing import List, Optional`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00
			`import torch`
			`import torch.distributed as dist`
			`import wandb`
[chat] remove lm model class (#3653) * [chat] refactor lora * [chat] remove lm class * [chat] refactor save model * [chat] refactor train sft * [chat] fix ci * [chat] fix ci 2023-04-27 07:37:38 +00:00			`from torch.optim import Optimizer`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00			`from torch.utils.data import DataLoader`
			`from tqdm import tqdm`
			`from transformers.tokenization_utils_base import PreTrainedTokenizerBase`
			`from transformers.trainer import get_scheduler`

reconstruct chat trainer and fix training script (#3588) Co-authored-by: Yuanchen Xu <yuanchen.xu00@gmail.com> 2023-04-18 08:44:03 +00:00			`from .base import Trainer`
[chat] refactor trainer (#3648) * [chat] ppo trainer remove useless args * [chat] update examples * [chat] update benchmark * [chat] update examples * [chat] fix sft training with wandb * [chat] polish docstr 2023-04-26 10:11:49 +00:00			`from .callbacks import Callback`
[chat] remove lm model class (#3653) * [chat] refactor lora * [chat] remove lm class * [chat] refactor save model * [chat] refactor train sft * [chat] fix ci * [chat] fix ci 2023-04-27 07:37:38 +00:00			`from .strategies import ColossalAIStrategy, Strategy`
			`from .utils import is_rank_0, to_device`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00

reconstruct chat trainer and fix training script (#3588) Co-authored-by: Yuanchen Xu <yuanchen.xu00@gmail.com> 2023-04-18 08:44:03 +00:00			`class SFTTrainer(Trainer):`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00			`"""`
			`Trainer to use while training reward model.`

			`Args:`
			`model (torch.nn.Module): the model to train`
			`strategy (Strategy): the strategy to use for training`
			`optim(Optimizer): the optimizer to use for training`
			`train_dataloader: the dataloader to use for training`
			`eval_dataloader: the dataloader to use for evaluation`
			`batch_size (int, defaults to 1): the batch size while training`
			`max_epochs (int, defaults to 2): the number of epochs to train`
reconstruct chat trainer and fix training script (#3588) Co-authored-by: Yuanchen Xu <yuanchen.xu00@gmail.com> 2023-04-18 08:44:03 +00:00			`callbacks (List[Callback], defaults to []): the callbacks to call during training process`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00			`optim_kwargs (dict, defaults to {'lr':1e-4}): the kwargs to use while initializing optimizer`
			`"""`

			`def __init__(`
			`self,`
			`model,`
			`strategy: Strategy,`
			`optim: Optimizer,`
			`train_dataloader: DataLoader,`
			`eval_dataloader: DataLoader = None,`
			`max_epochs: int = 2,`
[chat] typo accimulation_steps -> accumulation_steps (#3662) 2023-04-28 07:42:57 +00:00			`accumulation_steps: int = 8,`
reconstruct chat trainer and fix training script (#3588) Co-authored-by: Yuanchen Xu <yuanchen.xu00@gmail.com> 2023-04-18 08:44:03 +00:00			`callbacks: List[Callback] = [],`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00			`) -> None:`
[chat] typo accimulation_steps -> accumulation_steps (#3662) 2023-04-28 07:42:57 +00:00			`if accumulation_steps > 1 and isinstance(strategy, ColossalAIStrategy) and strategy.stage == 3:`
[chat] remove lm model class (#3653) * [chat] refactor lora * [chat] remove lm class * [chat] refactor save model * [chat] refactor train sft * [chat] fix ci * [chat] fix ci 2023-04-27 07:37:38 +00:00			`raise ValueError("Accumulation steps are not supported in stage 3 of ColossalAI")`
reconstruct chat trainer and fix training script (#3588) Co-authored-by: Yuanchen Xu <yuanchen.xu00@gmail.com> 2023-04-18 08:44:03 +00:00			`super().__init__(strategy, max_epochs, callbacks=callbacks)`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00			`self.train_dataloader = train_dataloader`
			`self.eval_dataloader = eval_dataloader`
[chat] refactor model save/load logic (#3654) * [chat] strategy refactor unwrap model * [chat] strategy refactor save model * [chat] add docstr * [chat] refactor trainer save model * [chat] fix strategy typing * [chat] refactor trainer save model * [chat] update readme * [chat] fix unit test 2023-04-27 10:41:49 +00:00			`self.model = model`
			`self.optimizer = optim`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00
[chat] typo accimulation_steps -> accumulation_steps (#3662) 2023-04-28 07:42:57 +00:00			`self.accumulation_steps = accumulation_steps`
			`num_update_steps_per_epoch = len(train_dataloader) // self.accumulation_steps`
reconstruct chat trainer and fix training script (#3588) Co-authored-by: Yuanchen Xu <yuanchen.xu00@gmail.com> 2023-04-18 08:44:03 +00:00			`max_steps = math.ceil(self.max_epochs * num_update_steps_per_epoch)`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00
			`self.scheduler = get_scheduler("cosine",`
			`self.optimizer,`
			`num_warmup_steps=math.ceil(max_steps * 0.03),`
			`num_training_steps=max_steps)`

[chat] refactor trainer (#3648) * [chat] ppo trainer remove useless args * [chat] update examples * [chat] update benchmark * [chat] update examples * [chat] fix sft training with wandb * [chat] polish docstr 2023-04-26 10:11:49 +00:00			`def fit(self, logger, use_wandb: bool = False):`
			`if use_wandb:`
			`wandb.init(project="Coati", name=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))`
			`wandb.watch(self.model)`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00			`total_loss = 0`
			`# epoch_bar = tqdm(range(self.epochs), desc='Epochs', disable=not is_rank_0())`
[chat] typo accimulation_steps -> accumulation_steps (#3662) 2023-04-28 07:42:57 +00:00			`step_bar = tqdm(range(len(self.train_dataloader) // self.accumulation_steps * self.max_epochs),`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00			`desc=f'steps',`
			`disable=not is_rank_0())`
reconstruct chat trainer and fix training script (#3588) Co-authored-by: Yuanchen Xu <yuanchen.xu00@gmail.com> 2023-04-18 08:44:03 +00:00			`for epoch in range(self.max_epochs):`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00
			`# process_bar = tqdm(range(len(self.train_dataloader)), desc=f'Train process for{epoch}', disable=not is_rank_0())`
			`# train`
			`self.model.train()`
			`for batch_id, batch in enumerate(self.train_dataloader):`

[chat] remove lm model class (#3653) * [chat] refactor lora * [chat] remove lm class * [chat] refactor save model * [chat] refactor train sft * [chat] fix ci * [chat] fix ci 2023-04-27 07:37:38 +00:00			`batch = to_device(batch, torch.cuda.current_device())`
			`outputs = self.model(batch["input_ids"], attention_mask=batch["attention_mask"], labels=batch["labels"])`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00
			`loss = outputs.loss`

fix: fix sft (#3568) 2023-04-17 08:47:44 +00:00			`if loss >= 2.5 and is_rank_0():`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00			`logger.warning(f"batch_id:{batch_id}, abnormal loss: {loss}")`

[chat] typo accimulation_steps -> accumulation_steps (#3662) 2023-04-28 07:42:57 +00:00			`loss = loss / self.accumulation_steps`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00
			`self.strategy.backward(loss, self.model, self.optimizer)`

			`total_loss += loss.item()`

			`# gradient accumulation`
[chat] typo accimulation_steps -> accumulation_steps (#3662) 2023-04-28 07:42:57 +00:00			`if (batch_id + 1) % self.accumulation_steps == 0:`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00			`self.strategy.optimizer_step(self.optimizer)`
			`self.optimizer.zero_grad()`
			`self.scheduler.step()`
[chat] refactor trainer (#3648) * [chat] ppo trainer remove useless args * [chat] update examples * [chat] update benchmark * [chat] update examples * [chat] fix sft training with wandb * [chat] polish docstr 2023-04-26 10:11:49 +00:00			`if is_rank_0() and use_wandb:`
fix: fix sft (#3568) 2023-04-17 08:47:44 +00:00			`wandb.log({`
[chat] typo accimulation_steps -> accumulation_steps (#3662) 2023-04-28 07:42:57 +00:00			`"loss": total_loss / self.accumulation_steps,`
fix: fix sft (#3568) 2023-04-17 08:47:44 +00:00			`"lr": self.scheduler.get_last_lr()[0],`
			`"epoch": epoch,`
			`"batch_id": batch_id`
			`})`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00			`total_loss = 0`
			`step_bar.update()`

			`# if batch_id % log_interval == 0:`
			`# logger.info(f'Train Epoch {epoch}/{self.epochs} Batch {batch_id} Rank {dist.get_rank()} loss {loss.item()}')`
			`# wandb.log({"loss": loss.item()})`

			`# process_bar.update()`

			`# eval`
			`if self.eval_dataloader is not None:`
			`self.model.eval()`
			`with torch.no_grad():`
			`loss_sum = 0`
			`num_seen = 0`
			`for batch in self.eval_dataloader:`
[chat] remove lm model class (#3653) * [chat] refactor lora * [chat] remove lm class * [chat] refactor save model * [chat] refactor train sft * [chat] fix ci * [chat] fix ci 2023-04-27 07:37:38 +00:00			`batch = to_device(batch, torch.cuda.current_device())`
			`outputs = self.model(batch["input_ids"],`
			`attention_mask=batch["attention_mask"],`
			`labels=batch["labels"])`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00			`loss = outputs.loss`

			`loss_sum += loss.item()`
[chat] remove lm model class (#3653) * [chat] refactor lora * [chat] remove lm class * [chat] refactor save model * [chat] refactor train sft * [chat] fix ci * [chat] fix ci 2023-04-27 07:37:38 +00:00			`num_seen += batch["input_ids"].size(0)`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00
			`loss_mean = loss_sum / num_seen`
			`if dist.get_rank() == 0:`
reconstruct chat trainer and fix training script (#3588) Co-authored-by: Yuanchen Xu <yuanchen.xu00@gmail.com> 2023-04-18 08:44:03 +00:00			`logger.info(f'Eval Epoch {epoch}/{self.max_epochs} loss {loss_mean}')`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00
			`# epoch_bar.update()`