ColossalAI/applications/Chat/coati/trainer/sft.py

import time
from typing import Optional

import torch
import torch.distributed as dist
import tqdm
import wandb
from torch.optim import Optimizer
from torch.optim.lr_scheduler import _LRScheduler
from torch.utils.data import DataLoader

from colossalai.logging import DistributedLogger

from .base import SLTrainer
from .strategies import GeminiStrategy, Strategy
from .utils import is_rank_0, to_device


class SFTTrainer(SLTrainer):
    """
    Trainer to use for supervised fine-tuning (SFT).

    The model is called with ``input_ids``, ``labels`` and, when the batch
    provides it, ``attention_mask``; its output must expose a ``loss``
    attribute. Gradients are accumulated over ``accumulation_steps``
    micro-batches before the optimizer and the lr scheduler are stepped.

    Args:
        model (torch.nn.Module): the model to train
        strategy (Strategy): the strategy to use for training
        optim (Optimizer): the optimizer to use for training
        lr_scheduler (_LRScheduler): the lr scheduler to use for training
        max_epochs (int, defaults to 2): the number of epochs to train
        accumulation_steps (int, defaults to 8): the number of steps to accumulate gradients

    Raises:
        ValueError: if ``accumulation_steps`` is smaller than 1
        AssertionError: if ``accumulation_steps > 1`` is used with ``GeminiStrategy``
    """

    def __init__(
        self,
        model,
        strategy: Strategy,
        optim: Optimizer,
        lr_scheduler: _LRScheduler,
        max_epochs: int = 2,
        accumulation_steps: int = 8,
    ) -> None:
        # Fail fast: a value below 1 would otherwise only surface later as a
        # division/modulo by zero inside the training loop.
        if accumulation_steps < 1:
            raise ValueError(f"accumulation_steps must be >= 1, got {accumulation_steps}")
        if accumulation_steps > 1:
            assert not isinstance(strategy, GeminiStrategy), \
                "Accumulation steps are not supported in stage 3 of ColossalAI"

        super().__init__(strategy, max_epochs, model, optim)

        self.accumulation_steps = accumulation_steps
        self.scheduler = lr_scheduler

    def _forward_batch(self, batch):
        """
        Run the model on one batch and return its output.

        ``attention_mask`` is forwarded only when the batch contains it, so
        datasets that do not produce a mask work for training and evaluation alike.

        Args:
            batch: mapping with ``input_ids``, ``labels`` and optionally ``attention_mask``
        """
        model_kwargs = {}
        if "attention_mask" in batch:
            model_kwargs["attention_mask"] = batch["attention_mask"]
        model_kwargs["labels"] = batch["labels"]
        return self.model(batch["input_ids"], **model_kwargs)

    def _train(self, epoch: int):
        """
        Train for one pass over ``self.train_dataloader``.

        Args:
            epoch: index of the current epoch (only used for logging)
        """
        self.model.train()
        for batch_id, batch in enumerate(self.train_dataloader):

            batch = to_device(batch, torch.cuda.current_device())
            outputs = self._forward_batch(batch)

            # Scale so that the accumulated gradient is the mean over the micro-batches.
            loss = outputs.loss / self.accumulation_steps

            self.strategy.backward(loss, self.model, self.optimizer)

            self.total_loss += loss.item()

            # gradient accumulation
            if (batch_id + 1) % self.accumulation_steps == 0:
                self.strategy.optimizer_step(self.optimizer)
                self.optimizer.zero_grad()
                self.scheduler.step()
                if is_rank_0() and self.use_wandb:
                    wandb.log({
                        # total_loss sums losses that were already divided by
                        # accumulation_steps, i.e. it is the mean micro-batch
                        # loss; dividing again would under-report it.
                        "loss": self.total_loss,
                        "lr": self.scheduler.get_last_lr()[0],
                        "epoch": epoch,
                        "batch_id": batch_id
                    })
                self.total_loss = 0
                self.step_bar.update()
        # NOTE(review): when len(train_dataloader) is not a multiple of
        # accumulation_steps, the trailing micro-batches leave gradients and
        # total_loss that carry over into the next epoch's first step
        # (batch_id restarts at 0). Left unchanged here; confirm whether the
        # remainder should be stepped or discarded.

    def _eval(self, epoch: int):
        """
        Evaluate on ``self.eval_dataloader`` and report the mean loss on rank 0.

        The reported value is the sample-weighted mean of the per-batch losses
        seen by this process; no reduction across ranks is performed here.
        Does nothing when no eval dataloader was given or it yields no samples.

        Args:
            epoch: index of the current epoch (only used for logging)
        """
        if self.eval_dataloader is None:
            return

        self.model.eval()
        loss_sum, num_seen = 0.0, 0
        with torch.no_grad():
            for batch in self.eval_dataloader:
                batch = to_device(batch, torch.cuda.current_device())
                outputs = self._forward_batch(batch)

                # outputs.loss is one scalar per batch (assumed to be a
                # per-batch mean - TODO confirm for custom models), so weight
                # it by the batch size before dividing by the sample count.
                batch_size = batch["input_ids"].size(0)
                loss_sum += outputs.loss.item() * batch_size
                num_seen += batch_size

        if num_seen == 0:
            # Empty eval dataloader: nothing to average.
            return

        loss_mean = loss_sum / num_seen
        if is_rank_0():
            message = f'Eval Epoch {epoch}/{self.max_epochs} loss {loss_mean}'
            # logger is optional in _before_fit; fall back to stdout so the
            # result is not lost (and no AttributeError is raised).
            if self.logger is not None:
                self.logger.info(message)
            else:
                print(message)

    def _before_fit(self,
                    train_dataloader: DataLoader,
                    eval_dataloader: Optional[DataLoader] = None,
                    logger: Optional[DistributedLogger] = None,
                    use_wandb: bool = False):
        """
        Store the dataloaders and logging options and set up progress reporting.

        Args:
            train_dataloader: the dataloader to use for training
            eval_dataloader: the dataloader to use for evaluation
            logger: logger used to report the evaluation loss
            use_wandb: whether to log training metrics to Weights & Biases
        """
        self.train_dataloader = train_dataloader
        self.eval_dataloader = eval_dataloader

        self.logger = logger
        self.use_wandb = use_wandb
        # Only rank 0 ever calls wandb.log, so only rank 0 opens a run;
        # otherwise every process would create its own run.
        if use_wandb and is_rank_0():
            wandb.init(project="Coati", name=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
            wandb.watch(self.model)

        self.total_loss = 0
        self.no_epoch_bar = True
        # One tick per optimizer step: full accumulation groups per epoch times epochs.
        self.step_bar = tqdm.trange(
            len(self.train_dataloader) // self.accumulation_steps * self.max_epochs,
            desc='steps',
            disable=not is_rank_0()
        )
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00			`import time`
[chat] refactor trainer class (#4080) * to: add SLTrainer * refactor: refactor RMTrainer and SFTTrainer * fix: fix init file * feat: remove on_learn_epoch fn as not used * fix: align with modified gemini arguments * to: add OnPolicyTrainer * revert: add _on_learn_epoch fn * refactor: refactor PPOTrainer * style: rename PPOTrainer argument * fix: align with modified PPO arguments * test: align with modified train_prompts arguments * chore: modify train_prompts * docs: align with modified arguments * fix: remove unnecessary output * fix: move dataloader to fit fn of SLTrainer * fix: move dataloader to fit fn of OnPolicyTrainer * fix: modify usage of prompt and pretrain dataloader 2023-06-29 02:48:09 +00:00			`from typing import Optional`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00
			`import torch`
			`import torch.distributed as dist`
[chat] refactor trainer class (#4080) * to: add SLTrainer * refactor: refactor RMTrainer and SFTTrainer * fix: fix init file * feat: remove on_learn_epoch fn as not used * fix: align with modified gemini arguments * to: add OnPolicyTrainer * revert: add _on_learn_epoch fn * refactor: refactor PPOTrainer * style: rename PPOTrainer argument * fix: align with modified PPO arguments * test: align with modified train_prompts arguments * chore: modify train_prompts * docs: align with modified arguments * fix: remove unnecessary output * fix: move dataloader to fit fn of SLTrainer * fix: move dataloader to fit fn of OnPolicyTrainer * fix: modify usage of prompt and pretrain dataloader 2023-06-29 02:48:09 +00:00			`import tqdm`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00			`import wandb`
[chat] remove lm model class (#3653) * [chat] refactor lora * [chat] remove lm class * [chat] refactor save model * [chat] refactor train sft * [chat] fix ci * [chat] fix ci 2023-04-27 07:37:38 +00:00			`from torch.optim import Optimizer`
[chat] refactor strategy class with booster api (#3987) * refactor: adapt boost API in base and naive strategies * fix: initialize plugin after setup_distributed * fix: fix save_pretrained fn * refactor: adapt boost API in DDPStrategy * to: add _post_init check * to: fix ddp backward, modify ddp dataloader and unwrap * feat: adapt boost API in ColossalAIStrategy * fix: call setup_distributed before use get_current_device * fix: fix save_model and save_optimizer * test: remove save_sharded_optimizer test * style: apply formatter * fix: fix stage check and add comments * feat: allow dict type arg in strategy.prepare * to: temporarily remove lr_scheduler for testing * style: simplify init of ColossalAIStrategy * fix: fix lr_scheduler in sft and rm * style: modify comments * test: add train_prompts tests * fix: fix inference only case and use in train_prompts * test: skip failed tests in ci * style: fix CodeFactor check * fix: do not use model.to('cpu') with GeminiPlugin * test: enable colossalai_gemini tests * test: set CUDA_VISIBLE_DEVICES in ci * docs: add note 2023-06-25 09:36:21 +00:00			`from torch.optim.lr_scheduler import _LRScheduler`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00			`from torch.utils.data import DataLoader`

[chat] refactor trainer class (#4080) * to: add SLTrainer * refactor: refactor RMTrainer and SFTTrainer * fix: fix init file * feat: remove on_learn_epoch fn as not used * fix: align with modified gemini arguments * to: add OnPolicyTrainer * revert: add _on_learn_epoch fn * refactor: refactor PPOTrainer * style: rename PPOTrainer argument * fix: align with modified PPO arguments * test: align with modified train_prompts arguments * chore: modify train_prompts * docs: align with modified arguments * fix: remove unnecessary output * fix: move dataloader to fit fn of SLTrainer * fix: move dataloader to fit fn of OnPolicyTrainer * fix: modify usage of prompt and pretrain dataloader 2023-06-29 02:48:09 +00:00			`from colossalai.logging import DistributedLogger`

			`from .base import SLTrainer`
[chat] remove naive strategy and split colossalai strategy (#4094) * feat: remove on_learn_epoch fn as not used * revert: add _on_learn_epoch fn * to: remove the use of NaiveStrategy * test: remove NaiveStrategy tests * feat: remove NaiveStrategy * style: modify comments and params * feat: split ColossalAIStrategy into LowLevelZeroStrategy and GeminiStrategy * fix: remove naive * fix: align with modified colossal strategy * fix: fix ddp _try_init_dist arg 2023-06-29 10:11:00 +00:00			`from .strategies import GeminiStrategy, Strategy`
[chat] remove lm model class (#3653) * [chat] refactor lora * [chat] remove lm class * [chat] refactor save model * [chat] refactor train sft * [chat] fix ci * [chat] fix ci 2023-04-27 07:37:38 +00:00			`from .utils import is_rank_0, to_device`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00

[chat] refactor trainer class (#4080) * to: add SLTrainer * refactor: refactor RMTrainer and SFTTrainer * fix: fix init file * feat: remove on_learn_epoch fn as not used * fix: align with modified gemini arguments * to: add OnPolicyTrainer * revert: add _on_learn_epoch fn * refactor: refactor PPOTrainer * style: rename PPOTrainer argument * fix: align with modified PPO arguments * test: align with modified train_prompts arguments * chore: modify train_prompts * docs: align with modified arguments * fix: remove unnecessary output * fix: move dataloader to fit fn of SLTrainer * fix: move dataloader to fit fn of OnPolicyTrainer * fix: modify usage of prompt and pretrain dataloader 2023-06-29 02:48:09 +00:00			`class SFTTrainer(SLTrainer):`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00			`"""`
			`Trainer to use while training reward model.`

			`Args:`
			`model (torch.nn.Module): the model to train`
			`strategy (Strategy): the strategy to use for training`
			`optim(Optimizer): the optimizer to use for training`
[chat] refactor trainer class (#4080) * to: add SLTrainer * refactor: refactor RMTrainer and SFTTrainer * fix: fix init file * feat: remove on_learn_epoch fn as not used * fix: align with modified gemini arguments * to: add OnPolicyTrainer * revert: add _on_learn_epoch fn * refactor: refactor PPOTrainer * style: rename PPOTrainer argument * fix: align with modified PPO arguments * test: align with modified train_prompts arguments * chore: modify train_prompts * docs: align with modified arguments * fix: remove unnecessary output * fix: move dataloader to fit fn of SLTrainer * fix: move dataloader to fit fn of OnPolicyTrainer * fix: modify usage of prompt and pretrain dataloader 2023-06-29 02:48:09 +00:00			`lr_scheduler(_LRScheduler): the lr scheduler to use for training`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00			`max_epochs (int, defaults to 2): the number of epochs to train`
[chat] refactor trainer class (#4080) * to: add SLTrainer * refactor: refactor RMTrainer and SFTTrainer * fix: fix init file * feat: remove on_learn_epoch fn as not used * fix: align with modified gemini arguments * to: add OnPolicyTrainer * revert: add _on_learn_epoch fn * refactor: refactor PPOTrainer * style: rename PPOTrainer argument * fix: align with modified PPO arguments * test: align with modified train_prompts arguments * chore: modify train_prompts * docs: align with modified arguments * fix: remove unnecessary output * fix: move dataloader to fit fn of SLTrainer * fix: move dataloader to fit fn of OnPolicyTrainer * fix: modify usage of prompt and pretrain dataloader 2023-06-29 02:48:09 +00:00			`accumulation_steps (int, defaults to 8): the number of steps to accumulate gradients`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00			`"""`

			`def __init__(`
			`self,`
			`model,`
			`strategy: Strategy,`
			`optim: Optimizer,`
[chat] refactor strategy class with booster api (#3987) * refactor: adapt boost API in base and naive strategies * fix: initialize plugin after setup_distributed * fix: fix save_pretrained fn * refactor: adapt boost API in DDPStrategy * to: add _post_init check * to: fix ddp backward, modify ddp dataloader and unwrap * feat: adapt boost API in ColossalAIStrategy * fix: call setup_distributed before use get_current_device * fix: fix save_model and save_optimizer * test: remove save_sharded_optimizer test * style: apply formatter * fix: fix stage check and add comments * feat: allow dict type arg in strategy.prepare * to: temporarily remove lr_scheduler for testing * style: simplify init of ColossalAIStrategy * fix: fix lr_scheduler in sft and rm * style: modify comments * test: add train_prompts tests * fix: fix inference only case and use in train_prompts * test: skip failed tests in ci * style: fix CodeFactor check * fix: do not use model.to('cpu') with GeminiPlugin * test: enable colossalai_gemini tests * test: set CUDA_VISIBLE_DEVICES in ci * docs: add note 2023-06-25 09:36:21 +00:00			`lr_scheduler: _LRScheduler,`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00			`max_epochs: int = 2,`
[chat] typo accimulation_steps -> accumulation_steps (#3662) 2023-04-28 07:42:57 +00:00			`accumulation_steps: int = 8,`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00			`) -> None:`
[chat] remove naive strategy and split colossalai strategy (#4094) * feat: remove on_learn_epoch fn as not used * revert: add _on_learn_epoch fn * to: remove the use of NaiveStrategy * test: remove NaiveStrategy tests * feat: remove NaiveStrategy * style: modify comments and params * feat: split ColossalAIStrategy into LowLevelZeroStrategy and GeminiStrategy * fix: remove naive * fix: align with modified colossal strategy * fix: fix ddp _try_init_dist arg 2023-06-29 10:11:00 +00:00			`if accumulation_steps > 1:`
			`assert not isinstance(strategy, GeminiStrategy), \`
[chat] refactor strategy class with booster api (#3987) * refactor: adapt boost API in base and naive strategies * fix: initialize plugin after setup_distributed * fix: fix save_pretrained fn * refactor: adapt boost API in DDPStrategy * to: add _post_init check * to: fix ddp backward, modify ddp dataloader and unwrap * feat: adapt boost API in ColossalAIStrategy * fix: call setup_distributed before use get_current_device * fix: fix save_model and save_optimizer * test: remove save_sharded_optimizer test * style: apply formatter * fix: fix stage check and add comments * feat: allow dict type arg in strategy.prepare * to: temporarily remove lr_scheduler for testing * style: simplify init of ColossalAIStrategy * fix: fix lr_scheduler in sft and rm * style: modify comments * test: add train_prompts tests * fix: fix inference only case and use in train_prompts * test: skip failed tests in ci * style: fix CodeFactor check * fix: do not use model.to('cpu') with GeminiPlugin * test: enable colossalai_gemini tests * test: set CUDA_VISIBLE_DEVICES in ci * docs: add note 2023-06-25 09:36:21 +00:00			`"Accumulation steps are not supported in stage 3 of ColossalAI"`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00
[chat] refactor trainer class (#4080) * to: add SLTrainer * refactor: refactor RMTrainer and SFTTrainer * fix: fix init file * feat: remove on_learn_epoch fn as not used * fix: align with modified gemini arguments * to: add OnPolicyTrainer * revert: add _on_learn_epoch fn * refactor: refactor PPOTrainer * style: rename PPOTrainer argument * fix: align with modified PPO arguments * test: align with modified train_prompts arguments * chore: modify train_prompts * docs: align with modified arguments * fix: remove unnecessary output * fix: move dataloader to fit fn of SLTrainer * fix: move dataloader to fit fn of OnPolicyTrainer * fix: modify usage of prompt and pretrain dataloader 2023-06-29 02:48:09 +00:00			`super().__init__(strategy, max_epochs, model, optim)`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00
[chat] refactor trainer class (#4080) * to: add SLTrainer * refactor: refactor RMTrainer and SFTTrainer * fix: fix init file * feat: remove on_learn_epoch fn as not used * fix: align with modified gemini arguments * to: add OnPolicyTrainer * revert: add _on_learn_epoch fn * refactor: refactor PPOTrainer * style: rename PPOTrainer argument * fix: align with modified PPO arguments * test: align with modified train_prompts arguments * chore: modify train_prompts * docs: align with modified arguments * fix: remove unnecessary output * fix: move dataloader to fit fn of SLTrainer * fix: move dataloader to fit fn of OnPolicyTrainer * fix: modify usage of prompt and pretrain dataloader 2023-06-29 02:48:09 +00:00			`self.accumulation_steps = accumulation_steps`
[chat] refactor strategy class with booster api (#3987) * refactor: adapt boost API in base and naive strategies * fix: initialize plugin after setup_distributed * fix: fix save_pretrained fn * refactor: adapt boost API in DDPStrategy * to: add _post_init check * to: fix ddp backward, modify ddp dataloader and unwrap * feat: adapt boost API in ColossalAIStrategy * fix: call setup_distributed before use get_current_device * fix: fix save_model and save_optimizer * test: remove save_sharded_optimizer test * style: apply formatter * fix: fix stage check and add comments * feat: allow dict type arg in strategy.prepare * to: temporarily remove lr_scheduler for testing * style: simplify init of ColossalAIStrategy * fix: fix lr_scheduler in sft and rm * style: modify comments * test: add train_prompts tests * fix: fix inference only case and use in train_prompts * test: skip failed tests in ci * style: fix CodeFactor check * fix: do not use model.to('cpu') with GeminiPlugin * test: enable colossalai_gemini tests * test: set CUDA_VISIBLE_DEVICES in ci * docs: add note 2023-06-25 09:36:21 +00:00			`self.scheduler = lr_scheduler`
[Coati] first commit (#3283) 2023-03-28 12:25:36 +00:00
[chat] refactor trainer class (#4080) * to: add SLTrainer * refactor: refactor RMTrainer and SFTTrainer * fix: fix init file * feat: remove on_learn_epoch fn as not used * fix: align with modified gemini arguments * to: add OnPolicyTrainer * revert: add _on_learn_epoch fn * refactor: refactor PPOTrainer * style: rename PPOTrainer argument * fix: align with modified PPO arguments * test: align with modified train_prompts arguments * chore: modify train_prompts * docs: align with modified arguments * fix: remove unnecessary output * fix: move dataloader to fit fn of SLTrainer * fix: move dataloader to fit fn of OnPolicyTrainer * fix: modify usage of prompt and pretrain dataloader 2023-06-29 02:48:09 +00:00			`def _train(self, epoch: int):`
			`self.model.train()`
			`for batch_id, batch in enumerate(self.train_dataloader):`

			`batch = to_device(batch, torch.cuda.current_device())`
[coati] add chatglm model (#4539) * update configuration of chatglm and add support in coati * add unit test & update chatglm default config & fix bos index issue * remove chatglm due to oom * add dataset pkg in requirement-text * fix parameter issue in test_models * add ref in tokenize & rm unnessary parts * separate source & target tokenization in chatglm * add unit test to chatglm * fix test dataset issue * update truncation of chatglm * fix Colossalai version * fix colossal ai version in test 2023-08-29 09:58:51 +00:00			`if "attention_mask" in batch:`
			`outputs = self.model(batch["input_ids"],`
			`attention_mask=batch["attention_mask"],`
			`labels=batch["labels"])`
			`else:`
			`outputs = self.model(batch["input_ids"],`
			`labels=batch["labels"])`
[chat] refactor trainer class (#4080) * to: add SLTrainer * refactor: refactor RMTrainer and SFTTrainer * fix: fix init file * feat: remove on_learn_epoch fn as not used * fix: align with modified gemini arguments * to: add OnPolicyTrainer * revert: add _on_learn_epoch fn * refactor: refactor PPOTrainer * style: rename PPOTrainer argument * fix: align with modified PPO arguments * test: align with modified train_prompts arguments * chore: modify train_prompts * docs: align with modified arguments * fix: remove unnecessary output * fix: move dataloader to fit fn of SLTrainer * fix: move dataloader to fit fn of OnPolicyTrainer * fix: modify usage of prompt and pretrain dataloader 2023-06-29 02:48:09 +00:00
			`loss = outputs.loss`
			`loss = loss / self.accumulation_steps`

			`self.strategy.backward(loss, self.model, self.optimizer)`

			`self.total_loss += loss.item()`

			`# gradient accumulation`
			`if (batch_id + 1) % self.accumulation_steps == 0:`
			`self.strategy.optimizer_step(self.optimizer)`
			`self.optimizer.zero_grad()`
			`self.scheduler.step()`
			`if is_rank_0() and self.use_wandb:`
			`wandb.log({`
			`"loss": self.total_loss / self.accumulation_steps,`
			`"lr": self.scheduler.get_last_lr()[0],`
			`"epoch": epoch,`
			`"batch_id": batch_id`
			`})`
			`self.total_loss = 0`
			`self.step_bar.update()`

			`def _eval(self, epoch: int):`
			`if self.eval_dataloader is not None:`
			`self.model.eval()`
			`with torch.no_grad():`
			`loss_sum, num_seen = 0, 0`
			`for batch in self.eval_dataloader:`
			`batch = to_device(batch, torch.cuda.current_device())`
			`outputs = self.model(batch["input_ids"],`
			`attention_mask=batch["attention_mask"],`
			`labels=batch["labels"])`
			`loss = outputs.loss`

			`loss_sum += loss.item()`
			`num_seen += batch["input_ids"].size(0)`

			`loss_mean = loss_sum / num_seen`
			`if dist.get_rank() == 0:`
			`self.logger.info(f'Eval Epoch {epoch}/{self.max_epochs} loss {loss_mean}')`

			`def _before_fit(self,`
			`train_dataloader: DataLoader,`
			`eval_dataloader: Optional[DataLoader] = None,`
			`logger: Optional[DistributedLogger] = None,`
			`use_wandb: bool = False):`
			`"""`
			`Args:`
			`train_dataloader: the dataloader to use for training`
			`eval_dataloader: the dataloader to use for evaluation`
			`"""`
			`self.train_dataloader = train_dataloader`
			`self.eval_dataloader = eval_dataloader`

			`self.logger = logger`
			`self.use_wandb = use_wandb`
[chat] refactor trainer (#3648) * [chat] ppo trainer remove useless args * [chat] update examples * [chat] update benchmark * [chat] update examples * [chat] fix sft training with wandb * [chat] polish docstr 2023-04-26 10:11:49 +00:00			`if use_wandb:`
			`wandb.init(project="Coati", name=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))`
			`wandb.watch(self.model)`
[chat] refactor trainer class (#4080) * to: add SLTrainer * refactor: refactor RMTrainer and SFTTrainer * fix: fix init file * feat: remove on_learn_epoch fn as not used * fix: align with modified gemini arguments * to: add OnPolicyTrainer * revert: add _on_learn_epoch fn * refactor: refactor PPOTrainer * style: rename PPOTrainer argument * fix: align with modified PPO arguments * test: align with modified train_prompts arguments * chore: modify train_prompts * docs: align with modified arguments * fix: remove unnecessary output * fix: move dataloader to fit fn of SLTrainer * fix: move dataloader to fit fn of OnPolicyTrainer * fix: modify usage of prompt and pretrain dataloader 2023-06-29 02:48:09 +00:00
			`self.total_loss = 0`
			`self.no_epoch_bar = True`
			`self.step_bar = tqdm.trange(`
			`len(self.train_dataloader) // self.accumulation_steps * self.max_epochs,`
			`desc=f'steps',`
			`disable=not is_rank_0()`
			`)`