From 06741d894dcbe958acd4e10d771f22275e20e368 Mon Sep 17 00:00:00 2001
From: Camille Zhong <44392324+Camille7777@users.noreply.github.com>
Date: Wed, 22 Mar 2023 17:18:13 +0800
Subject: [PATCH] Add RoBERTa for RLHF Stage 2 & 3 (test)

RoBERTa for RLHF Stage 2 & 3 (still in testing)
---
 .../chatgpt/models/roberta/__init__.py        |  5 +++
 .../chatgpt/models/roberta/roberta_actor.py   | 35 +++++++++++++++++
 .../chatgpt/models/roberta/roberta_critic.py  | 38 ++++++++++++++++++
 .../chatgpt/models/roberta/roberta_rm.py      | 39 +++++++++++++++++++
 applications/ChatGPT/examples/train_dummy.py  | 11 +++++-
 .../ChatGPT/examples/train_reward_model.py    | 10 ++++-
 6 files changed, 134 insertions(+), 4 deletions(-)
 create mode 100644 applications/ChatGPT/chatgpt/models/roberta/__init__.py
 create mode 100644 applications/ChatGPT/chatgpt/models/roberta/roberta_actor.py
 create mode 100644 applications/ChatGPT/chatgpt/models/roberta/roberta_critic.py
 create mode 100644 applications/ChatGPT/chatgpt/models/roberta/roberta_rm.py

diff --git a/applications/ChatGPT/chatgpt/models/roberta/__init__.py b/applications/ChatGPT/chatgpt/models/roberta/__init__.py
new file mode 100644
index 000000000..7b38d5bdb
--- /dev/null
+++ b/applications/ChatGPT/chatgpt/models/roberta/__init__.py
@@ -0,0 +1,5 @@
+from .roberta_actor import RoBERTaActor
+from .roberta_critic import RoBERTaCritic
+from .roberta_rm import RoBERTaRM
+
+__all__ = ['RoBERTaActor', 'RoBERTaCritic', 'RoBERTaRM']
\ No newline at end of file
diff --git a/applications/ChatGPT/chatgpt/models/roberta/roberta_actor.py b/applications/ChatGPT/chatgpt/models/roberta/roberta_actor.py
new file mode 100644
index 000000000..e35fa6eb1
--- /dev/null
+++ b/applications/ChatGPT/chatgpt/models/roberta/roberta_actor.py
@@ -0,0 +1,35 @@
+from typing import Optional
+
+from transformers.models.roberta.configuration_roberta import RobertaConfig
+from transformers.models.roberta.modeling_roberta import RobertaForCausalLM
+
+from ..base import Actor
+
+class RoBERTaActor(Actor):
+    """
+    RoBERTa Actor model.
+
+    Args:
+        pretrained (str): Pretrained model name or path.
+        config (RoBERTaConfig): Model config.
+        checkpoint (bool): Enable gradient checkpointing.
+        lora_rank (int): Rank of the low-rank approximation.
+        lora_train_bias (str): LoRA bias training mode.
+    """
+
+
+    def __init__(self,
+                 pretrained: Optional[str] = None,
+                 config: Optional[RobertaConfig] = None,
+                 checkpoint: bool = False,
+                 lora_rank: int = 0,
+                 lora_train_bias: str = 'none') -> None:
+        if pretrained is not None:
+            model = RobertaForCausalLM.from_pretrained(pretrained)
+        elif config is not None:
+            model = RobertaForCausalLM(config)
+        else:
+            model = RobertaForCausalLM(RobertaConfig())
+        if checkpoint:
+            model.gradient_checkpointing_enable()
+        super().__init__(model, lora_rank, lora_train_bias)
diff --git a/applications/ChatGPT/chatgpt/models/roberta/roberta_critic.py b/applications/ChatGPT/chatgpt/models/roberta/roberta_critic.py
new file mode 100644
index 000000000..3215473ab
--- /dev/null
+++ b/applications/ChatGPT/chatgpt/models/roberta/roberta_critic.py
@@ -0,0 +1,38 @@
+from typing import Optional
+
+import torch.nn as nn
+from transformers.models.roberta.configuration_roberta import RobertaConfig
+from transformers.models.roberta.modeling_roberta import RobertaModel
+
+from ..base import Critic
+
+
+class RoBERTaCritic(Critic):
+    """
+    RoBERTa Critic model.
+
+    Args:
+        pretrained (str): Pretrained model name or path.
+        config (RoBERTaConfig): Model config.
+        checkpoint (bool): Enable gradient checkpointing.
+        lora_rank (int): Rank of the low-rank approximation.
+        lora_train_bias (str): LoRA bias training mode.
+    """
+
+    def __init__(self,
+                 pretrained: Optional[str] = None,
+                 config: Optional[RobertaConfig] = None,
+                 checkpoint: bool = False,
+                 lora_rank: int = 0,
+                 lora_train_bias: str = 'none',
+                 **kwargs) -> None:
+        if pretrained is not None:
+            model = RobertaModel.from_pretrained(pretrained)
+        elif config is not None:
+            model = RobertaModel(config)
+        else:
+            model = RobertaModel(RobertaConfig())
+        if checkpoint:
+            model.gradient_checkpointing_enable()
+        value_head = nn.Linear(model.config.hidden_size, 1)
+        super().__init__(model, value_head, lora_rank, lora_train_bias, **kwargs)
diff --git a/applications/ChatGPT/chatgpt/models/roberta/roberta_rm.py b/applications/ChatGPT/chatgpt/models/roberta/roberta_rm.py
new file mode 100644
index 000000000..9930b8ef0
--- /dev/null
+++ b/applications/ChatGPT/chatgpt/models/roberta/roberta_rm.py
@@ -0,0 +1,39 @@
+from typing import Optional
+
+import torch.nn as nn
+from transformers import RobertaConfig, RobertaModel
+
+
+from ..base import RewardModel
+
+
+class RoBERTaRM(RewardModel):
+    """
+    RoBERTa Reward model.
+
+    Args:
+        pretrained (str): Pretrained model name or path.
+        config (RoBERTaConfig): Model config.
+        checkpoint (bool): Enable gradient checkpointing.
+        lora_rank (int): Rank of the low-rank approximation.
+        lora_train_bias (str): LoRA bias training mode.
+    """
+
+    def __init__(self,
+                 pretrained: Optional[str] = None,
+                 config: Optional[RobertaConfig] = None,
+                 checkpoint: bool = False,
+                 lora_rank: int = 0,
+                 lora_train_bias: str = 'none') -> None:
+        if pretrained is not None:
+            model = RobertaModel.from_pretrained(pretrained)
+        elif config is not None:
+            model = RobertaModel(config)
+        else:
+            model = RobertaModel(RobertaConfig())
+        if checkpoint:
+            model.gradient_checkpointing_enable()
+
+        value_head = nn.Linear(model.config.hidden_size, 1)
+        value_head.weight.data.normal_(mean=0.0, std=1/(model.config.hidden_size + 1))
+        super().__init__(model, value_head, lora_rank, lora_train_bias)
diff --git a/applications/ChatGPT/examples/train_dummy.py b/applications/ChatGPT/examples/train_dummy.py
index c0ebf8f9b..30e2a36e1 100644
--- a/applications/ChatGPT/examples/train_dummy.py
+++ b/applications/ChatGPT/examples/train_dummy.py
@@ -6,11 +6,12 @@ from chatgpt.models.base import RewardModel
 from chatgpt.models.bloom import BLOOMActor, BLOOMCritic
 from chatgpt.models.gpt import GPTActor, GPTCritic
 from chatgpt.models.opt import OPTActor, OPTCritic
+from chatgpt.models.roberta import RoBERTaActor, RoBERTaCritic
 from chatgpt.trainer import PPOTrainer
 from chatgpt.trainer.callbacks import SaveCheckpoint
 from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy
 from torch.optim import Adam
-from transformers import AutoTokenizer, BloomTokenizerFast
+from transformers import AutoTokenizer, BloomTokenizerFast, RobertaTokenizer
 from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer
 
 from colossalai.nn.optimizer import HybridAdam
@@ -46,6 +47,9 @@ def main(args):
     elif args.model == 'opt':
         actor = OPTActor(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
         critic = OPTCritic(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
+    elif args.model == 'roberta':
+        actor = RoBERTaActor(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
+        critic = RoBERTaCritic(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
 
@@ -69,6 +73,9 @@ def main(args):
         tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'opt':
         tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
+    elif args.model == 'roberta':
+        tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
+        tokenizer.pad_token = tokenizer.eos_token
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
 
@@ -128,7 +135,7 @@ if __name__ == '__main__':
     parser.add_argument('--strategy',
                         choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'],
                         default='naive')
-    parser.add_argument('--model', type=str, default='gpt2', choices=['gpt2', 'bloom', 'opt'])
+    parser.add_argument('--model', type=str, default='gpt2', choices=['gpt2', 'bloom', 'opt', 'roberta'])
     parser.add_argument('--pretrain', type=str, default=None)
     parser.add_argument('--save_path', type=str, default='actor_checkpoint_dummy.pt')
     parser.add_argument('--need_optim_ckpt', type=bool, default=False)
diff --git a/applications/ChatGPT/examples/train_reward_model.py b/applications/ChatGPT/examples/train_reward_model.py
index 47dd988b8..f46400b3f 100644
--- a/applications/ChatGPT/examples/train_reward_model.py
+++ b/applications/ChatGPT/examples/train_reward_model.py
@@ -8,12 +8,13 @@ from chatgpt.models.base import RewardModel
 from chatgpt.models.bloom import BLOOMRM
 from chatgpt.models.gpt import GPTRM
 from chatgpt.models.opt import OPTRM
+from chatgpt.models.roberta import RoBERTaRM
 from chatgpt.trainer import RewardModelTrainer
 from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy
 from datasets import load_dataset
 from random import randint
 from torch.optim import Adam
-from transformers import AutoTokenizer, BloomTokenizerFast
+from transformers import AutoTokenizer, BloomTokenizerFast, RobertaTokenizer
 from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer
 
 from colossalai.nn.optimizer import HybridAdam
@@ -39,6 +40,8 @@ def train(args):
         model = OPTRM(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
     elif args.model == 'gpt2':
         model = GPTRM(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
+    elif args.model == 'roberta':
+        model = RoBERTaRM(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
 
@@ -54,6 +57,9 @@ def train(args):
         tokenizer = BloomTokenizerFast.from_pretrained('bigscience/bloom-560m')
     elif args.model == 'opt':
         tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
+    elif args.model == 'roberta':
+        tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
+        tokenizer.pad_token = tokenizer.eos_token
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
     max_len = args.max_len
@@ -119,7 +125,7 @@ if __name__ == '__main__':
     parser.add_argument('--strategy',
                         choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'],
                         default='naive')
-    parser.add_argument('--model', choices=['gpt2', 'bloom', 'opt'], default='bloom')
+    parser.add_argument('--model', choices=['gpt2', 'bloom', 'opt', 'roberta'], default='bloom')
    parser.add_argument('--pretrain', type=str, default=None)
    parser.add_argument('--model_path', type=str, default=None)
    parser.add_argument('--need_optim_ckpt', type=bool, default=False)
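
Usage sketch: a minimal example of how the classes added by this patch are constructed, mirroring the wiring in train_dummy.py and train_reward_model.py. It assumes a single CUDA device is available and reuses the 'roberta-base' checkpoint for the model weights (the patch only pins it for the tokenizer); the Actor/Critic/RewardModel constructor signatures follow the existing BLOOM/GPT/OPT wrappers.

    import torch
    from transformers import RobertaTokenizer

    from chatgpt.models.roberta import RoBERTaActor, RoBERTaCritic, RoBERTaRM

    # Assumes CUDA is available, as in the example scripts
    device = torch.cuda.current_device()

    # Stage 3 (PPO) components, as constructed in train_dummy.py
    actor = RoBERTaActor(pretrained='roberta-base', lora_rank=0).to(device)
    critic = RoBERTaCritic(pretrained='roberta-base', lora_rank=0).to(device)

    # Stage 2 reward model, as constructed in train_reward_model.py
    reward_model = RoBERTaRM(pretrained='roberta-base', lora_rank=0).to(device)

    # Tokenizer setup matches the example scripts
    tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    tokenizer.pad_token = tokenizer.eos_token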