[chat] use official transformers and fix some issues (#4117)

* feat: remove on_learn_epoch fn as not used

* revert: add _on_learn_epoch fn

* feat: remove NaiveStrategy

* test: update train_prompts tests

* fix: remove prepare_llama_tokenizer_and_embedding

* test: add lora arg

* feat: remove roberta support in train_prompts due to runtime errs

* feat: remove deberta & roberta in rm as not used

* test: remove deberta and roberta tests

* feat: remove deberta and roberta models as not used

* fix: remove calls to roberta

* fix: remove prepare_llama_tokenizer_and_embedding

* chore: update transformers version

* docs: update transformers version

* fix: fix actor inference

* fix: fix ci

* feat: change llama pad token to unk

* revert: revert ddp setup_distributed

* fix: change llama pad token to unk

* revert: undo unnecessary changes

* fix: use pip to install transformers

Branch: pull/4157/head
Author: Wenhao Chen, 2023-07-04 13:49:09 +08:00 (committed by GitHub)
Parent: 1350ece492
Commit: 3d8d5d0d58
19 changed files with 74 additions and 419 deletions

View File

@@ -4,11 +4,10 @@ on:
  pull_request:
    types: [synchronize, opened, reopened]
    paths:
-     - 'applications/Chat/coati/**'
-     - 'applications/Chat/requirements.txt'
-     - 'applications/Chat/setup.py'
-     - 'applications/Chat/examples/**'
+     - "applications/Chat/coati/**"
+     - "applications/Chat/requirements.txt"
+     - "applications/Chat/setup.py"
+     - "applications/Chat/examples/**"

jobs:
  tests:
@@ -38,10 +37,7 @@ jobs:
      - name: Install Transformers
        run: |
-         cd applications/Chat
-         git clone https://github.com/hpcaitech/transformers
-         cd transformers
-         pip install -v .
+         pip install transformers==4.30.2

      - name: Execute Examples
        run: |

View File

@@ -98,12 +98,9 @@ pip install .
```

### Install the Transformers
-Given Hugging Face hasn't officially supported the LLaMA models, We fork a branch of Transformers that can be compatible with our code
```shell
-git clone https://github.com/hpcaitech/transformers
-cd transformers
-pip install .
+pip install transformers==4.30.2
```

## How to use?
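A quick way to confirm the pinned release is in place (a minimal check, not part of the original README steps; it only verifies that the official package now ships the LLaMA classes, so the hpcaitech fork is no longer required):

```python
# Minimal sanity check for the pinned transformers release.
import transformers
from transformers import LlamaForCausalLM, LlamaTokenizer  # native LLaMA support, available since v4.28

print(transformers.__version__)  # expected: 4.30.2
```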

View File

@@ -1,4 +0,0 @@
from .deberta_critic import DebertaCritic
from .deberta_rm import DebertaRM
__all__ = ['DebertaCritic', 'DebertaRM']

View File

@@ -1,36 +0,0 @@
from typing import Optional
import torch.nn as nn
from transformers import DebertaV2Config, DebertaV2Model
from ..base import Critic
class DebertaCritic(Critic):
"""
Deberta Critic model.
Args:
pretrained (str): Pretrained model name or path.
config (DebertaV2Config): Model config.
checkpoint (bool): Enable gradient checkpointing.
lora_rank (int): Rank of the LO-RA decomposition.
lora_train_bias (str): LoRA bias training mode.
"""
def __init__(self,
pretrained: Optional[str] = None,
config: Optional[DebertaV2Config] = None,
checkpoint: bool = False,
lora_rank: int = 0,
lora_train_bias: str = 'none') -> None:
if pretrained is not None:
model = DebertaV2Model.from_pretrained(pretrained)
elif config is not None:
model = DebertaV2Model(config)
else:
model = DebertaV2Model(DebertaV2Config())
if checkpoint:
model.gradient_checkpointing_enable()
value_head = nn.Linear(model.config.hidden_size, 1)
super().__init__(model, value_head, lora_rank, lora_train_bias)

View File

@@ -1,37 +0,0 @@
from typing import Optional
import torch.nn as nn
from transformers import DebertaV2Config, DebertaV2Model
from ..base import RewardModel
class DebertaRM(RewardModel):
"""
Deberta Reward model.
Args:
pretrained (str): Pretrained model name or path.
config (DebertaV2Config): Model config.
checkpoint (bool): Enable gradient checkpointing.
lora_rank (int): Rank of the LO-RA decomposition.
lora_train_bias (str): LoRA bias training mode.
"""
def __init__(self,
pretrained: str = None,
config: Optional[DebertaV2Config] = None,
checkpoint: bool = False,
lora_rank: int = 0,
lora_train_bias: str = 'none') -> None:
if pretrained is not None:
model = DebertaV2Model.from_pretrained(pretrained)
elif config is not None:
model = DebertaV2Model(config)
else:
model = DebertaV2Model(DebertaV2Config())
if checkpoint:
model.gradient_checkpointing_enable()
value_head = nn.Linear(model.config.hidden_size, 1)
value_head.weight.data.normal_(mean=0.0, std=1 / (model.config.hidden_size + 1))
super().__init__(model, value_head, lora_rank, lora_train_bias)

View File

@@ -1,5 +0,0 @@
from .roberta_actor import RoBERTaActor
from .roberta_critic import RoBERTaCritic
from .roberta_rm import RoBERTaRM
__all__ = ['RoBERTaActor', 'RoBERTaCritic', 'RoBERTaRM']

View File

@@ -1,35 +0,0 @@
from typing import Optional
from transformers.models.roberta.configuration_roberta import RobertaConfig
from transformers.models.roberta.modeling_roberta import RobertaForCausalLM
from ..base import Actor
class RoBERTaActor(Actor):
"""
RoBERTa Actor model.
Args:
pretrained (str): Pretrained model name or path.
config (RoBERTaConfig): Model config.
checkpoint (bool): Enable gradient checkpointing.
lora_rank (int): Rank of the low-rank approximation.
lora_train_bias (str): LoRA bias training mode.
"""
def __init__(self,
pretrained: Optional[str] = None,
config: Optional[RobertaConfig] = None,
checkpoint: bool = False,
lora_rank: int = 0,
lora_train_bias: str = 'none') -> None:
if pretrained is not None:
model = RobertaForCausalLM.from_pretrained(pretrained)
elif config is not None:
model = RobertaForCausalLM(config)
else:
model = RobertaForCausalLM(RobertaConfig())
if checkpoint:
model.gradient_checkpointing_enable()
super().__init__(model, lora_rank, lora_train_bias)

View File

@@ -1,38 +0,0 @@
from typing import Optional
import torch.nn as nn
from transformers.models.roberta.configuration_roberta import RobertaConfig
from transformers.models.roberta.modeling_roberta import RobertaModel
from ..base import Critic
class RoBERTaCritic(Critic):
"""
RoBERTa Critic model.
Args:
pretrained (str): Pretrained model name or path.
config (RoBERTa Config): Model config.
checkpoint (bool): Enable gradient checkpointing.
lora_rank (int): Rank of the low-rank approximation.
lora_train_bias (str): LoRA bias training mode.
"""
def __init__(self,
pretrained: Optional[str] = None,
config: Optional[RobertaConfig] = None,
checkpoint: bool = False,
lora_rank: int = 0,
lora_train_bias: str = 'none',
**kwargs) -> None:
if pretrained is not None:
model = RobertaModel.from_pretrained(pretrained, add_pooling_layer=False)
elif config is not None:
model = RobertaModel(config)
else:
model = RobertaModel(RobertaConfig())
if checkpoint:
model.gradient_checkpointing_enable()
value_head = nn.Linear(model.config.hidden_size, 1)
super().__init__(model, value_head, lora_rank, lora_train_bias, **kwargs)

View File

@@ -1,39 +0,0 @@
from typing import Optional
import torch.nn as nn
from transformers import RobertaConfig, RobertaModel
from ..base import RewardModel
class RoBERTaRM(RewardModel):
"""
RoBERTa Reward model.
Args:
pretrained (str): Pretrained model name or path.
config (RoBERTaConfig): Model config.
checkpoint (bool): Enable gradient checkpointing.
lora_rank (int): Rank of the low-rank approximation.
lora_train_bias (str): LoRA bias training mode.
"""
def __init__(self,
pretrained: Optional[str] = None,
config: Optional[RobertaConfig] = None,
checkpoint: bool = False,
lora_rank: int = 0,
lora_train_bias: str = 'none') -> None:
if pretrained is not None:
model = RobertaModel.from_pretrained(pretrained, add_pooling_layer=False)
elif config is not None:
model = RobertaModel(config)
else:
model = RobertaModel(RobertaConfig())
if checkpoint:
model.gradient_checkpointing_enable()
value_head = nn.Linear(model.config.hidden_size, 1)
value_head.weight.data.normal_(mean=0.0, std=1/(model.config.hidden_size + 1))
super().__init__(model, value_head, lora_rank, lora_train_bias)

View File

@@ -9,10 +9,8 @@ from coati.models.bloom import BLOOMRM, BLOOMActor, BLOOMCritic
from coati.models.gpt import GPTRM, GPTActor, GPTCritic
from coati.models.llama import LlamaActor, LlamaCritic, LlamaRM
from coati.models.opt import OPTRM, OPTActor, OPTCritic
-from coati.models.roberta import RoBERTaActor, RoBERTaCritic, RoBERTaRM
from coati.trainer.strategies import DDPStrategy, GeminiStrategy, LowLevelZeroStrategy
-from coati.utils import prepare_llama_tokenizer_and_embedding
-from transformers import AutoTokenizer, BloomTokenizerFast, GPT2Tokenizer, LlamaTokenizer, RobertaTokenizer
+from transformers import AutoTokenizer, BloomTokenizerFast, GPT2Tokenizer, LlamaTokenizer


def is_rank_0() -> bool:
@@ -36,8 +34,6 @@ def get_actor_from_args(model: str, pretrained: str = None, config=None, lora_ra
        actor = OPTActor(pretrained=pretrained, config=config, lora_rank=lora_rank)
    elif model == 'llama':
        actor = LlamaActor(pretrained=pretrained, config=config, lora_rank=lora_rank)
-   elif model == 'roberta':
-       actor = RoBERTaActor(pretrained=pretrained, config=config, lora_rank=lora_rank)
    else:
        raise ValueError(f'Unsupported actor model "{model}"')
    return actor
@@ -52,8 +48,6 @@ def get_critic_from_args(model: str, pretrained: str = None, config=None, lora_r
        critic = OPTCritic(pretrained=pretrained, lora_rank=lora_rank, config=config, use_action_mask=True)
    elif model == 'llama':
        critic = LlamaCritic(pretrained=pretrained, lora_rank=lora_rank, config=config, use_action_mask=True)
-   elif model == 'roberta':
-       critic = RoBERTaCritic(pretrained=pretrained, lora_rank=lora_rank, config=config, use_action_mask=True)
    else:
        raise ValueError(f'Unsupported reward model "{model}"')
    return critic
@@ -68,8 +62,6 @@ def get_reward_model_from_args(model: str, pretrained: str = None, config=None):
        reward_model = OPTRM(pretrained=pretrained, config=config)
    elif model == 'llama':
        reward_model = LlamaRM(pretrained=pretrained, config=config)
-   elif model == 'roberta':
-       reward_model = RoBERTaRM(pretrained=pretrained, config=config)
    else:
        raise ValueError(f'Unsupported reward model "{model}"')
    return reward_model
@@ -101,8 +93,6 @@ def get_tokenizer_from_args(model: str, **kwargs):
    elif model == 'llama':
        pretrain_path = kwargs["pretrain"]
        tokenizer = AutoTokenizer.from_pretrained(pretrain_path)
-   elif model == 'roberta':
-       tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    else:
        raise ValueError(f'Unsupported model "{model}"')

View File

@@ -1,3 +0,0 @@
from .tokenizer_utils import prepare_llama_tokenizer_and_embedding, smart_tokenizer_and_embedding_resize
__all__ = ['smart_tokenizer_and_embedding_resize', 'prepare_llama_tokenizer_and_embedding']

View File

@@ -1,73 +0,0 @@
# Copyright 2023 Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Dict
import transformers
DEFAULT_PAD_TOKEN = "[PAD]"
DEFAULT_EOS_TOKEN = "</s>"
DEFAULT_BOS_TOKEN = "</s>"
DEFAULT_UNK_TOKEN = "</s>"
def prepare_llama_tokenizer_and_embedding(
tokenizer: transformers.PreTrainedTokenizer,
model: transformers.PreTrainedModel,
special_tokens_dict: Dict = dict(pad_token=DEFAULT_PAD_TOKEN),
):
"""prepare llama tokenizer and embedding.
"""
if tokenizer.pad_token is None:
smart_tokenizer_and_embedding_resize(
special_tokens_dict=dict(pad_token=DEFAULT_PAD_TOKEN),
tokenizer=tokenizer,
model=model,
)
tokenizer.add_special_tokens({
"eos_token": DEFAULT_EOS_TOKEN,
"bos_token": DEFAULT_BOS_TOKEN,
"unk_token": DEFAULT_UNK_TOKEN,
})
return tokenizer
def smart_tokenizer_and_embedding_resize(
tokenizer: transformers.PreTrainedTokenizer,
model: transformers.PreTrainedModel,
special_tokens_dict: Dict = dict(pad_token=DEFAULT_PAD_TOKEN),
):
"""Resize tokenizer and embedding.
Note: This is the unoptimized version that may make your embedding size not be divisible by 64.
"""
if tokenizer.pad_token is None:
num_new_tokens = tokenizer.add_special_tokens(special_tokens_dict)
model.resize_token_embeddings(len(tokenizer))
if num_new_tokens > 0:
input_embeddings = model.get_input_embeddings().weight.data
output_embeddings = model.get_output_embeddings().weight.data
input_embeddings_avg = input_embeddings[:-num_new_tokens].mean(dim=0, keepdim=True)
output_embeddings_avg = output_embeddings[:-num_new_tokens].mean(dim=0, keepdim=True)
input_embeddings[-num_new_tokens:] = input_embeddings_avg
output_embeddings[-num_new_tokens:] = output_embeddings_avg
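This removed helper worked by adding a dedicated `[PAD]` token and resizing the model's input and output embeddings. With the official transformers release, the training scripts in this commit instead reuse LLaMA's unk token as the pad token, which sidesteps embedding resizing entirely. A minimal sketch of that replacement, where `your-llama-path` stands in for an actual LLaMA checkpoint:

```python
# Sketch of the pad-token handling that replaces prepare_llama_tokenizer_and_embedding.
# "your-llama-path" is a placeholder for a local or Hub LLaMA checkpoint.
from transformers import LlamaTokenizer

tokenizer = LlamaTokenizer.from_pretrained("your-llama-path")
# Reuse the existing unk token for padding instead of adding a new [PAD] token,
# so the model's embedding matrix does not need to be resized.
tokenizer.pad_token = tokenizer.unk_token
```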

View File

@@ -10,7 +10,6 @@ from coati.models.llama import LlamaActor, LlamaCritic, LlamaRM
from coati.models.opt import OPTRM, OPTActor, OPTCritic
from coati.trainer import PPOTrainer
from coati.trainer.strategies import DDPStrategy, GeminiStrategy, LowLevelZeroStrategy
-from coati.utils import prepare_llama_tokenizer_and_embedding
from easy_dataset import EasyPromptsDataset, EasySupervisedDataset
from easy_models import BLOOMActor
from peft import PeftModel
@@ -112,21 +111,20 @@ def main(args):
    # configure tokenizer
    if args.model == 'gpt2':
        tokenizer = GPT2Tokenizer.from_pretrained(args.rm_pretrain)
+       tokenizer.pad_token = tokenizer.eos_token
    elif args.model == 'bloom':
        tokenizer = BloomTokenizerFast.from_pretrained(args.rm_pretrain)
+       tokenizer.pad_token = tokenizer.eos_token
    elif args.model == 'opt':
        tokenizer = AutoTokenizer.from_pretrained(args.rm_pretrain)
+       tokenizer.pad_token = tokenizer.eos_token
    elif args.model == 'llama':
        tokenizer = LlamaTokenizer.from_pretrained(args.pretrain)
        tokenizer.eos_token = '<\s>'
+       tokenizer.pad_token = tokenizer.unk_token
    else:
        raise ValueError(f'Unsupported model "{args.model}"')
-   if args.model == 'llama':
-       tokenizer = prepare_llama_tokenizer_and_embedding(tokenizer, actor)
-   else:
-       tokenizer.pad_token = tokenizer.eos_token

    data_collator = DataCollatorForSupervisedDataset(tokenizer=tokenizer)
    prompt_dataset = EasyPromptsDataset(args.prompt_path, tokenizer)

View File

@@ -12,7 +12,6 @@ from coati.models.llama import LlamaLM
from coati.models.opt import OPTLM
from coati.trainer import SFTTrainer
from coati.trainer.strategies import DDPStrategy, GeminiStrategy, LowLevelZeroStrategy
-from coati.utils import prepare_llama_tokenizer_and_embedding
from datasets import load_dataset
from easy_dataset import EasyDataset
from peft import LoraConfig, PeftModel, TaskType, get_peft_model
@@ -65,10 +64,11 @@ def train(args):
        tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
        tokenizer.pad_token = tokenizer.eos_token
    elif args.model == 'bloom':
-       tokenizer = BloomTokenizerFast.from_pretrained(args.pretrain)
+       tokenizer = BloomTokenizerFast.from_pretrained("bigscience/bloom-560m")
        tokenizer.pad_token = tokenizer.eos_token
    elif args.model == 'opt':
        tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
+       tokenizer.pad_token = tokenizer.eos_token
    elif args.model == 'llama':
        tokenizer = AutoTokenizer.from_pretrained(
            args.pretrain,
@@ -76,23 +76,19 @@ def train(args):
            use_fast=False,
        )
        tokenizer.eos_token = '<\s>'
+       tokenizer.pad_token = tokenizer.unk_token
    else:
        raise ValueError(f'Unsupported model "{args.model}"')
-   tokenizer.pad_token = tokenizer.eos_token

-   if args.model == 'llama':
-       tokenizer = prepare_llama_tokenizer_and_embedding(tokenizer, model)
-       if args.strategy == 'colossalai_gemini':
+   if args.model == 'llama' and args.strategy == 'colossalai_gemini':
        # this is a hack to deal with the resized embedding
        # to make sure all parameters are ColoParameter for Colossal-AI Gemini Compatibility
        for name, param in model.named_parameters():
            if not isinstance(param, ColoParameter):
                sub_module_name = '.'.join(name.split('.')[:-1])
                weight_name = name.split('.')[-1]
                sub_module = model.get_submodule(sub_module_name)
                setattr(sub_module, weight_name, ColoParameter(param))
-   else:
-       tokenizer.pad_token = tokenizer.eos_token

    # configure optimizer
    if args.strategy.startswith('colossalai'):

View File

@@ -2,10 +2,10 @@ import argparse
import torch
from coati.models.bloom import BLOOMActor
+from coati.models.generation import generate
from coati.models.gpt import GPTActor
from coati.models.opt import OPTActor
-from coati.models.roberta import RoBERTaActor
-from transformers import AutoTokenizer, RobertaTokenizer
+from transformers import AutoTokenizer
from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer
@@ -17,13 +17,11 @@ def eval(args):
        actor = BLOOMActor(pretrained=args.pretrain).to(torch.cuda.current_device())
    elif args.model == 'opt':
        actor = OPTActor(pretrained=args.pretrain).to(torch.cuda.current_device())
-   elif args.model == 'roberta':
-       actor = RoBERTaActor(pretrained=args.pretrain).to(torch.cuda.current_device())
    else:
        raise ValueError(f'Unsupported model "{args.model}"')

    state_dict = torch.load(args.model_path)
-   actor.model.load_state_dict(state_dict)
+   actor.load_state_dict(state_dict)

    # configure tokenizer
    if args.model == 'gpt2':
@@ -34,27 +32,26 @@ def eval(args):
        tokenizer.pad_token = tokenizer.eos_token
    elif args.model == 'opt':
        tokenizer = AutoTokenizer.from_pretrained('facebook/opt-350m')
-   elif args.model == 'roberta':
-       tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    else:
        raise ValueError(f'Unsupported model "{args.model}"')

    actor.eval()
    input = args.input
    input_ids = tokenizer.encode(input, return_tensors='pt').to(torch.cuda.current_device())
-   outputs = actor.generate(input_ids,
-                            max_length=args.max_length,
-                            do_sample=True,
-                            top_k=50,
-                            top_p=0.95,
-                            num_return_sequences=1)
+   outputs = generate(actor,
+                      input_ids,
+                      max_length=args.max_length,
+                      do_sample=True,
+                      top_k=50,
+                      top_p=0.95,
+                      num_return_sequences=1)
    output = tokenizer.batch_decode(outputs[0], skip_special_tokens=True)
    print(output)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
-   parser.add_argument('--model', default='gpt2', choices=['gpt2', 'bloom', 'opt', 'roberta'])
+   parser.add_argument('--model', default='gpt2', choices=['gpt2', 'bloom', 'opt'])
    # We suggest to use the pretrained model from HuggingFace, use pretrain to configure model
    parser.add_argument('--pretrain', type=str, default=None)
    parser.add_argument('--model_path', type=str, default=None)
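Taken together, the updated inference path loads the saved state dict into the whole actor and calls the standalone `generate` helper rather than `actor.generate`. A rough sketch of that flow, using `gpt2` as a stand-in for `args.pretrain` and assuming a CUDA device is available:

```python
# Rough sketch of the updated inference flow; 'gpt2' stands in for args.pretrain,
# and a trained checkpoint would normally be loaded with torch.load + load_state_dict.
import torch
from coati.models.generation import generate
from coati.models.gpt import GPTActor
from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer

actor = GPTActor(pretrained='gpt2').to(torch.cuda.current_device())
actor.eval()

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token

input_ids = tokenizer.encode('Hello, my name is', return_tensors='pt').to(torch.cuda.current_device())
outputs = generate(actor,
                   input_ids,
                   max_length=64,
                   do_sample=True,
                   top_k=50,
                   top_p=0.95,
                   num_return_sequences=1)
print(tokenizer.batch_decode(outputs[0], skip_special_tokens=True))
```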

View File

@@ -43,18 +43,18 @@ pip install -r ${BASE}/requirements.txt
wandb init -m offline

-# FIXME: This is a hack to skip tests that are not working (tested at commit b3ab7fbabf)
+# FIXME: This is a hack to skip tests that are not working
# - gpt2-ddp: RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation
-# - llama-*: Repository Not Found for url: https://huggingface.co/{...}/resolve/main/tokenizer.model.
-# - roberta-*: RuntimeError: CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling `cublasCreate(handle)`
+# - llama-*: These tests can be passed locally, skipped for long execution time
SKIPPED_TESTS=(
    "gpt2-ddp"
-   "llama-ddp" "llama-colossalai_gemini" "llama-colossalai_zero2"
-   "roberta-ddp" "roberta-colossalai_gemini" "roberta-colossalai_zero2"
+   "llama-ddp"
+   "llama-colossalai_gemini"
+   "llama-colossalai_zero2"
)

# These tests are quick and do not have any dependencies
-for model in 'gpt2' 'bloom' 'opt' 'llama' 'roberta'; do
+for model in 'gpt2' 'bloom' 'opt' 'llama'; do
    for strategy in 'ddp' 'colossalai_gemini' 'colossalai_zero2'; do
        if [[ " ${SKIPPED_TESTS[*]} " =~ " ${model}-${strategy} " ]]; then
            echo "[Test]: Skipped $model-$strategy"
@@ -64,7 +64,7 @@ for model in 'gpt2' 'bloom' 'opt' 'llama' 'roberta'; do
            --prompt_dataset $PROMPT_PATH --pretrain_dataset $PRETRAIN_DATASET \
            --strategy $strategy --model $model \
            --num_episodes 1 --num_collect_steps 2 --num_update_steps 1 \
-           --train_batch_size 2
+           --train_batch_size 2 --lora_rank 4
    done
done
@@ -124,22 +124,6 @@ torchrun --standalone --nproc_per_node=2 ${BASE}/train_reward_model.py \
    --save_path ${BASE}/rm_ckpt.pt
rm -rf ${BASE}/rm_ckpt.pt

-torchrun --standalone --nproc_per_node=2 ${BASE}/train_reward_model.py \
-    --pretrain 'microsoft/deberta-v3-large' --model 'deberta' \
-    --strategy colossalai_zero2 --loss_fn 'log_sig' \
-    --dataset 'Anthropic/hh-rlhf' --subset 'harmless-base' \
-    --test True --lora_rank 4 \
-    --save_path ${BASE}/rm_ckpt.pt
-rm -rf ${BASE}/rm_ckpt.pt
-
-torchrun --standalone --nproc_per_node=2 ${BASE}/train_reward_model.py \
-    --pretrain 'roberta-base' --model 'roberta' \
-    --strategy colossalai_zero2 --loss_fn 'log_exp' \
-    --dataset 'Anthropic/hh-rlhf' --subset 'harmless-base' \
-    --test True --lora_rank 4 \
-    --save_path ${BASE}/rm_ckpt.pt
-rm -rf ${BASE}/rm_ckpt.pt

# train rl
torchrun --standalone --nproc_per_node=2 ${BASE}/train_prompts.py \
    --prompt_dataset $PROMPT_PATH --pretrain_dataset $PRETRAIN_DATASET \

View File

@@ -7,14 +7,12 @@ from coati.models.bloom import BLOOMRM, BLOOMActor, BLOOMCritic
from coati.models.gpt import GPTRM, GPTActor, GPTCritic
from coati.models.llama import LlamaActor, LlamaCritic, LlamaRM
from coati.models.opt import OPTRM, OPTActor, OPTCritic
-from coati.models.roberta import RoBERTaActor, RoBERTaCritic, RoBERTaRM
from coati.trainer import PPOTrainer
from coati.trainer.strategies import DDPStrategy, GeminiStrategy, LowLevelZeroStrategy
-from coati.utils import prepare_llama_tokenizer_and_embedding
from torch.optim import Adam
from torch.utils.data import DataLoader
from torch.utils.data.distributed import DistributedSampler
-from transformers import AutoTokenizer, BloomTokenizerFast, GPT2Tokenizer, LlamaTokenizer, RobertaTokenizer
+from transformers import AutoTokenizer, BloomTokenizerFast, GPT2Tokenizer, LlamaTokenizer

from colossalai.nn.optimizer import HybridAdam
@@ -43,8 +41,6 @@ def main(args):
        initial_model = OPTActor(pretrained=args.pretrain)
    elif args.model == 'llama':
        initial_model = LlamaActor(pretrained=args.pretrain)
-   elif args.model == 'roberta':
-       initial_model = RoBERTaActor(pretrained=args.pretrain)
    else:
        raise ValueError(f'Unsupported actor model "{args.model}"')
@@ -61,8 +57,6 @@ def main(args):
        reward_model = OPTRM(pretrained=args.rm_pretrain)
    elif rm_model_name == 'llama':
        reward_model = LlamaRM(pretrained=args.rm_pretrain)
-   elif rm_model_name == 'roberta':
-       reward_model = RoBERTaRM(pretrained=args.rm_pretrain)
    else:
        raise ValueError(f'Unsupported reward model "{rm_model_name}"')
@@ -80,8 +74,6 @@ def main(args):
        actor = OPTActor(pretrained=args.pretrain, lora_rank=args.lora_rank)
    elif args.model == 'llama':
        actor = LlamaActor(pretrained=args.pretrain, lora_rank=args.lora_rank)
-   elif args.model == 'roberta':
-       actor = RoBERTaActor(pretrained=args.pretrain, lora_rank=args.lora_rank)
    else:
        raise ValueError(f'Unsupported actor model "{args.model}"')
@@ -93,8 +85,6 @@ def main(args):
        critic = OPTCritic(pretrained=args.rm_pretrain, lora_rank=args.lora_rank, use_action_mask=True)
    elif rm_model_name == 'llama':
        critic = LlamaCritic(pretrained=args.rm_pretrain, lora_rank=args.lora_rank, use_action_mask=True)
-   elif rm_model_name == 'roberta':
-       critic = RoBERTaCritic(pretrained=args.rm_pretrain, lora_rank=args.lora_rank, use_action_mask=True)
    else:
        raise ValueError(f'Unsupported reward model "{rm_model_name}"')
@@ -117,23 +107,20 @@ def main(args):
    # configure tokenizer
    if args.model == 'gpt2':
        tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+       tokenizer.pad_token = tokenizer.eos_token
    elif args.model == 'bloom':
        tokenizer = BloomTokenizerFast.from_pretrained('bigscience/bloom-560m')
+       tokenizer.pad_token = tokenizer.eos_token
    elif args.model == 'opt':
        tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
+       tokenizer.pad_token = tokenizer.eos_token
    elif args.model == 'llama':
        tokenizer = LlamaTokenizer.from_pretrained(args.pretrain)
        tokenizer.eos_token = '<\s>'
-   elif args.model == 'roberta':
-       tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
+       tokenizer.pad_token = tokenizer.unk_token
    else:
        raise ValueError(f'Unsupported model "{args.model}"')
-   if args.model == 'llama':
-       tokenizer = prepare_llama_tokenizer_and_embedding(tokenizer, actor)
-   else:
-       tokenizer.pad_token = tokenizer.eos_token

    data_collator = DataCollatorForSupervisedDataset(tokenizer=tokenizer)
    prompt_dataset = PromptDataset(tokenizer=tokenizer, data_path=args.prompt_dataset, max_datasets_size=16384)
@@ -209,9 +196,9 @@ if __name__ == '__main__':
                        choices=['ddp', 'colossalai_gemini', 'colossalai_zero2'],
                        default='colossalai_zero2',
                        help='strategy to use')
-   parser.add_argument('--model', default='gpt2', choices=['gpt2', 'bloom', 'opt', 'llama', 'roberta'])
+   parser.add_argument('--model', default='gpt2', choices=['gpt2', 'bloom', 'opt', 'llama'])
    parser.add_argument('--pretrain', type=str, default=None)
-   parser.add_argument('--rm_model', default=None, choices=['gpt2', 'bloom', 'opt', 'llama', 'roberta'])
+   parser.add_argument('--rm_model', default=None, choices=['gpt2', 'bloom', 'opt', 'llama'])
    parser.add_argument('--rm_path', type=str, default=None)
    parser.add_argument('--rm_pretrain', type=str, default=None)
    parser.add_argument('--save_path', type=str, default='actor_checkpoint_prompts')

View File

@@ -1,27 +1,22 @@
import argparse
from random import randint

-import loralib as lora
import torch
import torch.distributed as dist
from coati.dataset import HhRlhfDataset, RmStaticDataset
from coati.models import LogExpLoss, LogSigLoss
-from coati.models.base import RewardModel
from coati.models.bloom import BLOOMRM
-from coati.models.deberta import DebertaRM
from coati.models.gpt import GPTRM
from coati.models.llama import LlamaRM
from coati.models.opt import OPTRM
-from coati.models.roberta import RoBERTaRM
from coati.trainer import RewardModelTrainer
from coati.trainer.strategies import DDPStrategy, GeminiStrategy, LowLevelZeroStrategy
-from coati.utils import prepare_llama_tokenizer_and_embedding
from datasets import load_dataset
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import DataLoader
from torch.utils.data.distributed import DistributedSampler
-from transformers import AutoTokenizer, BloomTokenizerFast, DebertaV2Tokenizer, LlamaTokenizer, RobertaTokenizer
+from transformers import AutoTokenizer, BloomTokenizerFast, LlamaTokenizer
from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer

from colossalai.nn.optimizer import HybridAdam
@@ -46,12 +41,8 @@ def train(args):
        model = OPTRM(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
    elif args.model == 'gpt2':
        model = GPTRM(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
-   elif args.model == 'deberta':
-       model = DebertaRM(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
    elif args.model == 'llama':
        model = LlamaRM(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
-   elif args.model == 'roberta':
-       model = RoBERTaRM(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
    else:
        raise ValueError(f'Unsupported model "{args.model}"')
@@ -64,24 +55,18 @@ def train(args):
    # configure tokenizer
    if args.model == 'gpt2':
        tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+       tokenizer.pad_token = tokenizer.eos_token
    elif args.model == 'bloom':
        tokenizer = BloomTokenizerFast.from_pretrained('bigscience/bloom-560m')
+       tokenizer.pad_token = tokenizer.eos_token
    elif args.model == 'opt':
        tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
-   elif args.model == 'deberta':
-       tokenizer = DebertaV2Tokenizer.from_pretrained('microsoft/deberta-v3-large')
+       tokenizer.pad_token = tokenizer.eos_token
    elif args.model == 'llama':
        tokenizer = LlamaTokenizer.from_pretrained(args.pretrain)
-   elif args.model == 'roberta':
-       tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
+       tokenizer.pad_token = tokenizer.unk_token
    else:
        raise ValueError(f'Unsupported model "{args.model}"')
-   max_len = args.max_len

-   if args.model == 'llama':
-       tokenizer = prepare_llama_tokenizer_and_embedding(tokenizer, model)
-   else:
-       tokenizer.pad_token = tokenizer.eos_token

    # configure optimizer
    if args.strategy.startswith('colossalai'):
@@ -112,13 +97,13 @@ def train(args):
    valid_data = data['test'].select((randint(0, len(eval_data) - 1) for _ in range(len(eval_data) // 5)))

    if args.dataset == 'Dahoas/rm-static':
-       train_dataset = RmStaticDataset(train_data, tokenizer, max_len)
-       valid_dataset = RmStaticDataset(valid_data, tokenizer, max_len)
-       eval_dataset = RmStaticDataset(eval_data, tokenizer, max_len)
+       train_dataset = RmStaticDataset(train_data, tokenizer, args.max_len)
+       valid_dataset = RmStaticDataset(valid_data, tokenizer, args.max_len)
+       eval_dataset = RmStaticDataset(eval_data, tokenizer, args.max_len)
    elif args.dataset == 'Anthropic/hh-rlhf':
-       train_dataset = HhRlhfDataset(train_data, tokenizer, max_len)
-       valid_dataset = HhRlhfDataset(valid_data, tokenizer, max_len)
-       eval_dataset = HhRlhfDataset(eval_data, tokenizer, max_len)
+       train_dataset = HhRlhfDataset(train_data, tokenizer, args.max_len)
+       valid_dataset = HhRlhfDataset(valid_data, tokenizer, args.max_len)
+       eval_dataset = HhRlhfDataset(eval_data, tokenizer, args.max_len)
    else:
        raise ValueError(f'Unsupported dataset "{args.dataset}"')
@@ -195,7 +180,7 @@ if __name__ == '__main__':
    parser.add_argument('--strategy',
                        choices=['ddp', 'colossalai_gemini', 'colossalai_zero2'],
                        default='colossalai_zero2')
-   parser.add_argument('--model', choices=['gpt2', 'bloom', 'opt', 'deberta', 'llama', 'roberta'], default='bloom')
+   parser.add_argument('--model', choices=['gpt2', 'bloom', 'opt', 'llama'], default='bloom')
    parser.add_argument('--pretrain', type=str, default=None)
    parser.add_argument('--model_path', type=str, default=None)
    parser.add_argument('--need_optim_ckpt', type=bool, default=False)

View File

@@ -9,7 +9,6 @@ from coati.dataset import DataCollatorForSupervisedDataset, SFTDataset, Supervis
from coati.models import convert_to_lora_module
from coati.trainer import SFTTrainer
from coati.trainer.strategies import DDPStrategy, GeminiStrategy, LowLevelZeroStrategy
-from coati.utils import prepare_llama_tokenizer_and_embedding
from datasets import load_dataset
from torch.optim import Adam
from torch.utils.data import DataLoader
@@ -68,6 +67,7 @@ def train(args):
        tokenizer.pad_token = tokenizer.eos_token
    elif args.model == 'opt':
        tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
+       tokenizer.pad_token = tokenizer.eos_token
    elif args.model == 'llama':
        tokenizer = AutoTokenizer.from_pretrained(
            args.pretrain,
@@ -75,24 +75,19 @@ def train(args):
            use_fast=False,
        )
        tokenizer.eos_token = '<\s>'
+       tokenizer.pad_token = tokenizer.unk_token
    else:
        raise ValueError(f'Unsupported model "{args.model}"')
-   tokenizer.pad_token = tokenizer.eos_token

-   max_len = args.max_len
-   if args.model == 'llama':
-       tokenizer = prepare_llama_tokenizer_and_embedding(tokenizer, model)
-       if args.strategy == 'colossalai_gemini':
+   if args.model == 'llama' and args.strategy == 'colossalai_gemini':
        # this is a hack to deal with the resized embedding
        # to make sure all parameters are ColoParameter for Colossal-AI Gemini Compatibility
        for name, param in model.named_parameters():
            if not isinstance(param, ColoParameter):
                sub_module_name = '.'.join(name.split('.')[:-1])
                weight_name = name.split('.')[-1]
                sub_module = model.get_submodule(sub_module_name)
                setattr(sub_module, weight_name, ColoParameter(param))
-   else:
-       tokenizer.pad_token = tokenizer.eos_token

    # configure optimizer
    if args.strategy.startswith('colossalai'):
@@ -107,14 +102,14 @@ def train(args):
        train_data = load_dataset(args.dataset, 'super_natural_instructions', split='train')
        eval_data = load_dataset(args.dataset, 'super_natural_instructions', split='test')

-       train_dataset = SFTDataset(train_data, tokenizer, max_len)
-       eval_dataset = SFTDataset(eval_data, tokenizer, max_len)
+       train_dataset = SFTDataset(train_data, tokenizer, args.max_len)
+       eval_dataset = SFTDataset(eval_data, tokenizer, args.max_len)
    else:
        train_dataset = SupervisedDataset(tokenizer=tokenizer,
                                          data_path=args.dataset,
                                          max_datasets_size=args.max_datasets_size,
-                                         max_length=max_len)
+                                         max_length=args.max_len)
        eval_dataset = None

    data_collator = DataCollatorForSupervisedDataset(tokenizer=tokenizer)