mirror of https://github.com/hpcaitech/ColossalAI
[chat] use official transformers and fix some issues (#4117)

* feat: remove on_learn_epoch fn as not used
* revert: add _on_learn_epoch fn
* feat: remove NaiveStrategy
* test: update train_prompts tests
* fix: remove prepare_llama_tokenizer_and_embedding
* test: add lora arg
* feat: remove roberta support in train_prompts due to runtime errs
* feat: remove deberta & roberta in rm as not used
* test: remove deberta and roberta tests
* feat: remove deberta and roberta models as not used
* fix: remove calls to roberta
* fix: remove prepare_llama_tokenizer_and_embedding
* chore: update transformers version
* docs: update transformers version
* fix: fix actor inference
* fix: fix ci
* feat: change llama pad token to unk
* revert: revert ddp setup_distributed
* fix: change llama pad token to unk
* revert: undo unnecessary changes
* fix: use pip to install transformers
parent 1350ece492
commit 3d8d5d0d58
@@ -4,11 +4,10 @@ on:
   pull_request:
     types: [synchronize, opened, reopened]
     paths:
-      - 'applications/Chat/coati/**'
-      - 'applications/Chat/requirements.txt'
-      - 'applications/Chat/setup.py'
-      - 'applications/Chat/examples/**'
+      - "applications/Chat/coati/**"
+      - "applications/Chat/requirements.txt"
+      - "applications/Chat/setup.py"
+      - "applications/Chat/examples/**"
 
 jobs:
   tests:
@@ -38,10 +37,7 @@ jobs:
 
       - name: Install Transformers
         run: |
-          cd applications/Chat
-          git clone https://github.com/hpcaitech/transformers
-          cd transformers
-          pip install -v .
+          pip install transformers==4.30.2
 
       - name: Execute Examples
         run: |
@@ -98,12 +98,9 @@ pip install .
 ```
 
 ### Install the Transformers
-Given Hugging Face hasn't officially supported the LLaMA models, We fork a branch of Transformers that can be compatible with our code
 
 ```shell
-git clone https://github.com/hpcaitech/transformers
-cd transformers
-pip install .
+pip install transformers==4.30.2
 ```
 
 ## How to use?
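With the pin above, LLaMA support comes from the official transformers release (the LLaMA classes landed upstream in v4.28), so the hpcaitech fork is no longer needed. A minimal sanity check, assuming transformers 4.30.2 is installed:

```python
# Quick check that the official package provides the LLaMA classes the fork used to supply.
import transformers
from transformers import LlamaForCausalLM, LlamaTokenizer  # import succeeds on >= 4.28

print(transformers.__version__)  # expect 4.30.2 with the pin above
```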
@@ -1,4 +0,0 @@
-from .deberta_critic import DebertaCritic
-from .deberta_rm import DebertaRM
-
-__all__ = ['DebertaCritic', 'DebertaRM']
@@ -1,36 +0,0 @@
-from typing import Optional
-
-import torch.nn as nn
-from transformers import DebertaV2Config, DebertaV2Model
-
-from ..base import Critic
-
-
-class DebertaCritic(Critic):
-    """
-    Deberta Critic model.
-
-    Args:
-        pretrained (str): Pretrained model name or path.
-        config (DebertaV2Config): Model config.
-        checkpoint (bool): Enable gradient checkpointing.
-        lora_rank (int): Rank of the LO-RA decomposition.
-        lora_train_bias (str): LoRA bias training mode.
-    """
-
-    def __init__(self,
-                 pretrained: Optional[str] = None,
-                 config: Optional[DebertaV2Config] = None,
-                 checkpoint: bool = False,
-                 lora_rank: int = 0,
-                 lora_train_bias: str = 'none') -> None:
-        if pretrained is not None:
-            model = DebertaV2Model.from_pretrained(pretrained)
-        elif config is not None:
-            model = DebertaV2Model(config)
-        else:
-            model = DebertaV2Model(DebertaV2Config())
-        if checkpoint:
-            model.gradient_checkpointing_enable()
-        value_head = nn.Linear(model.config.hidden_size, 1)
-        super().__init__(model, value_head, lora_rank, lora_train_bias)
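All of the deleted critic classes share one shape: a transformer backbone with a scalar value head on top. A minimal standalone sketch of that pattern (dimensions are illustrative, and the pooling is generic rather than coati-specific):

```python
import torch
import torch.nn as nn

hidden_size = 768                              # e.g. a base-size encoder width
value_head = nn.Linear(hidden_size, 1)         # the head the Critic adds on the backbone

last_hidden = torch.randn(2, 16, hidden_size)  # (batch, seq_len, hidden) from the backbone
values = value_head(last_hidden).squeeze(-1)   # (batch, seq_len): one value estimate per token
```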
@@ -1,37 +0,0 @@
-from typing import Optional
-
-import torch.nn as nn
-from transformers import DebertaV2Config, DebertaV2Model
-
-from ..base import RewardModel
-
-
-class DebertaRM(RewardModel):
-    """
-    Deberta Reward model.
-
-    Args:
-        pretrained (str): Pretrained model name or path.
-        config (DebertaV2Config): Model config.
-        checkpoint (bool): Enable gradient checkpointing.
-        lora_rank (int): Rank of the LO-RA decomposition.
-        lora_train_bias (str): LoRA bias training mode.
-    """
-
-    def __init__(self,
-                 pretrained: str = None,
-                 config: Optional[DebertaV2Config] = None,
-                 checkpoint: bool = False,
-                 lora_rank: int = 0,
-                 lora_train_bias: str = 'none') -> None:
-        if pretrained is not None:
-            model = DebertaV2Model.from_pretrained(pretrained)
-        elif config is not None:
-            model = DebertaV2Model(config)
-        else:
-            model = DebertaV2Model(DebertaV2Config())
-        if checkpoint:
-            model.gradient_checkpointing_enable()
-        value_head = nn.Linear(model.config.hidden_size, 1)
-        value_head.weight.data.normal_(mean=0.0, std=1 / (model.config.hidden_size + 1))
-        super().__init__(model, value_head, lora_rank, lora_train_bias)
@@ -1,5 +0,0 @@
-from .roberta_actor import RoBERTaActor
-from .roberta_critic import RoBERTaCritic
-from .roberta_rm import RoBERTaRM
-
-__all__ = ['RoBERTaActor', 'RoBERTaCritic', 'RoBERTaRM']
@@ -1,35 +0,0 @@
-from typing import Optional
-
-from transformers.models.roberta.configuration_roberta import RobertaConfig
-from transformers.models.roberta.modeling_roberta import RobertaForCausalLM
-
-from ..base import Actor
-
-
-class RoBERTaActor(Actor):
-    """
-    RoBERTa Actor model.
-
-    Args:
-        pretrained (str): Pretrained model name or path.
-        config (RoBERTaConfig): Model config.
-        checkpoint (bool): Enable gradient checkpointing.
-        lora_rank (int): Rank of the low-rank approximation.
-        lora_train_bias (str): LoRA bias training mode.
-    """
-
-    def __init__(self,
-                 pretrained: Optional[str] = None,
-                 config: Optional[RobertaConfig] = None,
-                 checkpoint: bool = False,
-                 lora_rank: int = 0,
-                 lora_train_bias: str = 'none') -> None:
-        if pretrained is not None:
-            model = RobertaForCausalLM.from_pretrained(pretrained)
-        elif config is not None:
-            model = RobertaForCausalLM(config)
-        else:
-            model = RobertaForCausalLM(RobertaConfig())
-        if checkpoint:
-            model.gradient_checkpointing_enable()
-        super().__init__(model, lora_rank, lora_train_bias)
@@ -1,38 +0,0 @@
-from typing import Optional
-
-import torch.nn as nn
-from transformers.models.roberta.configuration_roberta import RobertaConfig
-from transformers.models.roberta.modeling_roberta import RobertaModel
-
-from ..base import Critic
-
-
-class RoBERTaCritic(Critic):
-    """
-    RoBERTa Critic model.
-
-    Args:
-        pretrained (str): Pretrained model name or path.
-        config (RoBERTa Config): Model config.
-        checkpoint (bool): Enable gradient checkpointing.
-        lora_rank (int): Rank of the low-rank approximation.
-        lora_train_bias (str): LoRA bias training mode.
-    """
-
-    def __init__(self,
-                 pretrained: Optional[str] = None,
-                 config: Optional[RobertaConfig] = None,
-                 checkpoint: bool = False,
-                 lora_rank: int = 0,
-                 lora_train_bias: str = 'none',
-                 **kwargs) -> None:
-        if pretrained is not None:
-            model = RobertaModel.from_pretrained(pretrained, add_pooling_layer=False)
-        elif config is not None:
-            model = RobertaModel(config)
-        else:
-            model = RobertaModel(RobertaConfig())
-        if checkpoint:
-            model.gradient_checkpointing_enable()
-        value_head = nn.Linear(model.config.hidden_size, 1)
-        super().__init__(model, value_head, lora_rank, lora_train_bias, **kwargs)
@@ -1,39 +0,0 @@
-from typing import Optional
-
-import torch.nn as nn
-from transformers import RobertaConfig, RobertaModel
-
-
-from ..base import RewardModel
-
-
-class RoBERTaRM(RewardModel):
-    """
-    RoBERTa Reward model.
-
-    Args:
-        pretrained (str): Pretrained model name or path.
-        config (RoBERTaConfig): Model config.
-        checkpoint (bool): Enable gradient checkpointing.
-        lora_rank (int): Rank of the low-rank approximation.
-        lora_train_bias (str): LoRA bias training mode.
-    """
-
-    def __init__(self,
-                 pretrained: Optional[str] = None,
-                 config: Optional[RobertaConfig] = None,
-                 checkpoint: bool = False,
-                 lora_rank: int = 0,
-                 lora_train_bias: str = 'none') -> None:
-        if pretrained is not None:
-            model = RobertaModel.from_pretrained(pretrained, add_pooling_layer=False)
-        elif config is not None:
-            model = RobertaModel(config)
-        else:
-            model = RobertaModel(RobertaConfig())
-        if checkpoint:
-            model.gradient_checkpointing_enable()
-
-        value_head = nn.Linear(model.config.hidden_size, 1)
-        value_head.weight.data.normal_(mean=0.0, std=1/(model.config.hidden_size + 1))
-        super().__init__(model, value_head, lora_rank, lora_train_bias)
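The deleted reward models differ from the critics only in the head initialization, std = 1/(hidden_size + 1), and in reducing per-token values to one scalar reward per sequence. A hedged sketch of that pattern (the last-token pooling here is illustrative, not necessarily coati's exact reduction):

```python
import torch
import torch.nn as nn

hidden_size = 768
value_head = nn.Linear(hidden_size, 1)
# Small-std init, exactly as in the deleted DebertaRM / RoBERTaRM classes.
value_head.weight.data.normal_(mean=0.0, std=1 / (hidden_size + 1))

hidden = torch.randn(2, 16, hidden_size)         # (batch, seq_len, hidden) from the backbone
rewards = value_head(hidden[:, -1]).squeeze(-1)  # (batch,): one scalar reward per sequence
```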
@@ -9,10 +9,8 @@ from coati.models.bloom import BLOOMRM, BLOOMActor, BLOOMCritic
 from coati.models.gpt import GPTRM, GPTActor, GPTCritic
 from coati.models.llama import LlamaActor, LlamaCritic, LlamaRM
 from coati.models.opt import OPTRM, OPTActor, OPTCritic
-from coati.models.roberta import RoBERTaActor, RoBERTaCritic, RoBERTaRM
 from coati.trainer.strategies import DDPStrategy, GeminiStrategy, LowLevelZeroStrategy
-from coati.utils import prepare_llama_tokenizer_and_embedding
-from transformers import AutoTokenizer, BloomTokenizerFast, GPT2Tokenizer, LlamaTokenizer, RobertaTokenizer
+from transformers import AutoTokenizer, BloomTokenizerFast, GPT2Tokenizer, LlamaTokenizer
 
 
 def is_rank_0() -> bool:
@@ -36,8 +34,6 @@ def get_actor_from_args(model: str, pretrained: str = None, config=None, lora_ra
         actor = OPTActor(pretrained=pretrained, config=config, lora_rank=lora_rank)
     elif model == 'llama':
        actor = LlamaActor(pretrained=pretrained, config=config, lora_rank=lora_rank)
-    elif model == 'roberta':
-        actor = RoBERTaActor(pretrained=pretrained, config=config, lora_rank=lora_rank)
     else:
         raise ValueError(f'Unsupported actor model "{model}"')
     return actor
@@ -52,8 +48,6 @@ def get_critic_from_args(model: str, pretrained: str = None, config=None, lora_r
         critic = OPTCritic(pretrained=pretrained, lora_rank=lora_rank, config=config, use_action_mask=True)
     elif model == 'llama':
         critic = LlamaCritic(pretrained=pretrained, lora_rank=lora_rank, config=config, use_action_mask=True)
-    elif model == 'roberta':
-        critic = RoBERTaCritic(pretrained=pretrained, lora_rank=lora_rank, config=config, use_action_mask=True)
     else:
         raise ValueError(f'Unsupported reward model "{model}"')
     return critic
@@ -68,8 +62,6 @@ def get_reward_model_from_args(model: str, pretrained: str = None, config=None):
         reward_model = OPTRM(pretrained=pretrained, config=config)
     elif model == 'llama':
         reward_model = LlamaRM(pretrained=pretrained, config=config)
-    elif model == 'roberta':
-        reward_model = RoBERTaRM(pretrained=pretrained, config=config)
     else:
         raise ValueError(f'Unsupported reward model "{model}"')
     return reward_model
@@ -101,8 +93,6 @@ def get_tokenizer_from_args(model: str, **kwargs):
     elif model == 'llama':
         pretrain_path = kwargs["pretrain"]
         tokenizer = AutoTokenizer.from_pretrained(pretrain_path)
-    elif model == 'roberta':
-        tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
     else:
         raise ValueError(f'Unsupported model "{model}"')
 
@@ -1,3 +0,0 @@
-from .tokenizer_utils import prepare_llama_tokenizer_and_embedding, smart_tokenizer_and_embedding_resize
-
-__all__ = ['smart_tokenizer_and_embedding_resize', 'prepare_llama_tokenizer_and_embedding']
@@ -1,73 +0,0 @@
-# Copyright 2023 Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import Dict
-
-import transformers
-
-DEFAULT_PAD_TOKEN = "[PAD]"
-DEFAULT_EOS_TOKEN = "</s>"
-DEFAULT_BOS_TOKEN = "</s>"
-DEFAULT_UNK_TOKEN = "</s>"
-
-
-def prepare_llama_tokenizer_and_embedding(
-    tokenizer: transformers.PreTrainedTokenizer,
-    model: transformers.PreTrainedModel,
-    special_tokens_dict: Dict = dict(pad_token=DEFAULT_PAD_TOKEN),
-):
-    """prepare llama tokenizer and embedding.
-
-    """
-
-    if tokenizer.pad_token is None:
-        smart_tokenizer_and_embedding_resize(
-            special_tokens_dict=dict(pad_token=DEFAULT_PAD_TOKEN),
-            tokenizer=tokenizer,
-            model=model,
-        )
-
-    tokenizer.add_special_tokens({
-        "eos_token": DEFAULT_EOS_TOKEN,
-        "bos_token": DEFAULT_BOS_TOKEN,
-        "unk_token": DEFAULT_UNK_TOKEN,
-    })
-
-    return tokenizer
-
-
-def smart_tokenizer_and_embedding_resize(
-    tokenizer: transformers.PreTrainedTokenizer,
-    model: transformers.PreTrainedModel,
-    special_tokens_dict: Dict = dict(pad_token=DEFAULT_PAD_TOKEN),
-):
-    """Resize tokenizer and embedding.
-
-    Note: This is the unoptimized version that may make your embedding size not be divisible by 64.
-    """
-
-    if tokenizer.pad_token is None:
-        num_new_tokens = tokenizer.add_special_tokens(special_tokens_dict)
-
-        model.resize_token_embeddings(len(tokenizer))
-
-        if num_new_tokens > 0:
-            input_embeddings = model.get_input_embeddings().weight.data
-            output_embeddings = model.get_output_embeddings().weight.data
-
-            input_embeddings_avg = input_embeddings[:-num_new_tokens].mean(dim=0, keepdim=True)
-            output_embeddings_avg = output_embeddings[:-num_new_tokens].mean(dim=0, keepdim=True)
-
-            input_embeddings[-num_new_tokens:] = input_embeddings_avg
-            output_embeddings[-num_new_tokens:] = output_embeddings_avg
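The helper deleted above added a new [PAD] token and resized the embedding matrix, initializing the new rows to the mean of the existing embeddings. Per the commit message ("change llama pad token to unk"), the replacement sidesteps the resize entirely; a minimal sketch, assuming a local LLaMA checkpoint path:

```python
from transformers import AutoTokenizer  # official transformers >= 4.28 ships LLaMA support

# "/path/to/llama" is a placeholder, not a real checkpoint name.
tokenizer = AutoTokenizer.from_pretrained("/path/to/llama", use_fast=False)
# Reuse the existing unk token as pad: no new token, so no embedding resize is needed.
tokenizer.pad_token = tokenizer.unk_token
```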
@@ -10,7 +10,6 @@ from coati.models.llama import LlamaActor, LlamaCritic, LlamaRM
 from coati.models.opt import OPTRM, OPTActor, OPTCritic
 from coati.trainer import PPOTrainer
 from coati.trainer.strategies import DDPStrategy, GeminiStrategy, LowLevelZeroStrategy
-from coati.utils import prepare_llama_tokenizer_and_embedding
 from easy_dataset import EasyPromptsDataset, EasySupervisedDataset
 from easy_models import BLOOMActor
 from peft import PeftModel
@@ -112,21 +111,20 @@ def main(args):
     # configure tokenizer
     if args.model == 'gpt2':
         tokenizer = GPT2Tokenizer.from_pretrained(args.rm_pretrain)
+        tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'bloom':
         tokenizer = BloomTokenizerFast.from_pretrained(args.rm_pretrain)
+        tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'opt':
         tokenizer = AutoTokenizer.from_pretrained(args.rm_pretrain)
+        tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'llama':
         tokenizer = LlamaTokenizer.from_pretrained(args.pretrain)
-        tokenizer.eos_token = '<\s>'
+        tokenizer.pad_token = tokenizer.unk_token
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
 
-    if args.model == 'llama':
-        tokenizer = prepare_llama_tokenizer_and_embedding(tokenizer, actor)
-    else:
-        tokenizer.pad_token = tokenizer.eos_token
-
     data_collator = DataCollatorForSupervisedDataset(tokenizer=tokenizer)
 
     prompt_dataset = EasyPromptsDataset(args.prompt_path, tokenizer)
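Why unk rather than eos as LLaMA's pad token? One plausible reason (our inference; the commit message only says "change llama pad token to unk"): if pad reuses the eos id, any collator that masks padding also masks genuine end-of-sequence tokens. A tiny self-contained illustration:

```python
import torch

pad_id = eos_id = 2                    # pad aliased to eos: the ids collide
labels = torch.tensor([5, 7, eos_id, pad_id, pad_id])
labels[labels == pad_id] = -100        # typical collator: ignore padding in the loss
print(labels)                          # tensor([5, 7, -100, -100, -100]) - the real EOS is masked too
```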
@@ -12,7 +12,6 @@ from coati.models.llama import LlamaLM
 from coati.models.opt import OPTLM
 from coati.trainer import SFTTrainer
 from coati.trainer.strategies import DDPStrategy, GeminiStrategy, LowLevelZeroStrategy
-from coati.utils import prepare_llama_tokenizer_and_embedding
 from datasets import load_dataset
 from easy_dataset import EasyDataset
 from peft import LoraConfig, PeftModel, TaskType, get_peft_model
@@ -65,10 +64,11 @@ def train(args):
         tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
         tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'bloom':
-        tokenizer = BloomTokenizerFast.from_pretrained(args.pretrain)
+        tokenizer = BloomTokenizerFast.from_pretrained("bigscience/bloom-560m")
         tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'opt':
         tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
+        tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'llama':
         tokenizer = AutoTokenizer.from_pretrained(
             args.pretrain,
@@ -76,13 +76,11 @@ def train(args):
             use_fast=False,
         )
-        tokenizer.eos_token = '<\s>'
+        tokenizer.pad_token = tokenizer.unk_token
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
-    tokenizer.pad_token = tokenizer.eos_token
-    if args.model == 'llama':
-        tokenizer = prepare_llama_tokenizer_and_embedding(tokenizer, model)
 
-    if args.strategy == 'colossalai_gemini':
+    if args.model == 'llama' and args.strategy == 'colossalai_gemini':
         # this is a hack to deal with the resized embedding
         # to make sure all parameters are ColoParameter for Colossal-AI Gemini Compatibility
         for name, param in model.named_parameters():
@@ -91,8 +89,6 @@ def train(args):
             weight_name = name.split('.')[-1]
             sub_module = model.get_submodule(sub_module_name)
             setattr(sub_module, weight_name, ColoParameter(param))
-    else:
-        tokenizer.pad_token = tokenizer.eos_token
 
     # configure optimizer
     if args.strategy.startswith('colossalai'):
@@ -2,10 +2,10 @@ import argparse
 
 import torch
 from coati.models.bloom import BLOOMActor
+from coati.models.generation import generate
 from coati.models.gpt import GPTActor
 from coati.models.opt import OPTActor
-from coati.models.roberta import RoBERTaActor
-from transformers import AutoTokenizer, RobertaTokenizer
+from transformers import AutoTokenizer
 from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer
 
 
@@ -17,13 +17,11 @@ def eval(args):
         actor = BLOOMActor(pretrained=args.pretrain).to(torch.cuda.current_device())
     elif args.model == 'opt':
         actor = OPTActor(pretrained=args.pretrain).to(torch.cuda.current_device())
-    elif args.model == 'roberta':
-        actor = RoBERTaActor(pretrained=args.pretrain).to(torch.cuda.current_device())
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
 
     state_dict = torch.load(args.model_path)
-    actor.model.load_state_dict(state_dict)
+    actor.load_state_dict(state_dict)
 
     # configure tokenizer
     if args.model == 'gpt2':
@@ -34,15 +32,14 @@ def eval(args):
         tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'opt':
         tokenizer = AutoTokenizer.from_pretrained('facebook/opt-350m')
-    elif args.model == 'roberta':
-        tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
 
     actor.eval()
     input = args.input
     input_ids = tokenizer.encode(input, return_tensors='pt').to(torch.cuda.current_device())
-    outputs = actor.generate(input_ids,
+    outputs = generate(actor,
+                       input_ids,
                        max_length=args.max_length,
                        do_sample=True,
                        top_k=50,
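The inference fix replaces the method call actor.generate(...) with coati's functional generate(actor, ...). An end-to-end sketch of the new call path, assembled from calls visible in this diff (the model name and prompt are illustrative, and a CUDA device is assumed):

```python
import torch
from coati.models.generation import generate
from coati.models.gpt import GPTActor
from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer

actor = GPTActor(pretrained='gpt2').to(torch.cuda.current_device())
actor.eval()

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token

input_ids = tokenizer.encode('Hello', return_tensors='pt').to(torch.cuda.current_device())
outputs = generate(actor, input_ids, max_length=64, do_sample=True, top_k=50)
print(tokenizer.batch_decode(outputs))
```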
@@ -54,7 +51,7 @@ def eval(args):
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
-    parser.add_argument('--model', default='gpt2', choices=['gpt2', 'bloom', 'opt', 'roberta'])
+    parser.add_argument('--model', default='gpt2', choices=['gpt2', 'bloom', 'opt'])
     # We suggest to use the pretrained model from HuggingFace, use pretrain to configure model
     parser.add_argument('--pretrain', type=str, default=None)
     parser.add_argument('--model_path', type=str, default=None)
@@ -43,18 +43,18 @@ pip install -r ${BASE}/requirements.txt
 
 wandb init -m offline
 
-# FIXME: This is a hack to skip tests that are not working (tested at commit b3ab7fbabf)
+# FIXME: This is a hack to skip tests that are not working
 # - gpt2-ddp: RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation
-# - llama-*: Repository Not Found for url: https://huggingface.co/{...}/resolve/main/tokenizer.model.
-# - roberta-*: RuntimeError: CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling `cublasCreate(handle)`
+# - llama-*: These tests can be passed locally, skipped for long execution time
 SKIPPED_TESTS=(
     "gpt2-ddp"
-    "llama-ddp" "llama-colossalai_gemini" "llama-colossalai_zero2"
-    "roberta-ddp" "roberta-colossalai_gemini" "roberta-colossalai_zero2"
+    "llama-ddp"
+    "llama-colossalai_gemini"
+    "llama-colossalai_zero2"
 )
 
 # These tests are quick and do not have any dependencies
-for model in 'gpt2' 'bloom' 'opt' 'llama' 'roberta'; do
+for model in 'gpt2' 'bloom' 'opt' 'llama'; do
     for strategy in 'ddp' 'colossalai_gemini' 'colossalai_zero2'; do
         if [[ " ${SKIPPED_TESTS[*]} " =~ " ${model}-${strategy} " ]]; then
             echo "[Test]: Skipped $model-$strategy"
@@ -64,7 +64,7 @@ for model in 'gpt2' 'bloom' 'opt' 'llama' 'roberta'; do
             --prompt_dataset $PROMPT_PATH --pretrain_dataset $PRETRAIN_DATASET \
             --strategy $strategy --model $model \
             --num_episodes 1 --num_collect_steps 2 --num_update_steps 1 \
-            --train_batch_size 2
+            --train_batch_size 2 --lora_rank 4
     done
 done
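The smoke test now passes --lora_rank 4, so training updates low-rank adapters rather than full weights. A generic, self-contained sketch of what a rank-r adapter is (illustrative only; coati's convert_to_lora_module is the actual implementation):

```python
import torch
import torch.nn as nn

class LoRALinear(nn.Module):
    """y = Wx + B(Ax): frozen base weight plus a trainable rank-r update."""

    def __init__(self, in_features: int, out_features: int, r: int = 4):
        super().__init__()
        self.base = nn.Linear(in_features, out_features)
        for p in self.base.parameters():
            p.requires_grad_(False)                                # base layer stays frozen
        self.lora_A = nn.Parameter(torch.randn(r, in_features) * 0.01)
        self.lora_B = nn.Parameter(torch.zeros(out_features, r))   # zero init: no-op at start

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.base(x) + x @ self.lora_A.T @ self.lora_B.T

layer = LoRALinear(768, 768, r=4)
trainable = sum(p.numel() for p in layer.parameters() if p.requires_grad)
print(trainable)  # 6144: just the two rank-4 factors, vs 590,592 for the full layer
```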
@@ -124,22 +124,6 @@ torchrun --standalone --nproc_per_node=2 ${BASE}/train_reward_model.py \
     --save_path ${BASE}/rm_ckpt.pt
 rm -rf ${BASE}/rm_ckpt.pt
 
-torchrun --standalone --nproc_per_node=2 ${BASE}/train_reward_model.py \
-    --pretrain 'microsoft/deberta-v3-large' --model 'deberta' \
-    --strategy colossalai_zero2 --loss_fn 'log_sig' \
-    --dataset 'Anthropic/hh-rlhf' --subset 'harmless-base' \
-    --test True --lora_rank 4 \
-    --save_path ${BASE}/rm_ckpt.pt
-rm -rf ${BASE}/rm_ckpt.pt
-
-torchrun --standalone --nproc_per_node=2 ${BASE}/train_reward_model.py \
-    --pretrain 'roberta-base' --model 'roberta' \
-    --strategy colossalai_zero2 --loss_fn 'log_exp' \
-    --dataset 'Anthropic/hh-rlhf' --subset 'harmless-base' \
-    --test True --lora_rank 4 \
-    --save_path ${BASE}/rm_ckpt.pt
-rm -rf ${BASE}/rm_ckpt.pt
-
 # train rl
 torchrun --standalone --nproc_per_node=2 ${BASE}/train_prompts.py \
     --prompt_dataset $PROMPT_PATH --pretrain_dataset $PRETRAIN_DATASET \
@@ -7,14 +7,12 @@ from coati.models.bloom import BLOOMRM, BLOOMActor, BLOOMCritic
 from coati.models.gpt import GPTRM, GPTActor, GPTCritic
 from coati.models.llama import LlamaActor, LlamaCritic, LlamaRM
 from coati.models.opt import OPTRM, OPTActor, OPTCritic
-from coati.models.roberta import RoBERTaActor, RoBERTaCritic, RoBERTaRM
 from coati.trainer import PPOTrainer
 from coati.trainer.strategies import DDPStrategy, GeminiStrategy, LowLevelZeroStrategy
-from coati.utils import prepare_llama_tokenizer_and_embedding
 from torch.optim import Adam
 from torch.utils.data import DataLoader
 from torch.utils.data.distributed import DistributedSampler
-from transformers import AutoTokenizer, BloomTokenizerFast, GPT2Tokenizer, LlamaTokenizer, RobertaTokenizer
+from transformers import AutoTokenizer, BloomTokenizerFast, GPT2Tokenizer, LlamaTokenizer
 
 from colossalai.nn.optimizer import HybridAdam
@@ -43,8 +41,6 @@ def main(args):
         initial_model = OPTActor(pretrained=args.pretrain)
     elif args.model == 'llama':
         initial_model = LlamaActor(pretrained=args.pretrain)
-    elif args.model == 'roberta':
-        initial_model = RoBERTaActor(pretrained=args.pretrain)
     else:
         raise ValueError(f'Unsupported actor model "{args.model}"')
 
@@ -61,8 +57,6 @@ def main(args):
         reward_model = OPTRM(pretrained=args.rm_pretrain)
     elif rm_model_name == 'llama':
         reward_model = LlamaRM(pretrained=args.rm_pretrain)
-    elif rm_model_name == 'roberta':
-        reward_model = RoBERTaRM(pretrained=args.rm_pretrain)
     else:
         raise ValueError(f'Unsupported reward model "{rm_model_name}"')
 
@@ -80,8 +74,6 @@ def main(args):
         actor = OPTActor(pretrained=args.pretrain, lora_rank=args.lora_rank)
     elif args.model == 'llama':
         actor = LlamaActor(pretrained=args.pretrain, lora_rank=args.lora_rank)
-    elif args.model == 'roberta':
-        actor = RoBERTaActor(pretrained=args.pretrain, lora_rank=args.lora_rank)
     else:
         raise ValueError(f'Unsupported actor model "{args.model}"')
 
@@ -93,8 +85,6 @@ def main(args):
         critic = OPTCritic(pretrained=args.rm_pretrain, lora_rank=args.lora_rank, use_action_mask=True)
     elif rm_model_name == 'llama':
         critic = LlamaCritic(pretrained=args.rm_pretrain, lora_rank=args.lora_rank, use_action_mask=True)
-    elif rm_model_name == 'roberta':
-        critic = RoBERTaCritic(pretrained=args.rm_pretrain, lora_rank=args.lora_rank, use_action_mask=True)
     else:
         raise ValueError(f'Unsupported reward model "{rm_model_name}"')
 
@@ -117,23 +107,20 @@ def main(args):
     # configure tokenizer
     if args.model == 'gpt2':
         tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+        tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'bloom':
         tokenizer = BloomTokenizerFast.from_pretrained('bigscience/bloom-560m')
+        tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'opt':
         tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
+        tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'llama':
         tokenizer = LlamaTokenizer.from_pretrained(args.pretrain)
-        tokenizer.eos_token = '<\s>'
-    elif args.model == 'roberta':
-        tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
+        tokenizer.pad_token = tokenizer.unk_token
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
 
-    if args.model == 'llama':
-        tokenizer = prepare_llama_tokenizer_and_embedding(tokenizer, actor)
-    else:
-        tokenizer.pad_token = tokenizer.eos_token
-
     data_collator = DataCollatorForSupervisedDataset(tokenizer=tokenizer)
 
     prompt_dataset = PromptDataset(tokenizer=tokenizer, data_path=args.prompt_dataset, max_datasets_size=16384)
@@ -209,9 +196,9 @@ if __name__ == '__main__':
                         choices=['ddp', 'colossalai_gemini', 'colossalai_zero2'],
                         default='colossalai_zero2',
                         help='strategy to use')
-    parser.add_argument('--model', default='gpt2', choices=['gpt2', 'bloom', 'opt', 'llama', 'roberta'])
+    parser.add_argument('--model', default='gpt2', choices=['gpt2', 'bloom', 'opt', 'llama'])
     parser.add_argument('--pretrain', type=str, default=None)
-    parser.add_argument('--rm_model', default=None, choices=['gpt2', 'bloom', 'opt', 'llama', 'roberta'])
+    parser.add_argument('--rm_model', default=None, choices=['gpt2', 'bloom', 'opt', 'llama'])
     parser.add_argument('--rm_path', type=str, default=None)
     parser.add_argument('--rm_pretrain', type=str, default=None)
     parser.add_argument('--save_path', type=str, default='actor_checkpoint_prompts')
@@ -1,27 +1,22 @@
 import argparse
 from random import randint
 
 import loralib as lora
 import torch
 import torch.distributed as dist
 from coati.dataset import HhRlhfDataset, RmStaticDataset
 from coati.models import LogExpLoss, LogSigLoss
 from coati.models.base import RewardModel
 from coati.models.bloom import BLOOMRM
-from coati.models.deberta import DebertaRM
 from coati.models.gpt import GPTRM
 from coati.models.llama import LlamaRM
 from coati.models.opt import OPTRM
-from coati.models.roberta import RoBERTaRM
 from coati.trainer import RewardModelTrainer
 from coati.trainer.strategies import DDPStrategy, GeminiStrategy, LowLevelZeroStrategy
-from coati.utils import prepare_llama_tokenizer_and_embedding
 from datasets import load_dataset
 from torch.optim import Adam
 from torch.optim.lr_scheduler import CosineAnnealingLR
 from torch.utils.data import DataLoader
 from torch.utils.data.distributed import DistributedSampler
-from transformers import AutoTokenizer, BloomTokenizerFast, DebertaV2Tokenizer, LlamaTokenizer, RobertaTokenizer
+from transformers import AutoTokenizer, BloomTokenizerFast, LlamaTokenizer
 from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer
 
 from colossalai.nn.optimizer import HybridAdam
@@ -46,12 +41,8 @@ def train(args):
         model = OPTRM(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
     elif args.model == 'gpt2':
         model = GPTRM(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
-    elif args.model == 'deberta':
-        model = DebertaRM(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
     elif args.model == 'llama':
         model = LlamaRM(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
-    elif args.model == 'roberta':
-        model = RoBERTaRM(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
 
@@ -64,24 +55,18 @@ def train(args):
     # configure tokenizer
     if args.model == 'gpt2':
         tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+        tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'bloom':
         tokenizer = BloomTokenizerFast.from_pretrained('bigscience/bloom-560m')
+        tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'opt':
         tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
-    elif args.model == 'deberta':
-        tokenizer = DebertaV2Tokenizer.from_pretrained('microsoft/deberta-v3-large')
+        tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'llama':
         tokenizer = LlamaTokenizer.from_pretrained(args.pretrain)
-    elif args.model == 'roberta':
-        tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
+        tokenizer.pad_token = tokenizer.unk_token
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
-    max_len = args.max_len
 
-    if args.model == 'llama':
-        tokenizer = prepare_llama_tokenizer_and_embedding(tokenizer, model)
-    else:
-        tokenizer.pad_token = tokenizer.eos_token
-
     # configure optimizer
     if args.strategy.startswith('colossalai'):
@@ -112,13 +97,13 @@ def train(args):
     valid_data = data['test'].select((randint(0, len(eval_data) - 1) for _ in range(len(eval_data) // 5)))
 
     if args.dataset == 'Dahoas/rm-static':
-        train_dataset = RmStaticDataset(train_data, tokenizer, max_len)
-        valid_dataset = RmStaticDataset(valid_data, tokenizer, max_len)
-        eval_dataset = RmStaticDataset(eval_data, tokenizer, max_len)
+        train_dataset = RmStaticDataset(train_data, tokenizer, args.max_len)
+        valid_dataset = RmStaticDataset(valid_data, tokenizer, args.max_len)
+        eval_dataset = RmStaticDataset(eval_data, tokenizer, args.max_len)
     elif args.dataset == 'Anthropic/hh-rlhf':
-        train_dataset = HhRlhfDataset(train_data, tokenizer, max_len)
-        valid_dataset = HhRlhfDataset(valid_data, tokenizer, max_len)
-        eval_dataset = HhRlhfDataset(eval_data, tokenizer, max_len)
+        train_dataset = HhRlhfDataset(train_data, tokenizer, args.max_len)
+        valid_dataset = HhRlhfDataset(valid_data, tokenizer, args.max_len)
+        eval_dataset = HhRlhfDataset(eval_data, tokenizer, args.max_len)
     else:
         raise ValueError(f'Unsupported dataset "{args.dataset}"')
 
@@ -195,7 +180,7 @@ if __name__ == '__main__':
     parser.add_argument('--strategy',
                         choices=['ddp', 'colossalai_gemini', 'colossalai_zero2'],
                         default='colossalai_zero2')
-    parser.add_argument('--model', choices=['gpt2', 'bloom', 'opt', 'deberta', 'llama', 'roberta'], default='bloom')
+    parser.add_argument('--model', choices=['gpt2', 'bloom', 'opt', 'llama'], default='bloom')
     parser.add_argument('--pretrain', type=str, default=None)
     parser.add_argument('--model_path', type=str, default=None)
     parser.add_argument('--need_optim_ckpt', type=bool, default=False)
@@ -9,7 +9,6 @@ from coati.dataset import DataCollatorForSupervisedDataset, SFTDataset, Supervis
 from coati.models import convert_to_lora_module
 from coati.trainer import SFTTrainer
 from coati.trainer.strategies import DDPStrategy, GeminiStrategy, LowLevelZeroStrategy
-from coati.utils import prepare_llama_tokenizer_and_embedding
 from datasets import load_dataset
 from torch.optim import Adam
 from torch.utils.data import DataLoader
@@ -68,6 +67,7 @@ def train(args):
         tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'opt':
         tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
+        tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'llama':
         tokenizer = AutoTokenizer.from_pretrained(
             args.pretrain,
@@ -75,14 +75,11 @@ def train(args):
             use_fast=False,
         )
-        tokenizer.eos_token = '<\s>'
+        tokenizer.pad_token = tokenizer.unk_token
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
-    tokenizer.pad_token = tokenizer.eos_token
-    max_len = args.max_len
-    if args.model == 'llama':
-        tokenizer = prepare_llama_tokenizer_and_embedding(tokenizer, model)
 
-    if args.strategy == 'colossalai_gemini':
+    if args.model == 'llama' and args.strategy == 'colossalai_gemini':
         # this is a hack to deal with the resized embedding
         # to make sure all parameters are ColoParameter for Colossal-AI Gemini Compatibility
         for name, param in model.named_parameters():
@@ -91,8 +88,6 @@ def train(args):
             weight_name = name.split('.')[-1]
             sub_module = model.get_submodule(sub_module_name)
             setattr(sub_module, weight_name, ColoParameter(param))
-    else:
-        tokenizer.pad_token = tokenizer.eos_token
 
     # configure optimizer
     if args.strategy.startswith('colossalai'):
@@ -107,14 +102,14 @@ def train(args):
         train_data = load_dataset(args.dataset, 'super_natural_instructions', split='train')
         eval_data = load_dataset(args.dataset, 'super_natural_instructions', split='test')
 
-        train_dataset = SFTDataset(train_data, tokenizer, max_len)
-        eval_dataset = SFTDataset(eval_data, tokenizer, max_len)
+        train_dataset = SFTDataset(train_data, tokenizer, args.max_len)
+        eval_dataset = SFTDataset(eval_data, tokenizer, args.max_len)
 
     else:
         train_dataset = SupervisedDataset(tokenizer=tokenizer,
                                           data_path=args.dataset,
                                           max_datasets_size=args.max_datasets_size,
-                                          max_length=max_len)
+                                          max_length=args.max_len)
         eval_dataset = None
     data_collator = DataCollatorForSupervisedDataset(tokenizer=tokenizer)
 