mirror of https://github.com/hpcaitech/ColossalAI
[chat] use official transformers and fix some issues (#4117)
* feat: remove on_learn_epoch fn as not used
* revert: add _on_learn_epoch fn
* feat: remove NaiveStrategy
* test: update train_prompts tests
* fix: remove prepare_llama_tokenizer_and_embedding
* test: add lora arg
* feat: remove roberta support in train_prompts due to runtime errs
* feat: remove deberta & roberta in rm as not used
* test: remove deberta and roberta tests
* feat: remove deberta and roberta models as not used
* fix: remove calls to roberta
* fix: remove prepare_llama_tokenizer_and_embedding
* chore: update transformers version
* docs: update transformers version
* fix: fix actor inference
* fix: fix ci
* feat: change llama pad token to unk
* revert: revert ddp setup_distributed
* fix: change llama pad token to unk
* revert: undo unnecessary changes
* fix: use pip to install transformers

Branch: pull/4157/head
parent 1350ece492
commit 3d8d5d0d58
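Most of the tokenizer hunks below implement the "change llama pad token to unk" item from the message above: instead of adding a new `[PAD]` token and resizing the model's embeddings, the scripts now reuse the tokenizer's existing unk token for padding. A minimal sketch of that pattern, with a placeholder checkpoint path:

```python
from transformers import LlamaTokenizer

# "path/to/llama" is a placeholder; any local LLaMA checkpoint directory works.
tokenizer = LlamaTokenizer.from_pretrained("path/to/llama")

# Reusing the existing <unk> token for padding adds no new tokens, so the
# embedding matrix keeps its original size and never needs resizing.
tokenizer.pad_token = tokenizer.unk_token
```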
@@ -4,11 +4,10 @@ on:
   pull_request:
     types: [synchronize, opened, reopened]
     paths:
-      - 'applications/Chat/coati/**'
-      - 'applications/Chat/requirements.txt'
-      - 'applications/Chat/setup.py'
-      - 'applications/Chat/examples/**'
-
+      - "applications/Chat/coati/**"
+      - "applications/Chat/requirements.txt"
+      - "applications/Chat/setup.py"
+      - "applications/Chat/examples/**"
 
 jobs:
   tests:
@@ -38,10 +37,7 @@ jobs:
 
       - name: Install Transformers
        run: |
-          cd applications/Chat
-          git clone https://github.com/hpcaitech/transformers
-          cd transformers
-          pip install -v .
+          pip install transformers==4.30.2
 
      - name: Execute Examples
        run: |
@@ -98,12 +98,9 @@ pip install .
 ```
 
 ### Install the Transformers
-Given Hugging Face hasn't officially supported the LLaMA models, We fork a branch of Transformers that can be compatible with our code
 
 ```shell
-git clone https://github.com/hpcaitech/transformers
-cd transformers
-pip install .
+pip install transformers==4.30.2
 ```
 
 ## How to use?
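A quick sanity check that the pinned release (rather than a stale fork) is what actually got installed; this snippet is illustrative, not part of the diff:

```python
import transformers

# The CI workflow and README above both pin 4.30.2; a mismatch here usually
# means the old hpcaitech/transformers fork is still on the path.
assert transformers.__version__ == "4.30.2", transformers.__version__
```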
@@ -1,4 +0,0 @@
-from .deberta_critic import DebertaCritic
-from .deberta_rm import DebertaRM
-
-__all__ = ['DebertaCritic', 'DebertaRM']
@@ -1,36 +0,0 @@
-from typing import Optional
-
-import torch.nn as nn
-from transformers import DebertaV2Config, DebertaV2Model
-
-from ..base import Critic
-
-
-class DebertaCritic(Critic):
-    """
-    Deberta Critic model.
-
-    Args:
-        pretrained (str): Pretrained model name or path.
-        config (DebertaV2Config): Model config.
-        checkpoint (bool): Enable gradient checkpointing.
-        lora_rank (int): Rank of the LO-RA decomposition.
-        lora_train_bias (str): LoRA bias training mode.
-    """
-
-    def __init__(self,
-                 pretrained: Optional[str] = None,
-                 config: Optional[DebertaV2Config] = None,
-                 checkpoint: bool = False,
-                 lora_rank: int = 0,
-                 lora_train_bias: str = 'none') -> None:
-        if pretrained is not None:
-            model = DebertaV2Model.from_pretrained(pretrained)
-        elif config is not None:
-            model = DebertaV2Model(config)
-        else:
-            model = DebertaV2Model(DebertaV2Config())
-        if checkpoint:
-            model.gradient_checkpointing_enable()
-        value_head = nn.Linear(model.config.hidden_size, 1)
-        super().__init__(model, value_head, lora_rank, lora_train_bias)
@@ -1,37 +0,0 @@
-from typing import Optional
-
-import torch.nn as nn
-from transformers import DebertaV2Config, DebertaV2Model
-
-from ..base import RewardModel
-
-
-class DebertaRM(RewardModel):
-    """
-    Deberta Reward model.
-
-    Args:
-        pretrained (str): Pretrained model name or path.
-        config (DebertaV2Config): Model config.
-        checkpoint (bool): Enable gradient checkpointing.
-        lora_rank (int): Rank of the LO-RA decomposition.
-        lora_train_bias (str): LoRA bias training mode.
-    """
-
-    def __init__(self,
-                 pretrained: str = None,
-                 config: Optional[DebertaV2Config] = None,
-                 checkpoint: bool = False,
-                 lora_rank: int = 0,
-                 lora_train_bias: str = 'none') -> None:
-        if pretrained is not None:
-            model = DebertaV2Model.from_pretrained(pretrained)
-        elif config is not None:
-            model = DebertaV2Model(config)
-        else:
-            model = DebertaV2Model(DebertaV2Config())
-        if checkpoint:
-            model.gradient_checkpointing_enable()
-        value_head = nn.Linear(model.config.hidden_size, 1)
-        value_head.weight.data.normal_(mean=0.0, std=1 / (model.config.hidden_size + 1))
-        super().__init__(model, value_head, lora_rank, lora_train_bias)
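Both deleted reward models draw the scalar value head's weights from a normal distribution with std 1/(hidden_size + 1), presumably so that initial reward outputs stay near zero regardless of model width. The same initialization as a standalone sketch (the width here is an arbitrary example):

```python
import torch.nn as nn

hidden_size = 1024  # example width; the real value comes from model.config.hidden_size
value_head = nn.Linear(hidden_size, 1)
# A std that shrinks with width keeps the initial scalar rewards small.
value_head.weight.data.normal_(mean=0.0, std=1 / (hidden_size + 1))
```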
@@ -1,5 +0,0 @@
-from .roberta_actor import RoBERTaActor
-from .roberta_critic import RoBERTaCritic
-from .roberta_rm import RoBERTaRM
-
-__all__ = ['RoBERTaActor', 'RoBERTaCritic', 'RoBERTaRM']
@@ -1,35 +0,0 @@
-from typing import Optional
-
-from transformers.models.roberta.configuration_roberta import RobertaConfig
-from transformers.models.roberta.modeling_roberta import RobertaForCausalLM
-
-from ..base import Actor
-
-
-class RoBERTaActor(Actor):
-    """
-    RoBERTa Actor model.
-
-    Args:
-        pretrained (str): Pretrained model name or path.
-        config (RoBERTaConfig): Model config.
-        checkpoint (bool): Enable gradient checkpointing.
-        lora_rank (int): Rank of the low-rank approximation.
-        lora_train_bias (str): LoRA bias training mode.
-    """
-
-
-    def __init__(self,
-                 pretrained: Optional[str] = None,
-                 config: Optional[RobertaConfig] = None,
-                 checkpoint: bool = False,
-                 lora_rank: int = 0,
-                 lora_train_bias: str = 'none') -> None:
-        if pretrained is not None:
-            model = RobertaForCausalLM.from_pretrained(pretrained)
-        elif config is not None:
-            model = RobertaForCausalLM(config)
-        else:
-            model = RobertaForCausalLM(RobertaConfig())
-        if checkpoint:
-            model.gradient_checkpointing_enable()
-        super().__init__(model, lora_rank, lora_train_bias)
@@ -1,38 +0,0 @@
-from typing import Optional
-
-import torch.nn as nn
-from transformers.models.roberta.configuration_roberta import RobertaConfig
-from transformers.models.roberta.modeling_roberta import RobertaModel
-
-from ..base import Critic
-
-
-class RoBERTaCritic(Critic):
-    """
-    RoBERTa Critic model.
-
-    Args:
-        pretrained (str): Pretrained model name or path.
-        config (RoBERTa Config): Model config.
-        checkpoint (bool): Enable gradient checkpointing.
-        lora_rank (int): Rank of the low-rank approximation.
-        lora_train_bias (str): LoRA bias training mode.
-    """
-
-    def __init__(self,
-                 pretrained: Optional[str] = None,
-                 config: Optional[RobertaConfig] = None,
-                 checkpoint: bool = False,
-                 lora_rank: int = 0,
-                 lora_train_bias: str = 'none',
-                 **kwargs) -> None:
-        if pretrained is not None:
-            model = RobertaModel.from_pretrained(pretrained, add_pooling_layer=False)
-        elif config is not None:
-            model = RobertaModel(config)
-        else:
-            model = RobertaModel(RobertaConfig())
-        if checkpoint:
-            model.gradient_checkpointing_enable()
-        value_head = nn.Linear(model.config.hidden_size, 1)
-        super().__init__(model, value_head, lora_rank, lora_train_bias, **kwargs)
@@ -1,39 +0,0 @@
-from typing import Optional
-
-import torch.nn as nn
-from transformers import RobertaConfig, RobertaModel
-
-
-from ..base import RewardModel
-
-
-class RoBERTaRM(RewardModel):
-    """
-    RoBERTa Reward model.
-
-    Args:
-        pretrained (str): Pretrained model name or path.
-        config (RoBERTaConfig): Model config.
-        checkpoint (bool): Enable gradient checkpointing.
-        lora_rank (int): Rank of the low-rank approximation.
-        lora_train_bias (str): LoRA bias training mode.
-    """
-
-    def __init__(self,
-                 pretrained: Optional[str] = None,
-                 config: Optional[RobertaConfig] = None,
-                 checkpoint: bool = False,
-                 lora_rank: int = 0,
-                 lora_train_bias: str = 'none') -> None:
-        if pretrained is not None:
-            model = RobertaModel.from_pretrained(pretrained, add_pooling_layer=False)
-        elif config is not None:
-            model = RobertaModel(config)
-        else:
-            model = RobertaModel(RobertaConfig())
-        if checkpoint:
-            model.gradient_checkpointing_enable()
-
-        value_head = nn.Linear(model.config.hidden_size, 1)
-        value_head.weight.data.normal_(mean=0.0, std=1/(model.config.hidden_size + 1))
-        super().__init__(model, value_head, lora_rank, lora_train_bias)
@@ -9,10 +9,8 @@ from coati.models.bloom import BLOOMRM, BLOOMActor, BLOOMCritic
 from coati.models.gpt import GPTRM, GPTActor, GPTCritic
 from coati.models.llama import LlamaActor, LlamaCritic, LlamaRM
 from coati.models.opt import OPTRM, OPTActor, OPTCritic
-from coati.models.roberta import RoBERTaActor, RoBERTaCritic, RoBERTaRM
 from coati.trainer.strategies import DDPStrategy, GeminiStrategy, LowLevelZeroStrategy
-from coati.utils import prepare_llama_tokenizer_and_embedding
-from transformers import AutoTokenizer, BloomTokenizerFast, GPT2Tokenizer, LlamaTokenizer, RobertaTokenizer
+from transformers import AutoTokenizer, BloomTokenizerFast, GPT2Tokenizer, LlamaTokenizer
 
 
 def is_rank_0() -> bool:
@@ -36,8 +34,6 @@ def get_actor_from_args(model: str, pretrained: str = None, config=None, lora_ra
         actor = OPTActor(pretrained=pretrained, config=config, lora_rank=lora_rank)
     elif model == 'llama':
         actor = LlamaActor(pretrained=pretrained, config=config, lora_rank=lora_rank)
-    elif model == 'roberta':
-        actor = RoBERTaActor(pretrained=pretrained, config=config, lora_rank=lora_rank)
     else:
         raise ValueError(f'Unsupported actor model "{model}"')
     return actor
@@ -52,8 +48,6 @@ def get_critic_from_args(model: str, pretrained: str = None, config=None, lora_r
         critic = OPTCritic(pretrained=pretrained, lora_rank=lora_rank, config=config, use_action_mask=True)
     elif model == 'llama':
         critic = LlamaCritic(pretrained=pretrained, lora_rank=lora_rank, config=config, use_action_mask=True)
-    elif model == 'roberta':
-        critic = RoBERTaCritic(pretrained=pretrained, lora_rank=lora_rank, config=config, use_action_mask=True)
     else:
         raise ValueError(f'Unsupported reward model "{model}"')
     return critic
@@ -68,8 +62,6 @@ def get_reward_model_from_args(model: str, pretrained: str = None, config=None):
         reward_model = OPTRM(pretrained=pretrained, config=config)
     elif model == 'llama':
         reward_model = LlamaRM(pretrained=pretrained, config=config)
-    elif model == 'roberta':
-        reward_model = RoBERTaRM(pretrained=pretrained, config=config)
     else:
         raise ValueError(f'Unsupported reward model "{model}"')
     return reward_model
@@ -101,8 +93,6 @@ def get_tokenizer_from_args(model: str, **kwargs):
     elif model == 'llama':
         pretrain_path = kwargs["pretrain"]
         tokenizer = AutoTokenizer.from_pretrained(pretrain_path)
-    elif model == 'roberta':
-        tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
     else:
         raise ValueError(f'Unsupported model "{model}"')
 
@@ -1,3 +0,0 @@
-from .tokenizer_utils import prepare_llama_tokenizer_and_embedding, smart_tokenizer_and_embedding_resize
-
-__all__ = ['smart_tokenizer_and_embedding_resize', 'prepare_llama_tokenizer_and_embedding']
@@ -1,73 +0,0 @@
-# Copyright 2023 Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import Dict
-
-import transformers
-
-DEFAULT_PAD_TOKEN = "[PAD]"
-DEFAULT_EOS_TOKEN = "</s>"
-DEFAULT_BOS_TOKEN = "</s>"
-DEFAULT_UNK_TOKEN = "</s>"
-
-
-def prepare_llama_tokenizer_and_embedding(
-    tokenizer: transformers.PreTrainedTokenizer,
-    model: transformers.PreTrainedModel,
-    special_tokens_dict: Dict = dict(pad_token=DEFAULT_PAD_TOKEN),
-):
-    """prepare llama tokenizer and embedding.
-
-    """
-
-    if tokenizer.pad_token is None:
-        smart_tokenizer_and_embedding_resize(
-            special_tokens_dict=dict(pad_token=DEFAULT_PAD_TOKEN),
-            tokenizer=tokenizer,
-            model=model,
-        )
-
-    tokenizer.add_special_tokens({
-        "eos_token": DEFAULT_EOS_TOKEN,
-        "bos_token": DEFAULT_BOS_TOKEN,
-        "unk_token": DEFAULT_UNK_TOKEN,
-    })
-
-    return tokenizer
-
-
-def smart_tokenizer_and_embedding_resize(
-    tokenizer: transformers.PreTrainedTokenizer,
-    model: transformers.PreTrainedModel,
-    special_tokens_dict: Dict = dict(pad_token=DEFAULT_PAD_TOKEN),
-):
-    """Resize tokenizer and embedding.
-
-    Note: This is the unoptimized version that may make your embedding size not be divisible by 64.
-    """
-
-    if tokenizer.pad_token is None:
-        num_new_tokens = tokenizer.add_special_tokens(special_tokens_dict)
-
-        model.resize_token_embeddings(len(tokenizer))
-
-        if num_new_tokens > 0:
-            input_embeddings = model.get_input_embeddings().weight.data
-            output_embeddings = model.get_output_embeddings().weight.data
-
-            input_embeddings_avg = input_embeddings[:-num_new_tokens].mean(dim=0, keepdim=True)
-            output_embeddings_avg = output_embeddings[:-num_new_tokens].mean(dim=0, keepdim=True)
-
-            input_embeddings[-num_new_tokens:] = input_embeddings_avg
-            output_embeddings[-num_new_tokens:] = output_embeddings_avg
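The deleted helper added a `[PAD]` token, resized the embedding matrix, and mean-initialized the new rows; once padding reuses the unk token (see the tokenizer hunks elsewhere in this commit), none of that is needed. For reference, the core of the removed resize trick as a self-contained sketch:

```python
import torch

def mean_init_new_rows(embedding: torch.Tensor, num_new_tokens: int) -> None:
    # Newly appended rows start as the mean of all pre-existing rows, the same
    # initialization the deleted smart_tokenizer_and_embedding_resize used.
    old_avg = embedding[:-num_new_tokens].mean(dim=0, keepdim=True)
    embedding[-num_new_tokens:] = old_avg
```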
@@ -10,7 +10,6 @@ from coati.models.llama import LlamaActor, LlamaCritic, LlamaRM
 from coati.models.opt import OPTRM, OPTActor, OPTCritic
 from coati.trainer import PPOTrainer
 from coati.trainer.strategies import DDPStrategy, GeminiStrategy, LowLevelZeroStrategy
-from coati.utils import prepare_llama_tokenizer_and_embedding
 from easy_dataset import EasyPromptsDataset, EasySupervisedDataset
 from easy_models import BLOOMActor
 from peft import PeftModel
@@ -112,21 +111,20 @@ def main(args):
     # configure tokenizer
     if args.model == 'gpt2':
         tokenizer = GPT2Tokenizer.from_pretrained(args.rm_pretrain)
+        tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'bloom':
         tokenizer = BloomTokenizerFast.from_pretrained(args.rm_pretrain)
+        tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'opt':
         tokenizer = AutoTokenizer.from_pretrained(args.rm_pretrain)
+        tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'llama':
         tokenizer = LlamaTokenizer.from_pretrained(args.pretrain)
         tokenizer.eos_token = '<\s>'
+        tokenizer.pad_token = tokenizer.unk_token
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
-
-    if args.model == 'llama':
-        tokenizer = prepare_llama_tokenizer_and_embedding(tokenizer, actor)
-    else:
-        tokenizer.pad_token = tokenizer.eos_token
 
     data_collator = DataCollatorForSupervisedDataset(tokenizer=tokenizer)
 
     prompt_dataset = EasyPromptsDataset(args.prompt_path, tokenizer)
@@ -12,7 +12,6 @@ from coati.models.llama import LlamaLM
 from coati.models.opt import OPTLM
 from coati.trainer import SFTTrainer
 from coati.trainer.strategies import DDPStrategy, GeminiStrategy, LowLevelZeroStrategy
-from coati.utils import prepare_llama_tokenizer_and_embedding
 from datasets import load_dataset
 from easy_dataset import EasyDataset
 from peft import LoraConfig, PeftModel, TaskType, get_peft_model
@@ -65,10 +64,11 @@ def train(args):
         tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
         tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'bloom':
-        tokenizer = BloomTokenizerFast.from_pretrained(args.pretrain)
+        tokenizer = BloomTokenizerFast.from_pretrained("bigscience/bloom-560m")
         tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'opt':
         tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
+        tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'llama':
         tokenizer = AutoTokenizer.from_pretrained(
             args.pretrain,
@@ -76,23 +76,19 @@ def train(args):
             use_fast=False,
         )
         tokenizer.eos_token = '<\s>'
+        tokenizer.pad_token = tokenizer.unk_token
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
-    tokenizer.pad_token = tokenizer.eos_token
-    if args.model == 'llama':
-        tokenizer = prepare_llama_tokenizer_and_embedding(tokenizer, model)
 
-    if args.strategy == 'colossalai_gemini':
+    if args.model == 'llama' and args.strategy == 'colossalai_gemini':
         # this is a hack to deal with the resized embedding
         # to make sure all parameters are ColoParameter for Colossal-AI Gemini Compatibility
         for name, param in model.named_parameters():
             if not isinstance(param, ColoParameter):
                 sub_module_name = '.'.join(name.split('.')[:-1])
                 weight_name = name.split('.')[-1]
                 sub_module = model.get_submodule(sub_module_name)
                 setattr(sub_module, weight_name, ColoParameter(param))
-    else:
-        tokenizer.pad_token = tokenizer.eos_token
 
     # configure optimizer
     if args.strategy.startswith('colossalai'):
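The Gemini branch above is now gated on `args.model == 'llama'` as well as the strategy; the loop itself just re-wraps any parameter that is still a vanilla `torch.nn.Parameter` as a `ColoParameter` so Gemini can manage it. A distilled sketch of that step (the `colossalai.tensor` import path is an assumption; the diff only references the name `ColoParameter`):

```python
from colossalai.tensor import ColoParameter  # import path assumed

def wrap_params_as_colo(model):
    # Re-register any plain Parameter as a ColoParameter in-place so every
    # parameter is Gemini-compatible before training starts.
    for name, param in model.named_parameters():
        if not isinstance(param, ColoParameter):
            owner = model.get_submodule('.'.join(name.split('.')[:-1]))
            setattr(owner, name.split('.')[-1], ColoParameter(param))
```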
@@ -2,10 +2,10 @@ import argparse
 
 import torch
 from coati.models.bloom import BLOOMActor
+from coati.models.generation import generate
 from coati.models.gpt import GPTActor
 from coati.models.opt import OPTActor
-from coati.models.roberta import RoBERTaActor
-from transformers import AutoTokenizer, RobertaTokenizer
+from transformers import AutoTokenizer
 from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer
 
 
@@ -17,13 +17,11 @@ def eval(args):
         actor = BLOOMActor(pretrained=args.pretrain).to(torch.cuda.current_device())
     elif args.model == 'opt':
         actor = OPTActor(pretrained=args.pretrain).to(torch.cuda.current_device())
-    elif args.model == 'roberta':
-        actor = RoBERTaActor(pretrained=args.pretrain).to(torch.cuda.current_device())
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
 
     state_dict = torch.load(args.model_path)
-    actor.model.load_state_dict(state_dict)
+    actor.load_state_dict(state_dict)
 
     # configure tokenizer
     if args.model == 'gpt2':
@@ -34,27 +32,26 @@ def eval(args):
         tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'opt':
         tokenizer = AutoTokenizer.from_pretrained('facebook/opt-350m')
-    elif args.model == 'roberta':
-        tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
 
     actor.eval()
     input = args.input
     input_ids = tokenizer.encode(input, return_tensors='pt').to(torch.cuda.current_device())
-    outputs = actor.generate(input_ids,
-                             max_length=args.max_length,
-                             do_sample=True,
-                             top_k=50,
-                             top_p=0.95,
-                             num_return_sequences=1)
+    outputs = generate(actor,
+                       input_ids,
+                       max_length=args.max_length,
+                       do_sample=True,
+                       top_k=50,
+                       top_p=0.95,
+                       num_return_sequences=1)
     output = tokenizer.batch_decode(outputs[0], skip_special_tokens=True)
     print(output)
 
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
-    parser.add_argument('--model', default='gpt2', choices=['gpt2', 'bloom', 'opt', 'roberta'])
+    parser.add_argument('--model', default='gpt2', choices=['gpt2', 'bloom', 'opt'])
     # We suggest to use the pretrained model from HuggingFace, use pretrain to configure model
     parser.add_argument('--pretrain', type=str, default=None)
     parser.add_argument('--model_path', type=str, default=None)
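The inference script now routes generation through coati's module-level `generate` helper, passing the actor as the first argument, and loads the checkpoint into the actor wrapper itself (`actor.load_state_dict`). A hedged end-to-end sketch using the small public GPT-2 checkpoint (requires a CUDA device, like the script above; the call shape mirrors the diff):

```python
import torch
from coati.models.generation import generate
from coati.models.gpt import GPTActor
from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer

actor = GPTActor(pretrained='gpt2').to(torch.cuda.current_device())
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token

actor.eval()
input_ids = tokenizer.encode('Hello', return_tensors='pt').to(torch.cuda.current_device())
# Same call shape as the updated script: the actor is passed in explicitly.
outputs = generate(actor, input_ids, max_length=50, do_sample=True,
                   top_k=50, top_p=0.95, num_return_sequences=1)
print(tokenizer.batch_decode(outputs[0], skip_special_tokens=True))
```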
@@ -43,18 +43,18 @@ pip install -r ${BASE}/requirements.txt
 
 wandb init -m offline
 
-# FIXME: This is a hack to skip tests that are not working (tested at commit b3ab7fbabf)
+# FIXME: This is a hack to skip tests that are not working
 # - gpt2-ddp: RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation
-# - llama-*: Repository Not Found for url: https://huggingface.co/{...}/resolve/main/tokenizer.model.
-# - roberta-*: RuntimeError: CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling `cublasCreate(handle)`
+# - llama-*: These tests can be passed locally, skipped for long execution time
 SKIPPED_TESTS=(
   "gpt2-ddp"
-  "llama-ddp" "llama-colossalai_gemini" "llama-colossalai_zero2"
-  "roberta-ddp" "roberta-colossalai_gemini" "roberta-colossalai_zero2"
+  "llama-ddp"
+  "llama-colossalai_gemini"
+  "llama-colossalai_zero2"
 )
 
 # These tests are quick and do not have any dependencies
-for model in 'gpt2' 'bloom' 'opt' 'llama' 'roberta'; do
+for model in 'gpt2' 'bloom' 'opt' 'llama'; do
   for strategy in 'ddp' 'colossalai_gemini' 'colossalai_zero2'; do
     if [[ " ${SKIPPED_TESTS[*]} " =~ " ${model}-${strategy} " ]]; then
       echo "[Test]: Skipped $model-$strategy"
@@ -64,7 +64,7 @@ for model in 'gpt2' 'bloom' 'opt' 'llama' 'roberta'; do
       --prompt_dataset $PROMPT_PATH --pretrain_dataset $PRETRAIN_DATASET \
       --strategy $strategy --model $model \
       --num_episodes 1 --num_collect_steps 2 --num_update_steps 1 \
-      --train_batch_size 2
+      --train_batch_size 2 --lora_rank 4
   done
 done
 
@@ -124,22 +124,6 @@ torchrun --standalone --nproc_per_node=2 ${BASE}/train_reward_model.py \
     --save_path ${BASE}/rm_ckpt.pt
 rm -rf ${BASE}/rm_ckpt.pt
 
-torchrun --standalone --nproc_per_node=2 ${BASE}/train_reward_model.py \
-    --pretrain 'microsoft/deberta-v3-large' --model 'deberta' \
-    --strategy colossalai_zero2 --loss_fn 'log_sig' \
-    --dataset 'Anthropic/hh-rlhf' --subset 'harmless-base' \
-    --test True --lora_rank 4 \
-    --save_path ${BASE}/rm_ckpt.pt
-rm -rf ${BASE}/rm_ckpt.pt
-
-torchrun --standalone --nproc_per_node=2 ${BASE}/train_reward_model.py \
-    --pretrain 'roberta-base' --model 'roberta' \
-    --strategy colossalai_zero2 --loss_fn 'log_exp' \
-    --dataset 'Anthropic/hh-rlhf' --subset 'harmless-base' \
-    --test True --lora_rank 4 \
-    --save_path ${BASE}/rm_ckpt.pt
-rm -rf ${BASE}/rm_ckpt.pt
-
 # train rl
 torchrun --standalone --nproc_per_node=2 ${BASE}/train_prompts.py \
     --prompt_dataset $PROMPT_PATH --pretrain_dataset $PRETRAIN_DATASET \
@@ -7,14 +7,12 @@ from coati.models.bloom import BLOOMRM, BLOOMActor, BLOOMCritic
 from coati.models.gpt import GPTRM, GPTActor, GPTCritic
 from coati.models.llama import LlamaActor, LlamaCritic, LlamaRM
 from coati.models.opt import OPTRM, OPTActor, OPTCritic
-from coati.models.roberta import RoBERTaActor, RoBERTaCritic, RoBERTaRM
 from coati.trainer import PPOTrainer
 from coati.trainer.strategies import DDPStrategy, GeminiStrategy, LowLevelZeroStrategy
-from coati.utils import prepare_llama_tokenizer_and_embedding
 from torch.optim import Adam
 from torch.utils.data import DataLoader
 from torch.utils.data.distributed import DistributedSampler
-from transformers import AutoTokenizer, BloomTokenizerFast, GPT2Tokenizer, LlamaTokenizer, RobertaTokenizer
+from transformers import AutoTokenizer, BloomTokenizerFast, GPT2Tokenizer, LlamaTokenizer
 
 from colossalai.nn.optimizer import HybridAdam
 
@@ -43,8 +41,6 @@ def main(args):
         initial_model = OPTActor(pretrained=args.pretrain)
     elif args.model == 'llama':
         initial_model = LlamaActor(pretrained=args.pretrain)
-    elif args.model == 'roberta':
-        initial_model = RoBERTaActor(pretrained=args.pretrain)
     else:
         raise ValueError(f'Unsupported actor model "{args.model}"')
 
@@ -61,8 +57,6 @@ def main(args):
         reward_model = OPTRM(pretrained=args.rm_pretrain)
     elif rm_model_name == 'llama':
         reward_model = LlamaRM(pretrained=args.rm_pretrain)
-    elif rm_model_name == 'roberta':
-        reward_model = RoBERTaRM(pretrained=args.rm_pretrain)
     else:
         raise ValueError(f'Unsupported reward model "{rm_model_name}"')
 
@@ -80,8 +74,6 @@ def main(args):
         actor = OPTActor(pretrained=args.pretrain, lora_rank=args.lora_rank)
     elif args.model == 'llama':
         actor = LlamaActor(pretrained=args.pretrain, lora_rank=args.lora_rank)
-    elif args.model == 'roberta':
-        actor = RoBERTaActor(pretrained=args.pretrain, lora_rank=args.lora_rank)
     else:
         raise ValueError(f'Unsupported actor model "{args.model}"')
 
@@ -93,8 +85,6 @@ def main(args):
         critic = OPTCritic(pretrained=args.rm_pretrain, lora_rank=args.lora_rank, use_action_mask=True)
     elif rm_model_name == 'llama':
         critic = LlamaCritic(pretrained=args.rm_pretrain, lora_rank=args.lora_rank, use_action_mask=True)
-    elif rm_model_name == 'roberta':
-        critic = RoBERTaCritic(pretrained=args.rm_pretrain, lora_rank=args.lora_rank, use_action_mask=True)
     else:
         raise ValueError(f'Unsupported reward model "{rm_model_name}"')
 
@@ -117,23 +107,20 @@ def main(args):
     # configure tokenizer
     if args.model == 'gpt2':
         tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+        tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'bloom':
         tokenizer = BloomTokenizerFast.from_pretrained('bigscience/bloom-560m')
+        tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'opt':
         tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
+        tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'llama':
         tokenizer = LlamaTokenizer.from_pretrained(args.pretrain)
         tokenizer.eos_token = '<\s>'
-    elif args.model == 'roberta':
-        tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
+        tokenizer.pad_token = tokenizer.unk_token
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
-
-    if args.model == 'llama':
-        tokenizer = prepare_llama_tokenizer_and_embedding(tokenizer, actor)
-    else:
-        tokenizer.pad_token = tokenizer.eos_token
 
     data_collator = DataCollatorForSupervisedDataset(tokenizer=tokenizer)
 
     prompt_dataset = PromptDataset(tokenizer=tokenizer, data_path=args.prompt_dataset, max_datasets_size=16384)
@@ -209,9 +196,9 @@ if __name__ == '__main__':
                         choices=['ddp', 'colossalai_gemini', 'colossalai_zero2'],
                         default='colossalai_zero2',
                         help='strategy to use')
-    parser.add_argument('--model', default='gpt2', choices=['gpt2', 'bloom', 'opt', 'llama', 'roberta'])
+    parser.add_argument('--model', default='gpt2', choices=['gpt2', 'bloom', 'opt', 'llama'])
     parser.add_argument('--pretrain', type=str, default=None)
-    parser.add_argument('--rm_model', default=None, choices=['gpt2', 'bloom', 'opt', 'llama', 'roberta'])
+    parser.add_argument('--rm_model', default=None, choices=['gpt2', 'bloom', 'opt', 'llama'])
     parser.add_argument('--rm_path', type=str, default=None)
     parser.add_argument('--rm_pretrain', type=str, default=None)
     parser.add_argument('--save_path', type=str, default='actor_checkpoint_prompts')
@@ -1,27 +1,22 @@
 import argparse
 from random import randint
 
-import loralib as lora
 import torch
 import torch.distributed as dist
 from coati.dataset import HhRlhfDataset, RmStaticDataset
 from coati.models import LogExpLoss, LogSigLoss
-from coati.models.base import RewardModel
 from coati.models.bloom import BLOOMRM
-from coati.models.deberta import DebertaRM
 from coati.models.gpt import GPTRM
 from coati.models.llama import LlamaRM
 from coati.models.opt import OPTRM
-from coati.models.roberta import RoBERTaRM
 from coati.trainer import RewardModelTrainer
 from coati.trainer.strategies import DDPStrategy, GeminiStrategy, LowLevelZeroStrategy
-from coati.utils import prepare_llama_tokenizer_and_embedding
 from datasets import load_dataset
 from torch.optim import Adam
 from torch.optim.lr_scheduler import CosineAnnealingLR
 from torch.utils.data import DataLoader
 from torch.utils.data.distributed import DistributedSampler
-from transformers import AutoTokenizer, BloomTokenizerFast, DebertaV2Tokenizer, LlamaTokenizer, RobertaTokenizer
+from transformers import AutoTokenizer, BloomTokenizerFast, LlamaTokenizer
 from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer
 
 from colossalai.nn.optimizer import HybridAdam
@@ -46,12 +41,8 @@ def train(args):
         model = OPTRM(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
     elif args.model == 'gpt2':
         model = GPTRM(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
-    elif args.model == 'deberta':
-        model = DebertaRM(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
     elif args.model == 'llama':
         model = LlamaRM(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
-    elif args.model == 'roberta':
-        model = RoBERTaRM(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device())
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
 
@@ -64,24 +55,18 @@ def train(args):
     # configure tokenizer
     if args.model == 'gpt2':
         tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+        tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'bloom':
         tokenizer = BloomTokenizerFast.from_pretrained('bigscience/bloom-560m')
+        tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'opt':
         tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
-    elif args.model == 'deberta':
-        tokenizer = DebertaV2Tokenizer.from_pretrained('microsoft/deberta-v3-large')
+        tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'llama':
         tokenizer = LlamaTokenizer.from_pretrained(args.pretrain)
-    elif args.model == 'roberta':
-        tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
+        tokenizer.pad_token = tokenizer.unk_token
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
-    max_len = args.max_len
-
-    if args.model == 'llama':
-        tokenizer = prepare_llama_tokenizer_and_embedding(tokenizer, model)
-    else:
-        tokenizer.pad_token = tokenizer.eos_token
 
     # configure optimizer
     if args.strategy.startswith('colossalai'):
@@ -112,13 +97,13 @@ def train(args):
     valid_data = data['test'].select((randint(0, len(eval_data) - 1) for _ in range(len(eval_data) // 5)))
 
     if args.dataset == 'Dahoas/rm-static':
-        train_dataset = RmStaticDataset(train_data, tokenizer, max_len)
-        valid_dataset = RmStaticDataset(valid_data, tokenizer, max_len)
-        eval_dataset = RmStaticDataset(eval_data, tokenizer, max_len)
+        train_dataset = RmStaticDataset(train_data, tokenizer, args.max_len)
+        valid_dataset = RmStaticDataset(valid_data, tokenizer, args.max_len)
+        eval_dataset = RmStaticDataset(eval_data, tokenizer, args.max_len)
     elif args.dataset == 'Anthropic/hh-rlhf':
-        train_dataset = HhRlhfDataset(train_data, tokenizer, max_len)
-        valid_dataset = HhRlhfDataset(valid_data, tokenizer, max_len)
-        eval_dataset = HhRlhfDataset(eval_data, tokenizer, max_len)
+        train_dataset = HhRlhfDataset(train_data, tokenizer, args.max_len)
+        valid_dataset = HhRlhfDataset(valid_data, tokenizer, args.max_len)
+        eval_dataset = HhRlhfDataset(eval_data, tokenizer, args.max_len)
     else:
         raise ValueError(f'Unsupported dataset "{args.dataset}"')
 
@@ -195,7 +180,7 @@ if __name__ == '__main__':
     parser.add_argument('--strategy',
                         choices=['ddp', 'colossalai_gemini', 'colossalai_zero2'],
                         default='colossalai_zero2')
-    parser.add_argument('--model', choices=['gpt2', 'bloom', 'opt', 'deberta', 'llama', 'roberta'], default='bloom')
+    parser.add_argument('--model', choices=['gpt2', 'bloom', 'opt', 'llama'], default='bloom')
     parser.add_argument('--pretrain', type=str, default=None)
     parser.add_argument('--model_path', type=str, default=None)
     parser.add_argument('--need_optim_ckpt', type=bool, default=False)
@@ -9,7 +9,6 @@ from coati.dataset import DataCollatorForSupervisedDataset, SFTDataset, Supervis
 from coati.models import convert_to_lora_module
 from coati.trainer import SFTTrainer
 from coati.trainer.strategies import DDPStrategy, GeminiStrategy, LowLevelZeroStrategy
-from coati.utils import prepare_llama_tokenizer_and_embedding
 from datasets import load_dataset
 from torch.optim import Adam
 from torch.utils.data import DataLoader
@@ -68,6 +67,7 @@ def train(args):
         tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'opt':
         tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
+        tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'llama':
         tokenizer = AutoTokenizer.from_pretrained(
             args.pretrain,
@@ -75,24 +75,19 @@ def train(args):
             use_fast=False,
         )
         tokenizer.eos_token = '<\s>'
+        tokenizer.pad_token = tokenizer.unk_token
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
-    tokenizer.pad_token = tokenizer.eos_token
-    max_len = args.max_len
-    if args.model == 'llama':
-        tokenizer = prepare_llama_tokenizer_and_embedding(tokenizer, model)
 
-    if args.strategy == 'colossalai_gemini':
+    if args.model == 'llama' and args.strategy == 'colossalai_gemini':
         # this is a hack to deal with the resized embedding
         # to make sure all parameters are ColoParameter for Colossal-AI Gemini Compatibility
         for name, param in model.named_parameters():
             if not isinstance(param, ColoParameter):
                 sub_module_name = '.'.join(name.split('.')[:-1])
                 weight_name = name.split('.')[-1]
                 sub_module = model.get_submodule(sub_module_name)
                 setattr(sub_module, weight_name, ColoParameter(param))
-    else:
-        tokenizer.pad_token = tokenizer.eos_token
 
     # configure optimizer
     if args.strategy.startswith('colossalai'):
@@ -107,14 +102,14 @@ def train(args):
         train_data = load_dataset(args.dataset, 'super_natural_instructions', split='train')
         eval_data = load_dataset(args.dataset, 'super_natural_instructions', split='test')
 
-        train_dataset = SFTDataset(train_data, tokenizer, max_len)
-        eval_dataset = SFTDataset(eval_data, tokenizer, max_len)
+        train_dataset = SFTDataset(train_data, tokenizer, args.max_len)
+        eval_dataset = SFTDataset(eval_data, tokenizer, args.max_len)
 
     else:
         train_dataset = SupervisedDataset(tokenizer=tokenizer,
                                           data_path=args.dataset,
                                           max_datasets_size=args.max_datasets_size,
-                                          max_length=max_len)
+                                          max_length=args.max_len)
         eval_dataset = None
     data_collator = DataCollatorForSupervisedDataset(tokenizer=tokenizer)
 