diff --git a/.github/workflows/run_chatgpt_examples.yml b/.github/workflows/run_chatgpt_examples.yml index 129bf7ed3..510f6b6f0 100644 --- a/.github/workflows/run_chatgpt_examples.yml +++ b/.github/workflows/run_chatgpt_examples.yml @@ -4,11 +4,10 @@ on: pull_request: types: [synchronize, opened, reopened] paths: - - 'applications/Chat/coati/**' - - 'applications/Chat/requirements.txt' - - 'applications/Chat/setup.py' - - 'applications/Chat/examples/**' - + - "applications/Chat/coati/**" + - "applications/Chat/requirements.txt" + - "applications/Chat/setup.py" + - "applications/Chat/examples/**" jobs: tests: @@ -38,10 +37,7 @@ jobs: - name: Install Transformers run: | - cd applications/Chat - git clone https://github.com/hpcaitech/transformers - cd transformers - pip install -v . + pip install transformers==4.30.2 - name: Execute Examples run: | diff --git a/applications/Chat/README.md b/applications/Chat/README.md index 016272ed8..162528cee 100644 --- a/applications/Chat/README.md +++ b/applications/Chat/README.md @@ -98,12 +98,9 @@ pip install . ``` ### Install the Transformers -Given Hugging Face hasn't officially supported the LLaMA models, We fork a branch of Transformers that can be compatible with our code ```shell -git clone https://github.com/hpcaitech/transformers -cd transformers -pip install . +pip install transformers==4.30.2 ``` ## How to use? diff --git a/applications/Chat/coati/models/deberta/__init__.py b/applications/Chat/coati/models/deberta/__init__.py deleted file mode 100644 index b66888f34..000000000 --- a/applications/Chat/coati/models/deberta/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .deberta_critic import DebertaCritic -from .deberta_rm import DebertaRM - -__all__ = ['DebertaCritic', 'DebertaRM'] diff --git a/applications/Chat/coati/models/deberta/deberta_critic.py b/applications/Chat/coati/models/deberta/deberta_critic.py deleted file mode 100644 index e84c1dbd8..000000000 --- a/applications/Chat/coati/models/deberta/deberta_critic.py +++ /dev/null @@ -1,36 +0,0 @@ -from typing import Optional - -import torch.nn as nn -from transformers import DebertaV2Config, DebertaV2Model - -from ..base import Critic - - -class DebertaCritic(Critic): - """ - Deberta Critic model. - - Args: - pretrained (str): Pretrained model name or path. - config (DebertaV2Config): Model config. - checkpoint (bool): Enable gradient checkpointing. - lora_rank (int): Rank of the LO-RA decomposition. - lora_train_bias (str): LoRA bias training mode. - """ - - def __init__(self, - pretrained: Optional[str] = None, - config: Optional[DebertaV2Config] = None, - checkpoint: bool = False, - lora_rank: int = 0, - lora_train_bias: str = 'none') -> None: - if pretrained is not None: - model = DebertaV2Model.from_pretrained(pretrained) - elif config is not None: - model = DebertaV2Model(config) - else: - model = DebertaV2Model(DebertaV2Config()) - if checkpoint: - model.gradient_checkpointing_enable() - value_head = nn.Linear(model.config.hidden_size, 1) - super().__init__(model, value_head, lora_rank, lora_train_bias) diff --git a/applications/Chat/coati/models/deberta/deberta_rm.py b/applications/Chat/coati/models/deberta/deberta_rm.py deleted file mode 100644 index 2448c879e..000000000 --- a/applications/Chat/coati/models/deberta/deberta_rm.py +++ /dev/null @@ -1,37 +0,0 @@ -from typing import Optional - -import torch.nn as nn -from transformers import DebertaV2Config, DebertaV2Model - -from ..base import RewardModel - - -class DebertaRM(RewardModel): - """ - Deberta Reward model. 
- - Args: - pretrained (str): Pretrained model name or path. - config (DebertaV2Config): Model config. - checkpoint (bool): Enable gradient checkpointing. - lora_rank (int): Rank of the LO-RA decomposition. - lora_train_bias (str): LoRA bias training mode. - """ - - def __init__(self, - pretrained: str = None, - config: Optional[DebertaV2Config] = None, - checkpoint: bool = False, - lora_rank: int = 0, - lora_train_bias: str = 'none') -> None: - if pretrained is not None: - model = DebertaV2Model.from_pretrained(pretrained) - elif config is not None: - model = DebertaV2Model(config) - else: - model = DebertaV2Model(DebertaV2Config()) - if checkpoint: - model.gradient_checkpointing_enable() - value_head = nn.Linear(model.config.hidden_size, 1) - value_head.weight.data.normal_(mean=0.0, std=1 / (model.config.hidden_size + 1)) - super().__init__(model, value_head, lora_rank, lora_train_bias) diff --git a/applications/Chat/coati/models/roberta/__init__.py b/applications/Chat/coati/models/roberta/__init__.py deleted file mode 100644 index 0f4a8de06..000000000 --- a/applications/Chat/coati/models/roberta/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .roberta_actor import RoBERTaActor -from .roberta_critic import RoBERTaCritic -from .roberta_rm import RoBERTaRM - -__all__ = ['RoBERTaActor', 'RoBERTaCritic', 'RoBERTaRM'] \ No newline at end of file diff --git a/applications/Chat/coati/models/roberta/roberta_actor.py b/applications/Chat/coati/models/roberta/roberta_actor.py deleted file mode 100644 index e35fa6eb1..000000000 --- a/applications/Chat/coati/models/roberta/roberta_actor.py +++ /dev/null @@ -1,35 +0,0 @@ -from typing import Optional - -from transformers.models.roberta.configuration_roberta import RobertaConfig -from transformers.models.roberta.modeling_roberta import RobertaForCausalLM - -from ..base import Actor - -class RoBERTaActor(Actor): - """ - RoBERTa Actor model. - - Args: - pretrained (str): Pretrained model name or path. - config (RoBERTaConfig): Model config. - checkpoint (bool): Enable gradient checkpointing. - lora_rank (int): Rank of the low-rank approximation. - lora_train_bias (str): LoRA bias training mode. - """ - - - def __init__(self, - pretrained: Optional[str] = None, - config: Optional[RobertaConfig] = None, - checkpoint: bool = False, - lora_rank: int = 0, - lora_train_bias: str = 'none') -> None: - if pretrained is not None: - model = RobertaForCausalLM.from_pretrained(pretrained) - elif config is not None: - model = RobertaForCausalLM(config) - else: - model = RobertaForCausalLM(RobertaConfig()) - if checkpoint: - model.gradient_checkpointing_enable() - super().__init__(model, lora_rank, lora_train_bias) diff --git a/applications/Chat/coati/models/roberta/roberta_critic.py b/applications/Chat/coati/models/roberta/roberta_critic.py deleted file mode 100644 index c8dc0d9e1..000000000 --- a/applications/Chat/coati/models/roberta/roberta_critic.py +++ /dev/null @@ -1,38 +0,0 @@ -from typing import Optional - -import torch.nn as nn -from transformers.models.roberta.configuration_roberta import RobertaConfig -from transformers.models.roberta.modeling_roberta import RobertaModel - -from ..base import Critic - - -class RoBERTaCritic(Critic): - """ - RoBERTa Critic model. - - Args: - pretrained (str): Pretrained model name or path. - config (RoBERTa Config): Model config. - checkpoint (bool): Enable gradient checkpointing. - lora_rank (int): Rank of the low-rank approximation. - lora_train_bias (str): LoRA bias training mode. 
- """ - - def __init__(self, - pretrained: Optional[str] = None, - config: Optional[RobertaConfig] = None, - checkpoint: bool = False, - lora_rank: int = 0, - lora_train_bias: str = 'none', - **kwargs) -> None: - if pretrained is not None: - model = RobertaModel.from_pretrained(pretrained, add_pooling_layer=False) - elif config is not None: - model = RobertaModel(config) - else: - model = RobertaModel(RobertaConfig()) - if checkpoint: - model.gradient_checkpointing_enable() - value_head = nn.Linear(model.config.hidden_size, 1) - super().__init__(model, value_head, lora_rank, lora_train_bias, **kwargs) diff --git a/applications/Chat/coati/models/roberta/roberta_rm.py b/applications/Chat/coati/models/roberta/roberta_rm.py deleted file mode 100644 index 770750529..000000000 --- a/applications/Chat/coati/models/roberta/roberta_rm.py +++ /dev/null @@ -1,39 +0,0 @@ -from typing import Optional - -import torch.nn as nn -from transformers import RobertaConfig, RobertaModel - - -from ..base import RewardModel - - -class RoBERTaRM(RewardModel): - """ - RoBERTa Reward model. - - Args: - pretrained (str): Pretrained model name or path. - config (RoBERTaConfig): Model config. - checkpoint (bool): Enable gradient checkpointing. - lora_rank (int): Rank of the low-rank approximation. - lora_train_bias (str): LoRA bias training mode. - """ - - def __init__(self, - pretrained: Optional[str] = None, - config: Optional[RobertaConfig] = None, - checkpoint: bool = False, - lora_rank: int = 0, - lora_train_bias: str = 'none') -> None: - if pretrained is not None: - model = RobertaModel.from_pretrained(pretrained, add_pooling_layer=False) - elif config is not None: - model = RobertaModel(config) - else: - model = RobertaModel(RobertaConfig()) - if checkpoint: - model.gradient_checkpointing_enable() - - value_head = nn.Linear(model.config.hidden_size, 1) - value_head.weight.data.normal_(mean=0.0, std=1/(model.config.hidden_size + 1)) - super().__init__(model, value_head, lora_rank, lora_train_bias) \ No newline at end of file diff --git a/applications/Chat/coati/ray/utils.py b/applications/Chat/coati/ray/utils.py index 4f8e0b8a8..761186b95 100644 --- a/applications/Chat/coati/ray/utils.py +++ b/applications/Chat/coati/ray/utils.py @@ -9,10 +9,8 @@ from coati.models.bloom import BLOOMRM, BLOOMActor, BLOOMCritic from coati.models.gpt import GPTRM, GPTActor, GPTCritic from coati.models.llama import LlamaActor, LlamaCritic, LlamaRM from coati.models.opt import OPTRM, OPTActor, OPTCritic -from coati.models.roberta import RoBERTaActor, RoBERTaCritic, RoBERTaRM from coati.trainer.strategies import DDPStrategy, GeminiStrategy, LowLevelZeroStrategy -from coati.utils import prepare_llama_tokenizer_and_embedding -from transformers import AutoTokenizer, BloomTokenizerFast, GPT2Tokenizer, LlamaTokenizer, RobertaTokenizer +from transformers import AutoTokenizer, BloomTokenizerFast, GPT2Tokenizer, LlamaTokenizer def is_rank_0() -> bool: @@ -36,8 +34,6 @@ def get_actor_from_args(model: str, pretrained: str = None, config=None, lora_ra actor = OPTActor(pretrained=pretrained, config=config, lora_rank=lora_rank) elif model == 'llama': actor = LlamaActor(pretrained=pretrained, config=config, lora_rank=lora_rank) - elif model == 'roberta': - actor = RoBERTaActor(pretrained=pretrained, config=config, lora_rank=lora_rank) else: raise ValueError(f'Unsupported actor model "{model}"') return actor @@ -52,8 +48,6 @@ def get_critic_from_args(model: str, pretrained: str = None, config=None, lora_r critic = OPTCritic(pretrained=pretrained, 
lora_rank=lora_rank, config=config, use_action_mask=True) elif model == 'llama': critic = LlamaCritic(pretrained=pretrained, lora_rank=lora_rank, config=config, use_action_mask=True) - elif model == 'roberta': - critic = RoBERTaCritic(pretrained=pretrained, lora_rank=lora_rank, config=config, use_action_mask=True) else: raise ValueError(f'Unsupported reward model "{model}"') return critic @@ -68,8 +62,6 @@ def get_reward_model_from_args(model: str, pretrained: str = None, config=None): reward_model = OPTRM(pretrained=pretrained, config=config) elif model == 'llama': reward_model = LlamaRM(pretrained=pretrained, config=config) - elif model == 'roberta': - reward_model = RoBERTaRM(pretrained=pretrained, config=config) else: raise ValueError(f'Unsupported reward model "{model}"') return reward_model @@ -101,8 +93,6 @@ def get_tokenizer_from_args(model: str, **kwargs): elif model == 'llama': pretrain_path = kwargs["pretrain"] tokenizer = AutoTokenizer.from_pretrained(pretrain_path) - elif model == 'roberta': - tokenizer = RobertaTokenizer.from_pretrained("roberta-base") else: raise ValueError(f'Unsupported model "{model}"') diff --git a/applications/Chat/coati/utils/__init__.py b/applications/Chat/coati/utils/__init__.py deleted file mode 100644 index 112b82b97..000000000 --- a/applications/Chat/coati/utils/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .tokenizer_utils import prepare_llama_tokenizer_and_embedding, smart_tokenizer_and_embedding_resize - -__all__ = ['smart_tokenizer_and_embedding_resize', 'prepare_llama_tokenizer_and_embedding'] \ No newline at end of file diff --git a/applications/Chat/coati/utils/tokenizer_utils.py b/applications/Chat/coati/utils/tokenizer_utils.py deleted file mode 100644 index e0d96cfc8..000000000 --- a/applications/Chat/coati/utils/tokenizer_utils.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright 2023 Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Dict - -import transformers - -DEFAULT_PAD_TOKEN = "[PAD]" -DEFAULT_EOS_TOKEN = "" -DEFAULT_BOS_TOKEN = "" -DEFAULT_UNK_TOKEN = "" - - -def prepare_llama_tokenizer_and_embedding( - tokenizer: transformers.PreTrainedTokenizer, - model: transformers.PreTrainedModel, - special_tokens_dict: Dict = dict(pad_token=DEFAULT_PAD_TOKEN), -): - """prepare llama tokenizer and embedding. - - """ - - if tokenizer.pad_token is None: - smart_tokenizer_and_embedding_resize( - special_tokens_dict=dict(pad_token=DEFAULT_PAD_TOKEN), - tokenizer=tokenizer, - model=model, - ) - - tokenizer.add_special_tokens({ - "eos_token": DEFAULT_EOS_TOKEN, - "bos_token": DEFAULT_BOS_TOKEN, - "unk_token": DEFAULT_UNK_TOKEN, - }) - - return tokenizer - - -def smart_tokenizer_and_embedding_resize( - tokenizer: transformers.PreTrainedTokenizer, - model: transformers.PreTrainedModel, - special_tokens_dict: Dict = dict(pad_token=DEFAULT_PAD_TOKEN), -): - """Resize tokenizer and embedding. 
- - Note: This is the unoptimized version that may make your embedding size not be divisible by 64. - """ - - if tokenizer.pad_token is None: - num_new_tokens = tokenizer.add_special_tokens(special_tokens_dict) - - model.resize_token_embeddings(len(tokenizer)) - - if num_new_tokens > 0: - input_embeddings = model.get_input_embeddings().weight.data - output_embeddings = model.get_output_embeddings().weight.data - - input_embeddings_avg = input_embeddings[:-num_new_tokens].mean(dim=0, keepdim=True) - output_embeddings_avg = output_embeddings[:-num_new_tokens].mean(dim=0, keepdim=True) - - input_embeddings[-num_new_tokens:] = input_embeddings_avg - output_embeddings[-num_new_tokens:] = output_embeddings_avg diff --git a/applications/Chat/examples/community/peft/train_peft_prompts.py b/applications/Chat/examples/community/peft/train_peft_prompts.py index 9d8dbb38a..9385e457d 100644 --- a/applications/Chat/examples/community/peft/train_peft_prompts.py +++ b/applications/Chat/examples/community/peft/train_peft_prompts.py @@ -10,7 +10,6 @@ from coati.models.llama import LlamaActor, LlamaCritic, LlamaRM from coati.models.opt import OPTRM, OPTActor, OPTCritic from coati.trainer import PPOTrainer from coati.trainer.strategies import DDPStrategy, GeminiStrategy, LowLevelZeroStrategy -from coati.utils import prepare_llama_tokenizer_and_embedding from easy_dataset import EasyPromptsDataset, EasySupervisedDataset from easy_models import BLOOMActor from peft import PeftModel @@ -112,21 +111,20 @@ def main(args): # configure tokenizer if args.model == 'gpt2': tokenizer = GPT2Tokenizer.from_pretrained(args.rm_pretrain) + tokenizer.pad_token = tokenizer.eos_token elif args.model == 'bloom': tokenizer = BloomTokenizerFast.from_pretrained(args.rm_pretrain) + tokenizer.pad_token = tokenizer.eos_token elif args.model == 'opt': tokenizer = AutoTokenizer.from_pretrained(args.rm_pretrain) + tokenizer.pad_token = tokenizer.eos_token elif args.model == 'llama': tokenizer = LlamaTokenizer.from_pretrained(args.pretrain) tokenizer.eos_token = '<\s>' + tokenizer.pad_token = tokenizer.unk_token else: raise ValueError(f'Unsupported model "{args.model}"') - if args.model == 'llama': - tokenizer = prepare_llama_tokenizer_and_embedding(tokenizer, actor) - else: - tokenizer.pad_token = tokenizer.eos_token - data_collator = DataCollatorForSupervisedDataset(tokenizer=tokenizer) prompt_dataset = EasyPromptsDataset(args.prompt_path, tokenizer) diff --git a/applications/Chat/examples/community/peft/train_peft_sft.py b/applications/Chat/examples/community/peft/train_peft_sft.py index 54fe0ad55..4af08e6d0 100644 --- a/applications/Chat/examples/community/peft/train_peft_sft.py +++ b/applications/Chat/examples/community/peft/train_peft_sft.py @@ -12,7 +12,6 @@ from coati.models.llama import LlamaLM from coati.models.opt import OPTLM from coati.trainer import SFTTrainer from coati.trainer.strategies import DDPStrategy, GeminiStrategy, LowLevelZeroStrategy -from coati.utils import prepare_llama_tokenizer_and_embedding from datasets import load_dataset from easy_dataset import EasyDataset from peft import LoraConfig, PeftModel, TaskType, get_peft_model @@ -65,10 +64,11 @@ def train(args): tokenizer = GPT2Tokenizer.from_pretrained('gpt2') tokenizer.pad_token = tokenizer.eos_token elif args.model == 'bloom': - tokenizer = BloomTokenizerFast.from_pretrained(args.pretrain) + tokenizer = BloomTokenizerFast.from_pretrained("bigscience/bloom-560m") tokenizer.pad_token = tokenizer.eos_token elif args.model == 'opt': tokenizer = 
AutoTokenizer.from_pretrained("facebook/opt-350m")
+        tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'llama':
         tokenizer = AutoTokenizer.from_pretrained(
             args.pretrain,
@@ -76,23 +76,19 @@ def train(args):
             use_fast=False,
         )
         tokenizer.eos_token = '<\s>'
+        tokenizer.pad_token = tokenizer.unk_token
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
 
-    tokenizer.pad_token = tokenizer.eos_token
-    if args.model == 'llama':
-        tokenizer = prepare_llama_tokenizer_and_embedding(tokenizer, model)
-        if args.strategy == 'colossalai_gemini':
-            # this is a hack to deal with the resized embedding
-            # to make sure all parameters are ColoParameter for Colossal-AI Gemini Compatibility
-            for name, param in model.named_parameters():
-                if not isinstance(param, ColoParameter):
-                    sub_module_name = '.'.join(name.split('.')[:-1])
-                    weight_name = name.split('.')[-1]
-                    sub_module = model.get_submodule(sub_module_name)
-                    setattr(sub_module, weight_name, ColoParameter(param))
-    else:
-        tokenizer.pad_token = tokenizer.eos_token
+    if args.model == 'llama' and args.strategy == 'colossalai_gemini':
+        # this is a hack to make sure all parameters are ColoParameter,
+        # for Colossal-AI Gemini compatibility
+        for name, param in model.named_parameters():
+            if not isinstance(param, ColoParameter):
+                sub_module_name = '.'.join(name.split('.')[:-1])
+                weight_name = name.split('.')[-1]
+                sub_module = model.get_submodule(sub_module_name)
+                setattr(sub_module, weight_name, ColoParameter(param))
 
     # configure optimizer
     if args.strategy.startswith('colossalai'):
diff --git a/applications/Chat/examples/inference.py b/applications/Chat/examples/inference.py
index ae59d91c1..4b49e7608 100644
--- a/applications/Chat/examples/inference.py
+++ b/applications/Chat/examples/inference.py
@@ -2,10 +2,10 @@ import argparse
 
 import torch
 from coati.models.bloom import BLOOMActor
+from coati.models.generation import generate
 from coati.models.gpt import GPTActor
 from coati.models.opt import OPTActor
-from coati.models.roberta import RoBERTaActor
-from transformers import AutoTokenizer, RobertaTokenizer
+from transformers import AutoTokenizer
 from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer
 
 
@@ -17,13 +17,11 @@ def eval(args):
         actor = BLOOMActor(pretrained=args.pretrain).to(torch.cuda.current_device())
     elif args.model == 'opt':
         actor = OPTActor(pretrained=args.pretrain).to(torch.cuda.current_device())
-    elif args.model == 'roberta':
-        actor = RoBERTaActor(pretrained=args.pretrain).to(torch.cuda.current_device())
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
 
     state_dict = torch.load(args.model_path)
-    actor.model.load_state_dict(state_dict)
+    actor.load_state_dict(state_dict)
 
     # configure tokenizer
     if args.model == 'gpt2':
@@ -34,27 +32,26 @@ def eval(args):
         tokenizer.pad_token = tokenizer.eos_token
     elif args.model == 'opt':
         tokenizer = AutoTokenizer.from_pretrained('facebook/opt-350m')
-    elif args.model == 'roberta':
-        tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
     else:
         raise ValueError(f'Unsupported model "{args.model}"')
 
     actor.eval()
     input = args.input
     input_ids = tokenizer.encode(input, return_tensors='pt').to(torch.cuda.current_device())
-    outputs = actor.generate(input_ids,
-                             max_length=args.max_length,
-                             do_sample=True,
-                             top_k=50,
-                             top_p=0.95,
-                             num_return_sequences=1)
+    outputs = generate(actor,
+                       input_ids,
+                       max_length=args.max_length,
+                       do_sample=True,
+                       top_k=50,
+                       top_p=0.95,
+                       num_return_sequences=1)
     output = 
tokenizer.batch_decode(outputs[0], skip_special_tokens=True)
     print(output)
 
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
-    parser.add_argument('--model', default='gpt2', choices=['gpt2', 'bloom', 'opt', 'roberta'])
+    parser.add_argument('--model', default='gpt2', choices=['gpt2', 'bloom', 'opt'])
     # We suggest to use the pretrained model from HuggingFace, use pretrain to configure model
     parser.add_argument('--pretrain', type=str, default=None)
     parser.add_argument('--model_path', type=str, default=None)
diff --git a/applications/Chat/examples/test_ci.sh b/applications/Chat/examples/test_ci.sh
index dec1f7c03..fe2af4710 100755
--- a/applications/Chat/examples/test_ci.sh
+++ b/applications/Chat/examples/test_ci.sh
@@ -43,18 +43,18 @@ pip install -r ${BASE}/requirements.txt
 
 wandb init -m offline
 
-# FIXME: This is a hack to skip tests that are not working (tested at commit b3ab7fbabf)
+# FIXME: This is a hack to skip tests that are not working
 # - gpt2-ddp: RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation
-# - llama-*: Repository Not Found for url: https://huggingface.co/{...}/resolve/main/tokenizer.model.
-# - roberta-*: RuntimeError: CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling `cublasCreate(handle)`
+# - llama-*: These tests pass locally but are skipped here due to their long execution time
 SKIPPED_TESTS=(
     "gpt2-ddp"
-    "llama-ddp" "llama-colossalai_gemini" "llama-colossalai_zero2"
-    "roberta-ddp" "roberta-colossalai_gemini" "roberta-colossalai_zero2"
+    "llama-ddp"
+    "llama-colossalai_gemini"
+    "llama-colossalai_zero2"
 )
 
 # These tests are quick and do not have any dependencies
-for model in 'gpt2' 'bloom' 'opt' 'llama' 'roberta'; do
+for model in 'gpt2' 'bloom' 'opt' 'llama'; do
     for strategy in 'ddp' 'colossalai_gemini' 'colossalai_zero2'; do
         if [[ " ${SKIPPED_TESTS[*]} " =~ " ${model}-${strategy} " ]]; then
             echo "[Test]: Skipped $model-$strategy"
@@ -64,7 +64,7 @@ for model in 'gpt2' 'bloom' 'opt' 'llama' 'roberta'; do
             --prompt_dataset $PROMPT_PATH --pretrain_dataset $PRETRAIN_DATASET \
             --strategy $strategy --model $model \
             --num_episodes 1 --num_collect_steps 2 --num_update_steps 1 \
-            --train_batch_size 2
+            --train_batch_size 2 --lora_rank 4
     done
 done
 
@@ -124,22 +124,6 @@ torchrun --standalone --nproc_per_node=2 ${BASE}/train_reward_model.py \
     --save_path ${BASE}/rm_ckpt.pt
 rm -rf ${BASE}/rm_ckpt.pt
 
-torchrun --standalone --nproc_per_node=2 ${BASE}/train_reward_model.py \
-    --pretrain 'microsoft/deberta-v3-large' --model 'deberta' \
-    --strategy colossalai_zero2 --loss_fn 'log_sig' \
-    --dataset 'Anthropic/hh-rlhf' --subset 'harmless-base' \
-    --test True --lora_rank 4 \
-    --save_path ${BASE}/rm_ckpt.pt
-rm -rf ${BASE}/rm_ckpt.pt
-
-torchrun --standalone --nproc_per_node=2 ${BASE}/train_reward_model.py \
-    --pretrain 'roberta-base' --model 'roberta' \
-    --strategy colossalai_zero2 --loss_fn 'log_exp' \
-    --dataset 'Anthropic/hh-rlhf' --subset 'harmless-base' \
-    --test True --lora_rank 4 \
-    --save_path ${BASE}/rm_ckpt.pt
-rm -rf ${BASE}/rm_ckpt.pt
-
 # train rl
 torchrun --standalone --nproc_per_node=2 ${BASE}/train_prompts.py \
     --prompt_dataset $PROMPT_PATH --pretrain_dataset $PRETRAIN_DATASET \
diff --git a/applications/Chat/examples/train_prompts.py b/applications/Chat/examples/train_prompts.py
index c748eeb21..7338a6d51 100644
--- a/applications/Chat/examples/train_prompts.py
+++ b/applications/Chat/examples/train_prompts.py
@@ -7,14 +7,12 @@ from coati.models.bloom import BLOOMRM, BLOOMActor, BLOOMCritic
 from 
coati.models.gpt import GPTRM, GPTActor, GPTCritic from coati.models.llama import LlamaActor, LlamaCritic, LlamaRM from coati.models.opt import OPTRM, OPTActor, OPTCritic -from coati.models.roberta import RoBERTaActor, RoBERTaCritic, RoBERTaRM from coati.trainer import PPOTrainer from coati.trainer.strategies import DDPStrategy, GeminiStrategy, LowLevelZeroStrategy -from coati.utils import prepare_llama_tokenizer_and_embedding from torch.optim import Adam from torch.utils.data import DataLoader from torch.utils.data.distributed import DistributedSampler -from transformers import AutoTokenizer, BloomTokenizerFast, GPT2Tokenizer, LlamaTokenizer, RobertaTokenizer +from transformers import AutoTokenizer, BloomTokenizerFast, GPT2Tokenizer, LlamaTokenizer from colossalai.nn.optimizer import HybridAdam @@ -43,8 +41,6 @@ def main(args): initial_model = OPTActor(pretrained=args.pretrain) elif args.model == 'llama': initial_model = LlamaActor(pretrained=args.pretrain) - elif args.model == 'roberta': - initial_model = RoBERTaActor(pretrained=args.pretrain) else: raise ValueError(f'Unsupported actor model "{args.model}"') @@ -61,8 +57,6 @@ def main(args): reward_model = OPTRM(pretrained=args.rm_pretrain) elif rm_model_name == 'llama': reward_model = LlamaRM(pretrained=args.rm_pretrain) - elif rm_model_name == 'roberta': - reward_model = RoBERTaRM(pretrained=args.rm_pretrain) else: raise ValueError(f'Unsupported reward model "{rm_model_name}"') @@ -80,8 +74,6 @@ def main(args): actor = OPTActor(pretrained=args.pretrain, lora_rank=args.lora_rank) elif args.model == 'llama': actor = LlamaActor(pretrained=args.pretrain, lora_rank=args.lora_rank) - elif args.model == 'roberta': - actor = RoBERTaActor(pretrained=args.pretrain, lora_rank=args.lora_rank) else: raise ValueError(f'Unsupported actor model "{args.model}"') @@ -93,8 +85,6 @@ def main(args): critic = OPTCritic(pretrained=args.rm_pretrain, lora_rank=args.lora_rank, use_action_mask=True) elif rm_model_name == 'llama': critic = LlamaCritic(pretrained=args.rm_pretrain, lora_rank=args.lora_rank, use_action_mask=True) - elif rm_model_name == 'roberta': - critic = RoBERTaCritic(pretrained=args.rm_pretrain, lora_rank=args.lora_rank, use_action_mask=True) else: raise ValueError(f'Unsupported reward model "{rm_model_name}"') @@ -117,23 +107,20 @@ def main(args): # configure tokenizer if args.model == 'gpt2': tokenizer = GPT2Tokenizer.from_pretrained('gpt2') + tokenizer.pad_token = tokenizer.eos_token elif args.model == 'bloom': tokenizer = BloomTokenizerFast.from_pretrained('bigscience/bloom-560m') + tokenizer.pad_token = tokenizer.eos_token elif args.model == 'opt': tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m") + tokenizer.pad_token = tokenizer.eos_token elif args.model == 'llama': tokenizer = LlamaTokenizer.from_pretrained(args.pretrain) tokenizer.eos_token = '<\s>' - elif args.model == 'roberta': - tokenizer = RobertaTokenizer.from_pretrained("roberta-base") + tokenizer.pad_token = tokenizer.unk_token else: raise ValueError(f'Unsupported model "{args.model}"') - if args.model == 'llama': - tokenizer = prepare_llama_tokenizer_and_embedding(tokenizer, actor) - else: - tokenizer.pad_token = tokenizer.eos_token - data_collator = DataCollatorForSupervisedDataset(tokenizer=tokenizer) prompt_dataset = PromptDataset(tokenizer=tokenizer, data_path=args.prompt_dataset, max_datasets_size=16384) @@ -209,9 +196,9 @@ if __name__ == '__main__': choices=['ddp', 'colossalai_gemini', 'colossalai_zero2'], default='colossalai_zero2', help='strategy to use') 
- parser.add_argument('--model', default='gpt2', choices=['gpt2', 'bloom', 'opt', 'llama', 'roberta']) + parser.add_argument('--model', default='gpt2', choices=['gpt2', 'bloom', 'opt', 'llama']) parser.add_argument('--pretrain', type=str, default=None) - parser.add_argument('--rm_model', default=None, choices=['gpt2', 'bloom', 'opt', 'llama', 'roberta']) + parser.add_argument('--rm_model', default=None, choices=['gpt2', 'bloom', 'opt', 'llama']) parser.add_argument('--rm_path', type=str, default=None) parser.add_argument('--rm_pretrain', type=str, default=None) parser.add_argument('--save_path', type=str, default='actor_checkpoint_prompts') diff --git a/applications/Chat/examples/train_reward_model.py b/applications/Chat/examples/train_reward_model.py index e9618e0c1..5b1b8d3d1 100644 --- a/applications/Chat/examples/train_reward_model.py +++ b/applications/Chat/examples/train_reward_model.py @@ -1,27 +1,22 @@ import argparse from random import randint -import loralib as lora import torch import torch.distributed as dist from coati.dataset import HhRlhfDataset, RmStaticDataset from coati.models import LogExpLoss, LogSigLoss -from coati.models.base import RewardModel from coati.models.bloom import BLOOMRM -from coati.models.deberta import DebertaRM from coati.models.gpt import GPTRM from coati.models.llama import LlamaRM from coati.models.opt import OPTRM -from coati.models.roberta import RoBERTaRM from coati.trainer import RewardModelTrainer from coati.trainer.strategies import DDPStrategy, GeminiStrategy, LowLevelZeroStrategy -from coati.utils import prepare_llama_tokenizer_and_embedding from datasets import load_dataset from torch.optim import Adam from torch.optim.lr_scheduler import CosineAnnealingLR from torch.utils.data import DataLoader from torch.utils.data.distributed import DistributedSampler -from transformers import AutoTokenizer, BloomTokenizerFast, DebertaV2Tokenizer, LlamaTokenizer, RobertaTokenizer +from transformers import AutoTokenizer, BloomTokenizerFast, LlamaTokenizer from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer from colossalai.nn.optimizer import HybridAdam @@ -46,12 +41,8 @@ def train(args): model = OPTRM(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device()) elif args.model == 'gpt2': model = GPTRM(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device()) - elif args.model == 'deberta': - model = DebertaRM(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device()) elif args.model == 'llama': model = LlamaRM(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device()) - elif args.model == 'roberta': - model = RoBERTaRM(pretrained=args.pretrain, lora_rank=args.lora_rank).to(torch.cuda.current_device()) else: raise ValueError(f'Unsupported model "{args.model}"') @@ -64,24 +55,18 @@ def train(args): # configure tokenizer if args.model == 'gpt2': tokenizer = GPT2Tokenizer.from_pretrained('gpt2') + tokenizer.pad_token = tokenizer.eos_token elif args.model == 'bloom': tokenizer = BloomTokenizerFast.from_pretrained('bigscience/bloom-560m') + tokenizer.pad_token = tokenizer.eos_token elif args.model == 'opt': tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m") - elif args.model == 'deberta': - tokenizer = DebertaV2Tokenizer.from_pretrained('microsoft/deberta-v3-large') + tokenizer.pad_token = tokenizer.eos_token elif args.model == 'llama': tokenizer = LlamaTokenizer.from_pretrained(args.pretrain) - elif args.model == 'roberta': - tokenizer 
= RobertaTokenizer.from_pretrained("roberta-base") + tokenizer.pad_token = tokenizer.unk_token else: raise ValueError(f'Unsupported model "{args.model}"') - max_len = args.max_len - - if args.model == 'llama': - tokenizer = prepare_llama_tokenizer_and_embedding(tokenizer, model) - else: - tokenizer.pad_token = tokenizer.eos_token # configure optimizer if args.strategy.startswith('colossalai'): @@ -112,13 +97,13 @@ def train(args): valid_data = data['test'].select((randint(0, len(eval_data) - 1) for _ in range(len(eval_data) // 5))) if args.dataset == 'Dahoas/rm-static': - train_dataset = RmStaticDataset(train_data, tokenizer, max_len) - valid_dataset = RmStaticDataset(valid_data, tokenizer, max_len) - eval_dataset = RmStaticDataset(eval_data, tokenizer, max_len) + train_dataset = RmStaticDataset(train_data, tokenizer, args.max_len) + valid_dataset = RmStaticDataset(valid_data, tokenizer, args.max_len) + eval_dataset = RmStaticDataset(eval_data, tokenizer, args.max_len) elif args.dataset == 'Anthropic/hh-rlhf': - train_dataset = HhRlhfDataset(train_data, tokenizer, max_len) - valid_dataset = HhRlhfDataset(valid_data, tokenizer, max_len) - eval_dataset = HhRlhfDataset(eval_data, tokenizer, max_len) + train_dataset = HhRlhfDataset(train_data, tokenizer, args.max_len) + valid_dataset = HhRlhfDataset(valid_data, tokenizer, args.max_len) + eval_dataset = HhRlhfDataset(eval_data, tokenizer, args.max_len) else: raise ValueError(f'Unsupported dataset "{args.dataset}"') @@ -195,7 +180,7 @@ if __name__ == '__main__': parser.add_argument('--strategy', choices=['ddp', 'colossalai_gemini', 'colossalai_zero2'], default='colossalai_zero2') - parser.add_argument('--model', choices=['gpt2', 'bloom', 'opt', 'deberta', 'llama', 'roberta'], default='bloom') + parser.add_argument('--model', choices=['gpt2', 'bloom', 'opt', 'llama'], default='bloom') parser.add_argument('--pretrain', type=str, default=None) parser.add_argument('--model_path', type=str, default=None) parser.add_argument('--need_optim_ckpt', type=bool, default=False) diff --git a/applications/Chat/examples/train_sft.py b/applications/Chat/examples/train_sft.py index 30becd8a6..cb3eb649d 100644 --- a/applications/Chat/examples/train_sft.py +++ b/applications/Chat/examples/train_sft.py @@ -9,7 +9,6 @@ from coati.dataset import DataCollatorForSupervisedDataset, SFTDataset, Supervis from coati.models import convert_to_lora_module from coati.trainer import SFTTrainer from coati.trainer.strategies import DDPStrategy, GeminiStrategy, LowLevelZeroStrategy -from coati.utils import prepare_llama_tokenizer_and_embedding from datasets import load_dataset from torch.optim import Adam from torch.utils.data import DataLoader @@ -68,6 +67,7 @@ def train(args): tokenizer.pad_token = tokenizer.eos_token elif args.model == 'opt': tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m") + tokenizer.pad_token = tokenizer.eos_token elif args.model == 'llama': tokenizer = AutoTokenizer.from_pretrained( args.pretrain, @@ -75,24 +75,19 @@ def train(args): use_fast=False, ) tokenizer.eos_token = '<\s>' + tokenizer.pad_token = tokenizer.unk_token else: raise ValueError(f'Unsupported model "{args.model}"') - tokenizer.pad_token = tokenizer.eos_token - max_len = args.max_len - if args.model == 'llama': - tokenizer = prepare_llama_tokenizer_and_embedding(tokenizer, model) - if args.strategy == 'colossalai_gemini': - # this is a hack to deal with the resized embedding - # to make sure all parameters are ColoParameter for Colossal-AI Gemini Compatibility - for name, param 
in model.named_parameters():
-                if not isinstance(param, ColoParameter):
-                    sub_module_name = '.'.join(name.split('.')[:-1])
-                    weight_name = name.split('.')[-1]
-                    sub_module = model.get_submodule(sub_module_name)
-                    setattr(sub_module, weight_name, ColoParameter(param))
-    else:
-        tokenizer.pad_token = tokenizer.eos_token
+    if args.model == 'llama' and args.strategy == 'colossalai_gemini':
+        # this is a hack to make sure all parameters are ColoParameter,
+        # for Colossal-AI Gemini compatibility
+        for name, param in model.named_parameters():
+            if not isinstance(param, ColoParameter):
+                sub_module_name = '.'.join(name.split('.')[:-1])
+                weight_name = name.split('.')[-1]
+                sub_module = model.get_submodule(sub_module_name)
+                setattr(sub_module, weight_name, ColoParameter(param))
 
     # configure optimizer
     if args.strategy.startswith('colossalai'):
@@ -107,14 +102,14 @@ def train(args):
         train_data = load_dataset(args.dataset, 'super_natural_instructions', split='train')
         eval_data = load_dataset(args.dataset, 'super_natural_instructions', split='test')
 
-        train_dataset = SFTDataset(train_data, tokenizer, max_len)
-        eval_dataset = SFTDataset(eval_data, tokenizer, max_len)
+        train_dataset = SFTDataset(train_data, tokenizer, args.max_len)
+        eval_dataset = SFTDataset(eval_data, tokenizer, args.max_len)
 
     else:
         train_dataset = SupervisedDataset(tokenizer=tokenizer,
                                           data_path=args.dataset,
                                           max_datasets_size=args.max_datasets_size,
-                                          max_length=max_len)
+                                          max_length=args.max_len)
         eval_dataset = None
 
     data_collator = DataCollatorForSupervisedDataset(tokenizer=tokenizer)