diff --git a/applications/Chat/coati/experience_maker/base.py b/applications/Chat/coati/experience_maker/base.py
index 61fd4f674..ff7585257 100644
--- a/applications/Chat/coati/experience_maker/base.py
+++ b/applications/Chat/coati/experience_maker/base.py
@@ -18,7 +18,7 @@ class Experience:
     action_log_probs: (B, A)
     values: (B)
     reward: (B)
-    advatanges: (B)
+    advantages: (B)
     attention_mask: (B, S)
     action_mask: (B, A)
 
diff --git a/applications/Chat/coati/models/lora.py b/applications/Chat/coati/models/lora.py
index f8f7a1cb5..7f6eb7326 100644
--- a/applications/Chat/coati/models/lora.py
+++ b/applications/Chat/coati/models/lora.py
@@ -108,7 +108,7 @@ def convert_to_lora_recursively(module: nn.Module, lora_rank: int) -> None:
 
 class LoRAModule(nn.Module):
     """A LoRA module base class. All derived classes should call `convert_to_lora()` at the bottom of `__init__()`.
-    This calss will convert all torch.nn.Linear layer to LoraLinear layer.
+    This class will convert all torch.nn.Linear layer to LoraLinear layer.
 
     Args:
         lora_rank (int, optional): LoRA rank. 0 means LoRA is not applied. Defaults to 0.
diff --git a/applications/Chat/coati/ray/src/detached_trainer_ppo.py b/applications/Chat/coati/ray/src/detached_trainer_ppo.py
index 90e5e4377..838e82d07 100644
--- a/applications/Chat/coati/ray/src/detached_trainer_ppo.py
+++ b/applications/Chat/coati/ray/src/detached_trainer_ppo.py
@@ -29,7 +29,7 @@ class DetachedPPOTrainer(DetachedTrainer):
         lora_rank (int) : for actor / critic init
         train_batch_size (int, defaults to 8): the batch size to use for training
         train_batch_size (int, defaults to 8): the batch size to use for training
-        buffer_limit (int, defaults to 0): the max_size limitaiton of replay buffer
+        buffer_limit (int, defaults to 0): the max_size limitation of replay buffer
         buffer_cpu_offload (bool, defaults to True): whether to offload replay buffer to cpu
         eps_clip (float, defaults to 0.2): the clip coefficient of policy loss
         value_clip (float, defaults to 0.4): the clip coefficient of value loss
diff --git a/applications/Chat/coati/ray/src/experience_maker_holder.py b/applications/Chat/coati/ray/src/experience_maker_holder.py
index 696773e84..94e4a3d53 100644
--- a/applications/Chat/coati/ray/src/experience_maker_holder.py
+++ b/applications/Chat/coati/ray/src/experience_maker_holder.py
@@ -83,7 +83,7 @@ class ExperienceMakerHolder:
         chosen_trainer = None
         min_length = None
         if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True:
-            print("[maker] choosing tartget trainer")
+            print("[maker] choosing target trainer")
         while chosen_trainer is None:
             for target_trainer in self.target_trainer_list:
                 try:
diff --git a/applications/Chat/coati/replay_buffer/utils.py b/applications/Chat/coati/replay_buffer/utils.py
index 55ddb2ae8..6ad0db2c3 100644
--- a/applications/Chat/coati/replay_buffer/utils.py
+++ b/applications/Chat/coati/replay_buffer/utils.py
@@ -15,7 +15,7 @@ class BufferItem:
     action_log_probs: (A)
     values: (1)
     reward: (1)
-    advatanges: (1)
+    advantages: (1)
     attention_mask: (S)
     action_mask: (A)
 
diff --git a/applications/Chat/coati/trainer/callbacks/performance_evaluator.py b/applications/Chat/coati/trainer/callbacks/performance_evaluator.py
index 0fc3b077a..5ca44a52d 100644
--- a/applications/Chat/coati/trainer/callbacks/performance_evaluator.py
+++ b/applications/Chat/coati/trainer/callbacks/performance_evaluator.py
@@ -114,7 +114,7 @@ class PerformanceEvaluator(Callback):
 
         # actor forward-backward, 3 means forward(1) + backward(2)
         self.learn_flop += self.actor_num_params * batch_size * seq_len * 2 * (3 + int(self.enable_grad_checkpoint))
-        # critic foward-backward
+        # critic forward-backward
         self.learn_flop += self.critic_num_params * batch_size * seq_len * 2 * (3 + int(self.enable_grad_checkpoint))
 
     def on_fit_end(self) -> None:
diff --git a/applications/Chat/coati/trainer/ppo.py b/applications/Chat/coati/trainer/ppo.py
index 008a6aea8..2db604fc9 100644
--- a/applications/Chat/coati/trainer/ppo.py
+++ b/applications/Chat/coati/trainer/ppo.py
@@ -28,12 +28,12 @@ class PPOTrainer(Trainer):
         actor (Actor): the actor model in ppo algorithm
         critic (Critic): the critic model in ppo algorithm
         reward_model (nn.Module): the reward model in rlhf algorithm to make reward of sentences
         initial_model (Actor): the initial model in rlhf algorithm to generate reference logits to limit the update of actor
         actor_optim (Optimizer): the optimizer to use for actor model
         critic_optim (Optimizer): the optimizer to use for critic model
         kl_coef (float, defaults to 0.1): the coefficient of kl divergence loss
         train_batch_size (int, defaults to 8): the batch size to use for training
-        buffer_limit (int, defaults to 0): the max_size limitaiton of replay buffer
+        buffer_limit (int, defaults to 0): the max_size limitation of replay buffer
         buffer_cpu_offload (bool, defaults to True): whether to offload replay buffer to cpu
         eps_clip (float, defaults to 0.2): the clip coefficient of policy loss
         vf_coef (float, defaults to 1.0): the coefficient of value loss
@@ -41,7 +41,7 @@ class PPOTrainer(Trainer):
         value_clip (float, defaults to 0.4): the clip coefficient of value loss
         experience_batch_size (int, defaults to 8): the batch size to use for experience generation
         max_epochs (int, defaults to 1): the number of epochs of training process
-        tokenier (Callable, optional): the tokenizer to use for tokenizing the input
+        tokenizer (Callable, optional): the tokenizer to use for tokenizing the input
         sample_replay_buffer (bool, defaults to False): whether to sample from replay buffer
         dataloader_pin_memory (bool, defaults to True): whether to pin memory for data loader
         callbacks (List[Callback], defaults to []): the callbacks to call during training process
diff --git a/applications/Chat/examples/README.md b/applications/Chat/examples/README.md
index 993a56c5a..af8ded005 100644
--- a/applications/Chat/examples/README.md
+++ b/applications/Chat/examples/README.md
@@ -38,7 +38,7 @@ pip install -r requirements.txt
 
 ## Supervised datasets collection
 
-We colllected 104K bilingual dataset of Chinese and English, and you can find the datasets in this repo
+We collected 104K bilingual dataset of Chinese and English, and you can find the datasets in this repo
 [InstructionWild](https://github.com/XueFuzhao/InstructionWild).
 
 The following pic shows how we collected the data.
@@ -128,7 +128,7 @@ Model performance in [Anthropics paper](https://arxiv.org/abs/2204.05862):
 - --lora_rank: low-rank adaptation matrices rank, type=int, default=0
 - --loss_func: which kind of loss function, choices=['log_sig', 'log_exp']
 - --max_len: max sentence length for generation, type=int, default=512
-- --test: whether is only tesing, if it's ture, the dataset will be small
+- --test: whether is only testing, if it's true, the dataset will be small
 
 ## Stage3 - Training model using prompts with RL
 
@@ -245,7 +245,7 @@ class CoatiActor(Actor):
         if pretrained is not None:
             model = CoatiModel.from_pretrained(pretrained)
         else:
-            model = build_model() # load your own model if it is not support in trainsformers
+            model = build_model() # load your own model if it is not support in transformers
 
         super().__init__(model, lora_rank, lora_train_bias)
 ```
@@ -266,7 +266,7 @@ class GPTLM(LM):
         if pretrained is not None:
             model = CoatiModel.from_pretrained(pretrained)
         else:
-            model = build_model() # load your own model if it is not support in trainsformers
+            model = build_model() # load your own model if it is not support in transformers
 
         super().__init__(model, lora_rank, lora_train_bias)
 
@@ -288,7 +288,7 @@ class CoatiRM(RewardModel):
         if pretrained is not None:
             model = CoatiModel.from_pretrained(pretrained)
         else:
-            model = build_model() # load your own model if it is not support in trainsformers
+            model = build_model() # load your own model if it is not support in transformers
 
         value_head = nn.Linear(model.config.n_embd, 1)
         value_head.weight.data.normal_(mean=0.0, std=1 / (model.config.n_embd + 1))
@@ -311,7 +311,7 @@ class CoatiCritic(Critic):
         if pretrained is not None:
             model = CoatiModel.from_pretrained(pretrained)
         else:
-            model = build_model() # load your own model if it is not support in trainsformers
+            model = build_model() # load your own model if it is not support in transformers
 
         value_head = nn.Linear(model.config.n_embd, 1)
         value_head.weight.data.normal_(mean=0.0, std=1 / (model.config.n_embd + 1))
diff --git a/applications/Chat/examples/community/peft/README.md b/applications/Chat/examples/community/peft/README.md
index a82f02a87..eabb56fd8 100644
--- a/applications/Chat/examples/community/peft/README.md
+++ b/applications/Chat/examples/community/peft/README.md
@@ -1,10 +1,10 @@
 # Add Peft support for SFT and Prompts model training
 
-The orginal implementation just adopts the loralib and merges the layers into the final model. The huggingface peft is a better lora model implementation and can be easily training and distributed.
+The original implementation just adopts the loralib and merges the layers into the final model. The huggingface peft is a better lora model implementation and can be easily training and distributed.
 
 Since reward model is relative small, I just keep it as original one. I suggest train full model to get the proper reward/critic model.
 
-# Prelimenary installation
+# Preliminary installation
 Since the current pypi peft package(0.2) has some bugs, please install the peft package using source.
 ```
 git clone https://github.com/huggingface/peft
diff --git a/applications/Chat/examples/community/peft/easy_dataset.py b/applications/Chat/examples/community/peft/easy_dataset.py
index 13dceef79..24ea4f0a8 100644
--- a/applications/Chat/examples/community/peft/easy_dataset.py
+++ b/applications/Chat/examples/community/peft/easy_dataset.py
@@ -166,7 +166,7 @@ class EasyRewardDataset(Dataset):
 
 
 '''
-Easy SFT just accept a text file which can be read line by line. However the datasest will group texts together to max_length so LLM will learn the texts meaning better.
+Easy SFT just accept a text file which can be read line by line. However the datasets will group texts together to max_length so LLM will learn the texts meaning better.
 If individual lines are not related, just set is_group_texts to False.
 '''
 