[chat] polish code note typo (#3612)

pull/3621/head
digger-yu 2023-04-20 17:22:15 +08:00 committed by GitHub
parent c4709d34cf
commit d7bf284706
10 changed files with 18 additions and 18 deletions

View File

@@ -18,7 +18,7 @@ class Experience:
action_log_probs: (B, A)
values: (B)
reward: (B)
advatanges: (B)
advantages: (B)
attention_mask: (B, S)
action_mask: (B, A)
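The fields above form a batched experience record; a minimal sketch of the corresponding structure (not the coati definition; B, S and A are assumed to mean batch size, sequence length and number of action tokens):

```
# Minimal sketch of the batched experience record described above.
# Assumed shape letters: B = batch size, S = sequence length, A = action tokens.
from dataclasses import dataclass

import torch


@dataclass
class ExperienceSketch:
    action_log_probs: torch.Tensor  # (B, A)
    values: torch.Tensor            # (B,)
    reward: torch.Tensor            # (B,)
    advantages: torch.Tensor        # (B,)
    attention_mask: torch.Tensor    # (B, S)
    action_mask: torch.Tensor       # (B, A)
```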

View File

@@ -108,7 +108,7 @@ def convert_to_lora_recursively(module: nn.Module, lora_rank: int) -> None:
class LoRAModule(nn.Module):
"""A LoRA module base class. All derived classes should call `convert_to_lora()` at the bottom of `__init__()`.
This calss will convert all torch.nn.Linear layer to LoraLinear layer.
This class will convert all torch.nn.Linear layers to LoraLinear layers.
Args:
lora_rank (int, optional): LoRA rank. 0 means LoRA is not applied. Defaults to 0.
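As a rough illustration of what the recursive conversion does (an independent sketch, not the coati `LoraLinear` code): every `nn.Linear` child is swapped for a wrapper that adds a trainable low-rank update on top of the frozen weight.

```
import torch
import torch.nn as nn


class LoRALinearSketch(nn.Module):
    """Sketch: wraps a frozen nn.Linear with a trainable low-rank update."""

    def __init__(self, base: nn.Linear, rank: int):
        super().__init__()
        self.base = base
        self.base.weight.requires_grad_(False)  # freeze the original projection
        self.lora_a = nn.Parameter(torch.randn(rank, base.in_features) * 0.01)
        self.lora_b = nn.Parameter(torch.zeros(base.out_features, rank))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # frozen projection + low-rank correction x @ A^T @ B^T
        return self.base(x) + x @ self.lora_a.t() @ self.lora_b.t()


def convert_to_lora_sketch(module: nn.Module, rank: int) -> None:
    """Recursively replace every nn.Linear child with the LoRA wrapper."""
    if rank <= 0:  # rank 0 means LoRA is not applied
        return
    for name, child in module.named_children():
        if isinstance(child, nn.Linear):
            setattr(module, name, LoRALinearSketch(child, rank))
        else:
            convert_to_lora_sketch(child, rank)
```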

View File

@@ -29,7 +29,7 @@ class DetachedPPOTrainer(DetachedTrainer):
lora_rank (int) : for actor / critic init
train_batch_size (int, defaults to 8): the batch size to use for training
train_batch_size (int, defaults to 8): the batch size to use for training
buffer_limit (int, defaults to 0): the max_size limitaiton of replay buffer
buffer_limit (int, defaults to 0): the max_size limitation of replay buffer
buffer_cpu_offload (bool, defaults to True): whether to offload replay buffer to cpu
eps_clip (float, defaults to 0.2): the clip coefficient of policy loss
value_clip (float, defaults to 0.4): the clip coefficient of value loss
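The `buffer_limit` / `buffer_cpu_offload` pair describes a bounded replay buffer that keeps items on CPU until they are sampled; a simplified sketch of that behaviour (not the coati buffer; `limit == 0` is assumed to mean unbounded):

```
import random
from collections import deque


class BoundedReplayBufferSketch:
    """Simplified sketch: FIFO buffer with a size limit and optional CPU offload."""

    def __init__(self, limit: int = 0, cpu_offload: bool = True):
        self.items = deque(maxlen=limit if limit > 0 else None)
        self.cpu_offload = cpu_offload

    def append(self, item: dict) -> None:
        if self.cpu_offload:  # keep stored tensors on CPU to save GPU memory
            item = {key: value.to("cpu") for key, value in item.items()}
        self.items.append(item)

    def sample(self, device: str = "cpu") -> dict:
        item = random.choice(self.items)
        return {key: value.to(device) for key, value in item.items()}
```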

View File

@@ -83,7 +83,7 @@ class ExperienceMakerHolder:
chosen_trainer = None
min_length = None
if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True:
print("[maker] choosing tartget trainer")
print("[maker] choosing target trainer")
while chosen_trainer is None:
for target_trainer in self.target_trainer_list:
try:

View File

@@ -15,7 +15,7 @@ class BufferItem:
action_log_probs: (A)
values: (1)
reward: (1)
advatanges: (1)
advantages: (1)
attention_mask: (S)
action_mask: (A)

View File

@@ -114,7 +114,7 @@ class PerformanceEvaluator(Callback):
# actor forward-backward, 3 means forward(1) + backward(2)
self.learn_flop += self.actor_num_params * batch_size * seq_len * 2 * (3 + int(self.enable_grad_checkpoint))
# critic foward-backward
# critic forward-backward
self.learn_flop += self.critic_num_params * batch_size * seq_len * 2 * (3 + int(self.enable_grad_checkpoint))
def on_fit_end(self) -> None:
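The estimate above uses the usual 2 · params · tokens cost for a transformer forward pass; the factor `(3 + int(enable_grad_checkpoint))` counts one forward plus roughly two forward-equivalents for the backward pass, with one extra forward when activations are recomputed under gradient checkpointing. With illustrative numbers:

```
# Illustrative numbers only: a 7B-parameter actor, batch size 8, sequence length 512.
actor_num_params = 7e9
batch_size, seq_len = 8, 512
enable_grad_checkpoint = True

# forward(1) + backward(2) + one recomputed forward under gradient checkpointing
learn_flop = actor_num_params * batch_size * seq_len * 2 * (3 + int(enable_grad_checkpoint))
print(f"{learn_flop:.2e} FLOPs")  # ~2.29e+14 FLOPs for this step
```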

View File

@@ -28,12 +28,12 @@ class PPOTrainer(Trainer):
actor (Actor): the actor model in ppo algorithm
critic (Critic): the critic model in ppo algorithm
reward_model (nn.Module): the reward model in rlhf algorithm to compute rewards for sentences
initial_model (Actor): the initial model in rlhf algorithm to generate reference logits to limit the update of actor
initial_model (Actor): the initial model in rlhf algorithm to generate reference logics to limit the update of actor
actor_optim (Optimizer): the optimizer to use for actor model
critic_optim (Optimizer): the optimizer to use for critic model
kl_coef (float, defaults to 0.1): the coefficient of kl divergence loss
train_batch_size (int, defaults to 8): the batch size to use for training
buffer_limit (int, defaults to 0): the max_size limitaiton of replay buffer
buffer_limit (int, defaults to 0): the max_size limitation of replay buffer
buffer_cpu_offload (bool, defaults to True): whether to offload replay buffer to cpu
eps_clip (float, defaults to 0.2): the clip coefficient of policy loss
vf_coef (float, defaults to 1.0): the coefficient of value loss
@@ -41,7 +41,7 @@ class PPOTrainer(Trainer):
value_clip (float, defaults to 0.4): the clip coefficient of value loss
experience_batch_size (int, defaults to 8): the batch size to use for experience generation
max_epochs (int, defaults to 1): the number of epochs of training process
tokenier (Callable, optional): the tokenizer to use for tokenizing the input
tokenizer (Callable, optional): the tokenizer to use for tokenizing the input
sample_replay_buffer (bool, defaults to False): whether to sample from replay buffer
dataloader_pin_memory (bool, defaults to True): whether to pin memory for data loader
callbacks (List[Callback], defaults to []): the callbacks to call during training process
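For reference, `eps_clip` and `value_clip` are the standard PPO clipping ranges; a generic sketch of the clipped policy and value losses they control (not the coati loss implementation):

```
import torch


def clipped_ppo_losses(log_probs, old_log_probs, advantages,
                       values, old_values, returns,
                       eps_clip: float = 0.2, value_clip: float = 0.4):
    """Generic clipped PPO losses (sketch, not the coati code)."""
    # Policy loss: clip the probability ratio to [1 - eps_clip, 1 + eps_clip].
    ratio = (log_probs - old_log_probs).exp()
    surrogate1 = ratio * advantages
    surrogate2 = ratio.clamp(1 - eps_clip, 1 + eps_clip) * advantages
    policy_loss = -torch.min(surrogate1, surrogate2).mean()

    # Value loss: clip the new value estimate to stay near the old one.
    values_clipped = old_values + (values - old_values).clamp(-value_clip, value_clip)
    value_loss = torch.max((values - returns) ** 2,
                           (values_clipped - returns) ** 2).mean()
    return policy_loss, value_loss
```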

View File

@@ -38,7 +38,7 @@ pip install -r requirements.txt
## Supervised datasets collection
We colllected 104K bilingual dataset of Chinese and English, and you can find the datasets in this repo
We collected a 104K bilingual dataset of Chinese and English, and you can find the datasets in this repo
[InstructionWild](https://github.com/XueFuzhao/InstructionWild).
The following pic shows how we collected the data.
@@ -128,7 +128,7 @@ Model performance in [Anthropics paper](https://arxiv.org/abs/2204.05862):
- --lora_rank: low-rank adaptation matrices rank, type=int, default=0
- --loss_func: which kind of loss function, choices=['log_sig', 'log_exp']
- --max_len: max sentence length for generation, type=int, default=512
- --test: whether is only tesing, if it's ture, the dataset will be small
- --test: whether it is only testing; if it's true, the dataset will be small
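The `log_sig` / `log_exp` choices above presumably correspond to the usual pairwise ranking losses for reward modelling, maximising the margin between the chosen and the rejected response (a hedged sketch; the exact loss classes in the repo may differ):

```
import torch
import torch.nn.functional as F


def log_sig_loss(chosen_reward: torch.Tensor, reject_reward: torch.Tensor) -> torch.Tensor:
    # -log sigmoid(r_chosen - r_rejected), the InstructGPT-style reward loss
    return -F.logsigmoid(chosen_reward - reject_reward).mean()


def log_exp_loss(chosen_reward: torch.Tensor, reject_reward: torch.Tensor) -> torch.Tensor:
    # log(1 + exp(r_rejected - r_chosen)); mathematically equivalent to the above
    return torch.log(1 + torch.exp(reject_reward - chosen_reward)).mean()
```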
## Stage3 - Training model using prompts with RL
@@ -245,7 +245,7 @@ class CoatiActor(Actor):
if pretrained is not None:
model = CoatiModel.from_pretrained(pretrained)
else:
model = build_model() # load your own model if it is not support in trainsformers
model = build_model() # load your own model if it is not supported in transformers
super().__init__(model, lora_rank, lora_train_bias)
```
@@ -266,7 +266,7 @@ class GPTLM(LM):
if pretrained is not None:
model = CoatiModel.from_pretrained(pretrained)
else:
model = build_model() # load your own model if it is not support in trainsformers
model = build_model() # load your own model if it is not supported in transformers
super().__init__(model, lora_rank, lora_train_bias)
@@ -288,7 +288,7 @@ class CoatiRM(RewardModel):
if pretrained is not None:
model = CoatiModel.from_pretrained(pretrained)
else:
model = build_model() # load your own model if it is not support in trainsformers
model = build_model() # load your own model if it is not supported in transformers
value_head = nn.Linear(model.config.n_embd, 1)
value_head.weight.data.normal_(mean=0.0, std=1 / (model.config.n_embd + 1))
@@ -311,7 +311,7 @@ class CoatiCritic(Critic):
if pretrained is not None:
model = CoatiModel.from_pretrained(pretrained)
else:
model = build_model() # load your own model if it is not support in trainsformers
model = build_model() # load your own model if it is not supported in transformers
value_head = nn.Linear(model.config.n_embd, 1)
value_head.weight.data.normal_(mean=0.0, std=1 / (model.config.n_embd + 1))
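In the reward-model and critic templates above, the value head maps the final hidden state of width `n_embd` to a single scalar, and the `std = 1 / (n_embd + 1)` initialisation keeps the initial value estimates close to zero. A self-contained illustration with hypothetical sizes:

```
import torch
import torch.nn as nn

n_embd = 768  # hypothetical hidden size
value_head = nn.Linear(n_embd, 1)
value_head.weight.data.normal_(mean=0.0, std=1 / (n_embd + 1))

hidden_states = torch.randn(2, 16, n_embd)      # (batch, seq, n_embd)
values = value_head(hidden_states).squeeze(-1)  # (batch, seq): one scalar per token
```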

View File

@@ -1,10 +1,10 @@
# Add Peft support for SFT and Prompts model training
The orginal implementation just adopts the loralib and merges the layers into the final model. The huggingface peft is a better lora model implementation and can be easily training and distributed.
The original implementation just adopts the loralib and merges the layers into the final model. The huggingface peft is a better lora model implementation and can be easily trained and distributed.
Since the reward model is relatively small, I just keep it as the original one. I suggest training the full model to get a proper reward/critic model.
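For context, wrapping a causal-LM with peft's LoRA typically looks like the following (a sketch based on the public peft API, not the code added in this PR; the base model and hyperparameters are placeholders):

```
from peft import LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("gpt2")  # placeholder base model
lora_config = LoraConfig(task_type=TaskType.CAUSAL_LM,
                         r=8, lora_alpha=32, lora_dropout=0.1)
model = get_peft_model(model, lora_config)  # only the LoRA weights stay trainable
```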
# Prelimenary installation
# Preliminary installation
Since the current PyPI peft package (0.2) has some bugs, please install the peft package from source.
```
git clone https://github.com/huggingface/peft

View File

@@ -166,7 +166,7 @@ class EasyRewardDataset(Dataset):
'''
Easy SFT just accept a text file which can be read line by line. However the datasest will group texts together to max_length so LLM will learn the texts meaning better.
Easy SFT just accepts a text file which can be read line by line. However, the dataset will group texts together up to max_length so the LLM will learn the texts' meaning better.
If individual lines are not related, just set is_group_texts to False.
'''