mirror of https://github.com/hpcaitech/ColossalAI
[chat] polish code note typo (#3612)
parent c4709d34cf
commit d7bf284706
@@ -18,7 +18,7 @@ class Experience:
     action_log_probs: (B, A)
     values: (B)
     reward: (B)
-    advatanges: (B)
+    advantages: (B)
     attention_mask: (B, S)
     action_mask: (B, A)
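For readers skimming the hunk above: these docstring lines list the tensor fields of one batch of PPO experience. Below is a minimal illustrative sketch of such a container, built only from the shapes in the docstring; the `sequences` field and every other detail are assumptions, not the repository's actual class definition.

```python
# Illustrative sketch only -- field names and shapes are taken from the docstring
# in the hunk above; the real Experience class may carry extra fields and methods.
from dataclasses import dataclass
from typing import Optional
import torch

@dataclass
class ExperienceSketch:
    sequences: torch.Tensor                        # (B, S) -- assumed, not shown in this hunk
    action_log_probs: torch.Tensor                 # (B, A)
    values: torch.Tensor                           # (B)
    reward: torch.Tensor                           # (B)
    advantages: torch.Tensor                       # (B)
    attention_mask: Optional[torch.Tensor] = None  # (B, S)
    action_mask: Optional[torch.Tensor] = None     # (B, A)
```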
@@ -108,7 +108,7 @@ def convert_to_lora_recursively(module: nn.Module, lora_rank: int) -> None:
 
 class LoRAModule(nn.Module):
     """A LoRA module base class. All derived classes should call `convert_to_lora()` at the bottom of `__init__()`.
-    This calss will convert all torch.nn.Linear layer to LoraLinear layer.
+    This class will convert all torch.nn.Linear layer to LoraLinear layer.
 
     Args:
         lora_rank (int, optional): LoRA rank. 0 means LoRA is not applied. Defaults to 0.
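The hunk header references `convert_to_lora_recursively(module, lora_rank)`, and the docstring states that every `torch.nn.Linear` is converted to a `LoraLinear`. As background only, here is a minimal sketch of that recursive swap with a toy stand-in for `LoraLinear`; the repository's real layer is loralib-based (see the Peft README hunk further down), and its scaling, bias handling, and initialisation will differ.

```python
import math
import torch
import torch.nn as nn
import torch.nn.functional as F

class ToyLoraLinear(nn.Module):
    """Toy LoRA linear: y = x W^T + b + (x A^T) B^T, with the base weight frozen."""

    def __init__(self, weight: torch.Tensor, bias, rank: int):
        super().__init__()
        out_features, in_features = weight.shape
        self.weight = nn.Parameter(weight.detach(), requires_grad=False)
        self.bias = nn.Parameter(bias.detach(), requires_grad=False) if bias is not None else None
        self.lora_A = nn.Parameter(torch.zeros(rank, in_features))
        self.lora_B = nn.Parameter(torch.zeros(out_features, rank))
        nn.init.kaiming_uniform_(self.lora_A, a=math.sqrt(5))  # B stays zero, so the LoRA delta starts at 0

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        base = F.linear(x, self.weight, self.bias)
        return base + F.linear(F.linear(x, self.lora_A), self.lora_B)

def convert_to_lora_recursively(module: nn.Module, lora_rank: int) -> None:
    """Walk the module tree and replace every nn.Linear with a LoRA-augmented layer."""
    for name, child in module.named_children():
        if isinstance(child, nn.Linear):
            setattr(module, name, ToyLoraLinear(child.weight, child.bias, lora_rank))
        else:
            convert_to_lora_recursively(child, lora_rank)
```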
@@ -29,7 +29,7 @@ class DetachedPPOTrainer(DetachedTrainer):
         lora_rank (int) : for actor / critic init
         train_batch_size (int, defaults to 8): the batch size to use for training
         train_batch_size (int, defaults to 8): the batch size to use for training
-        buffer_limit (int, defaults to 0): the max_size limitaiton of replay buffer
+        buffer_limit (int, defaults to 0): the max_size limitation of replay buffer
         buffer_cpu_offload (bool, defaults to True): whether to offload replay buffer to cpu
         eps_clip (float, defaults to 0.2): the clip coefficient of policy loss
         value_clip (float, defaults to 0.4): the clip coefficient of value loss
@@ -83,7 +83,7 @@ class ExperienceMakerHolder:
         chosen_trainer = None
         min_length = None
         if 'debug' in self.generate_kwargs and self.generate_kwargs['debug'] == True:
-            print("[maker] choosing tartget trainer")
+            print("[maker] choosing target trainer")
         while chosen_trainer is None:
            for target_trainer in self.target_trainer_list:
                try:
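The truncated loop above polls the registered trainers to decide where to send freshly made experience. Purely as a guess at the elided part (the real code is Ray-based and calls remote methods on the trainer handles), here is a sketch of a "pick the trainer with the shortest replay buffer" loop, where `get_buffer_length()` is a hypothetical accessor:

```python
# Hypothetical continuation of the loop shown in the hunk; not the repository's code.
import time

def choose_least_loaded_trainer(target_trainer_list):
    chosen_trainer = None
    min_length = None
    while chosen_trainer is None:
        for target_trainer in target_trainer_list:
            try:
                length = target_trainer.get_buffer_length()  # hypothetical accessor
            except Exception:
                continue  # trainer not reachable yet; try the next one
            if min_length is None or length < min_length:
                min_length = length
                chosen_trainer = target_trainer
        if chosen_trainer is None:
            time.sleep(0.1)  # nothing reachable yet; back off briefly and retry
    return chosen_trainer
```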
@@ -15,7 +15,7 @@ class BufferItem:
     action_log_probs: (A)
     values: (1)
     reward: (1)
-    advatanges: (1)
+    advantages: (1)
     attention_mask: (S)
     action_mask: (A)
@@ -114,7 +114,7 @@ class PerformanceEvaluator(Callback):
 
         # actor forward-backward, 3 means forward(1) + backward(2)
         self.learn_flop += self.actor_num_params * batch_size * seq_len * 2 * (3 + int(self.enable_grad_checkpoint))
-        # critic foward-backward
+        # critic forward-backward
        self.learn_flop += self.critic_num_params * batch_size * seq_len * 2 * (3 + int(self.enable_grad_checkpoint))
 
     def on_fit_end(self) -> None:
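The context lines above pack the whole FLOP estimate into one expression, so a brief unpacking may help: a dense forward pass costs roughly 2 FLOPs per parameter per token, the backward pass roughly twice the forward, and gradient checkpointing re-runs the forward once more, which is exactly the `2 * (3 + int(enable_grad_checkpoint))` factor. A small restatement of that arithmetic (names here are local to the sketch, not the class's attributes):

```python
def estimate_learn_flops(num_params: int, batch_size: int, seq_len: int,
                         grad_checkpoint: bool) -> int:
    """2 * N FLOPs per token per pass; forward(1) + backward(2) [+ recomputed forward(1)]."""
    tokens = batch_size * seq_len
    passes = 3 + int(grad_checkpoint)
    return 2 * num_params * tokens * passes

# e.g. a 7e9-parameter actor, batch 8, sequence length 512, with checkpointing:
# 2 * 7e9 * 8 * 512 * 4 = ~2.3e14 FLOPs per training step for the actor alone.
```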
@@ -28,12 +28,12 @@ class PPOTrainer(Trainer):
         actor (Actor): the actor model in ppo algorithm
         critic (Critic): the critic model in ppo algorithm
         reward_model (nn.Module): the reward model in rlhf algorithm to make reward of sentences
-        initial_model (Actor): the initial model in rlhf algorithm to generate reference logits to limit the update of actor
+        initial_model (Actor): the initial model in rlhf algorithm to generate reference logics to limit the update of actor
         actor_optim (Optimizer): the optimizer to use for actor model
         critic_optim (Optimizer): the optimizer to use for critic model
         kl_coef (float, defaults to 0.1): the coefficient of kl divergence loss
         train_batch_size (int, defaults to 8): the batch size to use for training
-        buffer_limit (int, defaults to 0): the max_size limitaiton of replay buffer
+        buffer_limit (int, defaults to 0): the max_size limitation of replay buffer
         buffer_cpu_offload (bool, defaults to True): whether to offload replay buffer to cpu
         eps_clip (float, defaults to 0.2): the clip coefficient of policy loss
         vf_coef (float, defaults to 1.0): the coefficient of value loss
@@ -41,7 +41,7 @@ class PPOTrainer(Trainer):
         value_clip (float, defaults to 0.4): the clip coefficient of value loss
         experience_batch_size (int, defaults to 8): the batch size to use for experience generation
         max_epochs (int, defaults to 1): the number of epochs of training process
-        tokenier (Callable, optional): the tokenizer to use for tokenizing the input
+        tokenizer (Callable, optional): the tokenizer to use for tokenizing the input
         sample_replay_buffer (bool, defaults to False): whether to sample from replay buffer
         dataloader_pin_memory (bool, defaults to True): whether to pin memory for data loader
         callbacks (List[Callback], defaults to []): the callbacks to call during training process
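The `initial_model` and `kl_coef` entries above describe the usual RLHF trick of keeping the actor close to a frozen reference policy: the sequence reward is penalised by an approximation of the KL divergence between the two. A generic sketch of that shaping, reusing the tensor shapes from the Experience docstring earlier in this diff; it is not the repository's exact implementation.

```python
import torch

def kl_shaped_reward(reward: torch.Tensor,                 # (B,)   reward model output
                     action_log_probs: torch.Tensor,       # (B, A) from the actor
                     base_action_log_probs: torch.Tensor,  # (B, A) from initial_model
                     action_mask: torch.Tensor,            # (B, A) 1 for generated tokens
                     kl_coef: float = 0.1) -> torch.Tensor:
    # Cheap per-sequence KL estimate: mean log-prob gap over the generated tokens.
    log_ratio = (action_log_probs - base_action_log_probs) * action_mask
    approx_kl = log_ratio.sum(dim=-1) / action_mask.sum(dim=-1).clamp(min=1)
    return reward - kl_coef * approx_kl                    # (B,)
```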
@@ -38,7 +38,7 @@ pip install -r requirements.txt
 
 ## Supervised datasets collection
 
-We colllected 104K bilingual dataset of Chinese and English, and you can find the datasets in this repo
+We collected 104K bilingual dataset of Chinese and English, and you can find the datasets in this repo
 [InstructionWild](https://github.com/XueFuzhao/InstructionWild).
 
 The following pic shows how we collected the data.
@@ -128,7 +128,7 @@ Model performance in [Anthropics paper](https://arxiv.org/abs/2204.05862):
 - --lora_rank: low-rank adaptation matrices rank, type=int, default=0
 - --loss_func: which kind of loss function, choices=['log_sig', 'log_exp']
 - --max_len: max sentence length for generation, type=int, default=512
-- --test: whether is only tesing, if it's ture, the dataset will be small
+- --test: whether is only testing, if it's true, the dataset will be small
 
 ## Stage3 - Training model using prompts with RL
 
@@ -245,7 +245,7 @@ class CoatiActor(Actor):
         if pretrained is not None:
             model = CoatiModel.from_pretrained(pretrained)
         else:
-            model = build_model() # load your own model if it is not support in trainsformers
+            model = build_model() # load your own model if it is not support in transformers
 
         super().__init__(model, lora_rank, lora_train_bias)
 ```
@@ -266,7 +266,7 @@ class GPTLM(LM):
         if pretrained is not None:
             model = CoatiModel.from_pretrained(pretrained)
         else:
-            model = build_model() # load your own model if it is not support in trainsformers
+            model = build_model() # load your own model if it is not support in transformers
 
         super().__init__(model, lora_rank, lora_train_bias)
 
@@ -288,7 +288,7 @@ class CoatiRM(RewardModel):
         if pretrained is not None:
             model = CoatiModel.from_pretrained(pretrained)
         else:
-            model = build_model() # load your own model if it is not support in trainsformers
+            model = build_model() # load your own model if it is not support in transformers
 
         value_head = nn.Linear(model.config.n_embd, 1)
         value_head.weight.data.normal_(mean=0.0, std=1 / (model.config.n_embd + 1))
@@ -311,7 +311,7 @@ class CoatiCritic(Critic):
         if pretrained is not None:
             model = CoatiModel.from_pretrained(pretrained)
         else:
-            model = build_model() # load your own model if it is not support in trainsformers
+            model = build_model() # load your own model if it is not support in transformers
 
         value_head = nn.Linear(model.config.n_embd, 1)
         value_head.weight.data.normal_(mean=0.0, std=1 / (model.config.n_embd + 1))
@@ -1,10 +1,10 @@
 # Add Peft support for SFT and Prompts model training
 
-The orginal implementation just adopts the loralib and merges the layers into the final model. The huggingface peft is a better lora model implementation and can be easily training and distributed.
+The original implementation just adopts the loralib and merges the layers into the final model. The huggingface peft is a better lora model implementation and can be easily training and distributed.
 
 Since reward model is relative small, I just keep it as original one. I suggest train full model to get the proper reward/critic model.
 
-# Prelimenary installation
+# Preliminary installation
 Since the current pypi peft package(0.2) has some bugs, please install the peft package using source.
 ```
 git clone https://github.com/huggingface/peft
@@ -166,7 +166,7 @@ class EasyRewardDataset(Dataset):
 
 
 '''
-Easy SFT just accept a text file which can be read line by line. However the datasest will group texts together to max_length so LLM will learn the texts meaning better.
+Easy SFT just accept a text file which can be read line by line. However the datasets will group texts together to max_length so LLM will learn the texts meaning better.
 If individual lines are not related, just set is_group_texts to False.
 '''
 
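The docstring above says the easy-SFT dataset groups lines together up to `max_length` when `is_group_texts` is enabled. As illustration only, here is the usual way such grouping is done (mirroring the common Hugging Face `group_texts` recipe, not necessarily this repository's exact code):

```python
from typing import List

def group_texts(tokenized_lines: List[List[int]], max_length: int) -> List[List[int]]:
    """Concatenate tokenised lines into one stream and cut it into max_length chunks."""
    stream: List[int] = []
    for ids in tokenized_lines:
        stream.extend(ids)
    # Drop the trailing remainder so every sample is exactly max_length tokens long.
    usable = (len(stream) // max_length) * max_length
    return [stream[i:i + max_length] for i in range(0, usable, max_length)]

# If individual lines are unrelated (is_group_texts=False), each line would instead be
# padded or truncated to max_length on its own.
```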