mirror of https://github.com/hpcaitech/ColossalAI
fix style, add kto data sample
parent 845ea7214e
commit 544b7a38a1
@@ -17,8 +17,8 @@ set_n_least_used_CUDA_VISIBLE_DEVICES 4

 PROJECT_NAME="kto"
 PARENT_CONFIG_FILE="./benchmark_config" # Path to a folder to save training config logs
-PRETRAINED_MODEL_PATH="/root/commonData/Llama-2-7b-hf" # huggingface or local model path
-PRETRAINED_TOKENIZER_PATH="/root/commonData/Llama-2-7b-hf" # huggingface or local tokenizer path
+PRETRAINED_MODEL_PATH="" # huggingface or local model path
+PRETRAINED_TOKENIZER_PATH="" # huggingface or local tokenizer path

 TIMESTAMP=$(date +%Y-%m-%d-%H-%M-%S)
 FULL_PROJECT_NAME="${PROJECT_NAME}-${TIMESTAMP}"
@@ -14,11 +14,11 @@ set_n_least_used_CUDA_VISIBLE_DEVICES() {
 }

 set_n_least_used_CUDA_VISIBLE_DEVICES 4
 # export CUDA_VISIBLE_DEVICES=3,4

 PROJECT_NAME="sft"
 PARENT_CONFIG_FILE="./benchmark_config" # Path to a folder to save training config logs
-PRETRAINED_MODEL_PATH="/root/commonData/Llama-2-7b-hf" # huggingface or local model path
-PRETRAINED_TOKENIZER_PATH="/root/commonData/Llama-2-7b-hf" # huggingface or local tokenizer path
+PRETRAINED_MODEL_PATH="" # huggingface or local model path
+PRETRAINED_TOKENIZER_PATH="" # huggingface or local tokenizer path

 TIMESTAMP=$(date +%Y-%m-%d-%H-%M-%S)
 FULL_PROJECT_NAME="${PROJECT_NAME}-${TIMESTAMP}"
@@ -29,7 +29,7 @@ colossalai run --nproc_per_node 1 --master_port 31312 benchmark_sft.py \
     --pretrain $PRETRAINED_MODEL_PATH \
     --tokenizer_dir $PRETRAINED_TOKENIZER_PATH \
     --config_file $CONFIG_FILE \
-    --plugin ddp \
+    --plugin zero2 \
     --batch_size 8 \
     --max_epochs 1 \
     --accumulation_steps 1 \
@@ -251,17 +251,13 @@ class KTOLoss(nn.Module):
         # all gather
         dist.all_reduce(kl, op=dist.ReduceOp.SUM)
         kl = (kl / dist.get_world_size()).clamp(min=0)
         # kl = 0

         if chosen_logps.shape[0] != 0 and ref_chosen_logps.shape[0] != 0:
             chosen_logratios = chosen_logps - ref_chosen_logps
             chosen_losses = 1 - nn.functional.sigmoid(self.beta * (chosen_logratios - kl))
             chosen_rewards = self.beta * chosen_logratios.detach()
         else:
             # important to cast to policy_dtype; otherwise error will occur during all_gather
-            chosen_losses = torch.Tensor([]).to(
-                kl_logps.device
-            )  # torch.Tensor(0.).to(chosen_logps.dtype).to(chosen_logps.device)
+            chosen_losses = torch.Tensor([]).to(kl_logps.device)
             chosen_rewards = torch.Tensor([]).to(kl_logps.device)

         if rejected_logps.shape[0] != 0 and ref_rejected_logps.shape[0] != 0:
@@ -269,10 +265,7 @@ class KTOLoss(nn.Module):
             rejected_losses = 1 - nn.functional.sigmoid(self.beta * (kl - rejected_logratios))
             rejected_rewards = self.beta * rejected_logratios.detach()
         else:
             # important to cast to policy_dtype; otherwise error will occur during all_gather
-            rejected_losses = torch.Tensor([]).to(
-                kl_logps.device
-            )  # torch.Tensor(0.).to(rejected_logps.dtype).to(rejected_logps.device)
+            rejected_losses = torch.Tensor([]).to(kl_logps.device)
             rejected_rewards = torch.Tensor([]).to(kl_logps.device)

         losses = torch.cat((self.desirable_weight * chosen_losses, self.undesirable_weight * rejected_losses), 0).mean()
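Note: the two branches above, together with the clamped KL reference point computed at the top of the hunk, make up the KTO objective. Below is a minimal single-process sketch of the same computation for reference (illustrative only; tensor names mirror the diff, and the distributed all_reduce of the KL term is replaced by a plain mean):

```python
import torch

def kto_loss_sketch(
    chosen_logps, rejected_logps, kl_logps,               # policy log-probs
    ref_chosen_logps, ref_rejected_logps, ref_kl_logps,   # reference-model log-probs
    beta=0.1, desirable_weight=1.0, undesirable_weight=1.0,
):
    # Reference point: mean policy/reference log-ratio on the unmatched (KL) samples,
    # clamped at zero. In the distributed code this mean is an all_reduce / world_size.
    kl = (kl_logps - ref_kl_logps).mean().detach().clamp(min=0)

    # Desirable samples: push the policy log-ratio above the reference point.
    chosen_logratios = chosen_logps - ref_chosen_logps
    chosen_losses = 1 - torch.sigmoid(beta * (chosen_logratios - kl))

    # Undesirable samples: push the policy log-ratio below the reference point.
    rejected_logratios = rejected_logps - ref_rejected_logps
    rejected_losses = 1 - torch.sigmoid(beta * (kl - rejected_logratios))

    # Weighted mean over both groups; the weights rebalance unequal group sizes.
    return torch.cat(
        (desirable_weight * chosen_losses, undesirable_weight * rejected_losses), 0
    ).mean()
```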
@@ -180,7 +180,7 @@ class KTOTrainer(SLTrainer):
             self.optimizer.zero_grad()
             self.actor_scheduler.step()

-            # # sync
+            # sync
             loss_mean = all_reduce_mean(tensor=loss)
             chosen_rewards_mean = all_reduce_mean(tensor=chosen_rewards.mean())
             rejected_rewards_mean = all_reduce_mean(tensor=rejected_rewards.mean())
@@ -297,7 +297,7 @@ class KTOTrainer(SLTrainer):
                    chosen_logprob, rejected_logprob, kl_logprob, ref_chosen_logprob, ref_rejected_logprob, ref_kl_logprob
                )

-                # # sync
+                # sync
                 loss_mean = all_reduce_mean(tensor=loss)
                 chosen_rewards_mean = all_reduce_mean(tensor=chosen_rewards.mean())
                 rejected_rewards_mean = all_reduce_mean(tensor=rejected_rewards.mean())
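Note: both trainer hunks average the logged metrics across data-parallel ranks before reporting. The repository ships its own `all_reduce_mean` utility; the following is only a minimal sketch of what such a helper typically does, under that assumption:

```python
import torch
import torch.distributed as dist

def all_reduce_mean(tensor: torch.Tensor) -> torch.Tensor:
    """Average a metric tensor across all data-parallel ranks (sketch)."""
    if dist.is_available() and dist.is_initialized():
        dist.all_reduce(tensor, op=dist.ReduceOp.SUM)  # sum contributions from every rank
        tensor = tensor / dist.get_world_size()        # divide by the rank count to get the mean
    return tensor
```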
@@ -754,7 +754,26 @@ We support the method introduced in the paper [ORPO: Monolithic Preference Optim
 </p>

 ### Alternative Option For RLHF: Kahneman-Tversky Optimization (KTO)
-We support the method introduced in the paper [KTO:Model Alignment as Prospect Theoretic Optimization](https://arxiv.org/pdf/2402.01306) (KTO). Which is a aligment method that directly maximize "human utility" of generation results. To use KTO in alignment, use the [train_kto.sh](./examples/training_scripts/train_orpo.sh) script, You may need to set the value for `beta` (which determine how strongly the reinforcement learning loss affect the training), `desirable_weight` and `undesirable_weight` if your data is biased (has unequal number of chosen and rejected samples).
+We support the method introduced in the paper [KTO: Model Alignment as Prospect Theoretic Optimization](https://arxiv.org/pdf/2402.01306) (KTO), an alignment method that directly maximizes the "human utility" of generation results.
+
+For KTO data preparation, please use the script [prepare_kto_dataset.sh](./examples/data_preparation_scripts/prepare_kto_dataset.sh). Unlike DPO and its derivatives, you no longer need a pair of chosen/rejected responses for the same input; you only need responses that each carry a preference label indicating whether the response is desirable or not (read the paper for more details). You also need to convert your data to the following intermediate format before running the data preparation script.
+
+```jsonl
+{
+    "prompt": [
+        {"from": "human", "content": "What are some cuss words in english?"},
+        {
+            "from": "assistant",
+            "content": "Here's an incomplete list.\n\nAss, dick, bugger, crap, ...",
+        },
+        {"from": "human", "content": "What's your favorite one?"},
+    ],
+    "completion": {"from": "assistant", "content": "Ass."}, # the completion must contain a single message from the assistant
+    "label": False, # whether the response is favorable or not
+}
+```
+
+For training, use the [train_kto.sh](./examples/training_scripts/train_kto.sh) script. You may need to set the value of `beta` (which determines how strongly the reinforcement learning loss affects training), as well as `desirable_weight` and `undesirable_weight` if your data is biased (has an unequal number of chosen and rejected samples).

 #### KTO Result
 <p align="center">
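Note: the KTO paper recommends choosing the weights so that the weighted group sizes stay roughly balanced (approximately `desirable_weight * n_desirable` within 1x to 4/3x of `undesirable_weight * n_undesirable`). A small hypothetical helper for picking weights from an intermediate-format file, assuming the file is valid JSON Lines (lowercase true/false, no inline comments) with a boolean "label" per sample:

```python
import json

def suggest_kto_weights(jsonl_path: str, target_ratio: float = 1.0):
    """Count desirable/undesirable samples and suggest loss weights (sketch)."""
    n_desirable = n_undesirable = 0
    with open(jsonl_path) as f:
        for line in f:
            if not line.strip():
                continue
            if json.loads(line)["label"]:
                n_desirable += 1
            else:
                n_undesirable += 1

    # Keep desirable_weight at 1.0 and scale undesirable_weight so that
    # desirable_weight * n_desirable is roughly target_ratio * undesirable_weight * n_undesirable.
    desirable_weight = 1.0
    undesirable_weight = (desirable_weight * n_desirable) / (target_ratio * max(n_undesirable, 1))
    return desirable_weight, undesirable_weight
```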
@@ -5,9 +5,9 @@ rm -rf $SAVE_DIR/jsonl
 rm -rf $SAVE_DIR/arrow

 python prepare_dataset.py --type kto \
-    --data_input_dirs /home/nvme-share/home/yeanbang/data/dataset/hh_rlhf/kto_format/data \
-    --conversation_template_config /home/nvme-share/home/yeanbang/ColossalAI/applications/ColossalChat/config/conversation_template/llama2.json \
-    --tokenizer_dir "/home/nvme-share/share/models/Sheared-LLaMA-1.3B" \
+    --data_input_dirs /PATH/TO/KTO/DATASET \
+    --conversation_template_config /PATH/TO/CHAT/TEMPLATE/CONFIG.json \
+    --tokenizer_dir "" \
     --data_cache_dir $SAVE_DIR/cache \
     --data_jsonl_output_dir $SAVE_DIR/jsonl \
     --data_arrow_output_dir $SAVE_DIR/arrow \
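Note: KTO consumes unpaired, labeled responses. If your data currently sits in the paired chosen/rejected format used for DPO, it can be flattened into the KTO intermediate format shown in the README before running the script above. An illustrative converter follows; the script and the input field names ("prompt"/"chosen"/"rejected") are assumptions, so adapt them to your actual pair schema:

```python
import json

def pairs_to_kto(in_path: str, out_path: str) -> None:
    """Flatten paired preference data into unpaired, labeled KTO samples (sketch)."""
    with open(in_path) as fin, open(out_path, "w") as fout:
        for line in fin:
            if not line.strip():
                continue
            pair = json.loads(line)
            # Each pair yields one desirable (True) and one undesirable (False) sample.
            for completion, label in ((pair["chosen"], True), (pair["rejected"], False)):
                fout.write(json.dumps(
                    {"prompt": pair["prompt"], "completion": completion, "label": label}
                ) + "\n")
```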
@@ -10,4 +10,5 @@ python prepare_dataset.py --type preference \
     --tokenizer_dir "" \
     --data_cache_dir $SAVE_DIR/cache \
     --data_jsonl_output_dir $SAVE_DIR/jsonl \
-    --data_arrow_output_dir $SAVE_DIR/arrow
+    --data_arrow_output_dir $SAVE_DIR/arrow \
+    --max_length 1024
@@ -10,4 +10,5 @@ python prepare_dataset.py --type prompt \
     --tokenizer_dir "" \
     --data_cache_dir $SAVE_DIR/cache \
     --data_jsonl_output_dir $SAVE_DIR/jsonl \
-    --data_arrow_output_dir $SAVE_DIR/arrow
+    --data_arrow_output_dir $SAVE_DIR/arrow \
+    --max_length 1024
@@ -11,3 +11,4 @@ python prepare_dataset.py --type sft \
     --data_cache_dir $SAVE_DIR/cache \
     --data_jsonl_output_dir $SAVE_DIR/jsonl \
     --data_arrow_output_dir $SAVE_DIR/arrow \
+    --max_length 4096
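Note: `--max_length` caps the tokenized length of each preprocessed sample. A minimal illustration of what such a cap means at the token level; this is only a sketch with a placeholder tokenizer path, and the exact truncation/filtering behaviour is defined by prepare_dataset.py itself:

```python
from transformers import AutoTokenizer

# Placeholder path: use the same value you pass to --tokenizer_dir.
tokenizer = AutoTokenizer.from_pretrained("PATH/TO/TOKENIZER")
text = "A rendered conversation produced by the chat template..."
input_ids = tokenizer(text, truncation=True, max_length=4096)["input_ids"]
assert len(input_ids) <= 4096  # samples never exceed the configured cap
```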
@@ -16,23 +16,23 @@ set_n_least_used_CUDA_VISIBLE_DEVICES() {
 set_n_least_used_CUDA_VISIBLE_DEVICES 4

 PROJECT_NAME="kto"
-PARENT_SAVE_DIR="/home/nvme-share/home/yeanbang/data/experiments/kto/checkpoint" # Path to a folder to save checkpoints
-PARENT_TENSORBOARD_DIR="/home/nvme-share/home/yeanbang/data/experiments/kto/log" # Path to a folder to save logs
-PARENT_CONFIG_FILE="/home/nvme-share/home/yeanbang/data/experiments/kto/log" # Path to a folder to save training config logs
-PRETRAINED_MODEL_PATH="/home/nvme-share/home/yeanbang/data/model/hh_rlhf_sheared_llamasft-2024-07-17-07-29-29/modeling" # huggingface or local model path
-PRETRAINED_TOKENIZER_PATH="/home/nvme-share/share/models/Sheared-LLaMA-1.3B" # huggingface or local tokenizer path
+PARENT_SAVE_DIR="" # Path to a folder to save checkpoints
+PARENT_TENSORBOARD_DIR="" # Path to a folder to save logs
+PARENT_CONFIG_FILE="" # Path to a folder to save training config logs
+PRETRAINED_MODEL_PATH="" # huggingface or local model path
+PRETRAINED_TOKENIZER_PATH="" # huggingface or local tokenizer path

 declare -a dataset=(
-    /home/nvme-share/home/yeanbang/data/experiments/kto/arrow/part-00000
-    /home/nvme-share/home/yeanbang/data/experiments/kto/arrow/part-00001
-    /home/nvme-share/home/yeanbang/data/experiments/kto/arrow/part-00002
-    /home/nvme-share/home/yeanbang/data/experiments/kto/arrow/part-00003
-    /home/nvme-share/home/yeanbang/data/experiments/kto/arrow/part-00004
-    /home/nvme-share/home/yeanbang/data/experiments/kto/arrow/part-00005
-    /home/nvme-share/home/yeanbang/data/experiments/kto/arrow/part-00006
-    /home/nvme-share/home/yeanbang/data/experiments/kto/arrow/part-00007
-    /home/nvme-share/home/yeanbang/data/experiments/kto/arrow/part-00008
-    /home/nvme-share/home/yeanbang/data/experiments/kto/arrow/part-00009
+    /Your/KTO/Data/arrow/part-00000
+    /Your/KTO/Data/arrow/part-00001
+    /Your/KTO/Data/arrow/part-00002
+    /Your/KTO/Data/arrow/part-00003
+    /Your/KTO/Data/arrow/part-00004
+    /Your/KTO/Data/arrow/part-00005
+    /Your/KTO/Data/arrow/part-00006
+    /Your/KTO/Data/arrow/part-00007
+    /Your/KTO/Data/arrow/part-00008
+    /Your/KTO/Data/arrow/part-00009
 )

 TIMESTAMP=$(date +%Y-%m-%d-%H-%M-%S)
@@ -15,22 +15,22 @@ set_n_least_used_CUDA_VISIBLE_DEVICES() {

 set_n_least_used_CUDA_VISIBLE_DEVICES 4
 PROJECT_NAME="sft"
-PARENT_SAVE_DIR="/home/nvme-share/home/yeanbang/data/model/hh_rlhf_sheared_llama" # Path to a folder to save checkpoints
-PARENT_TENSORBOARD_DIR="/home/nvme-share/home/yeanbang/data/experiments/sft/log" # Path to a folder to save logs
-PARENT_CONFIG_FILE="/home/nvme-share/home/yeanbang/data/experiments/kto/log" # Path to a folder to save training config logs
-PRETRAINED_MODEL_PATH="/home/nvme-share/share/models/Sheared-LLaMA-1.3B" # huggingface or local model path
-PRETRAINED_TOKENIZER_PATH="/home/nvme-share/share/models/Sheared-LLaMA-1.3B" # huggingface or local tokenizer path
+PARENT_SAVE_DIR="" # Path to a folder to save checkpoints
+PARENT_TENSORBOARD_DIR="" # Path to a folder to save logs
+PARENT_CONFIG_FILE="" # Path to a folder to save training config logs
+PRETRAINED_MODEL_PATH="" # huggingface or local model path
+PRETRAINED_TOKENIZER_PATH="" # huggingface or local tokenizer path
 declare -a dataset=(
-    /home/nvme-share/home/yeanbang/data/experiments/sft/arrow/part-00000
-    /home/nvme-share/home/yeanbang/data/experiments/sft/arrow/part-00001
-    /home/nvme-share/home/yeanbang/data/experiments/sft/arrow/part-00002
-    /home/nvme-share/home/yeanbang/data/experiments/sft/arrow/part-00003
-    /home/nvme-share/home/yeanbang/data/experiments/sft/arrow/part-00004
-    /home/nvme-share/home/yeanbang/data/experiments/sft/arrow/part-00005
-    /home/nvme-share/home/yeanbang/data/experiments/sft/arrow/part-00006
-    /home/nvme-share/home/yeanbang/data/experiments/sft/arrow/part-00007
-    /home/nvme-share/home/yeanbang/data/experiments/sft/arrow/part-00008
-    /home/nvme-share/home/yeanbang/data/experiments/sft/arrow/part-00009
+    /Your/SFT/Data/arrow/part-00000
+    /Your/SFT/Data/arrow/part-00001
+    /Your/SFT/Data/arrow/part-00002
+    /Your/SFT/Data/arrow/part-00003
+    /Your/SFT/Data/arrow/part-00004
+    /Your/SFT/Data/arrow/part-00005
+    /Your/SFT/Data/arrow/part-00006
+    /Your/SFT/Data/arrow/part-00007
+    /Your/SFT/Data/arrow/part-00008
+    /Your/SFT/Data/arrow/part-00009
 )

 TIMESTAMP=$(date +%Y-%m-%d-%H-%M-%S)