mirror of https://github.com/hpcaitech/ColossalAI
[Doc] enhancement on README.md for chat examples (#3646)
* Add RoBERTa for RLHF Stage 2 & 3 (test) RoBERTa for RLHF Stage 2 & 3 (still in testing) Revert "Add RoBERTa for RLHF Stage 2 & 3 (test)" This reverts commitpull/3662/head06741d894d
. Add RoBERTa for RLHF stage 2 & 3 1. add roberta folder under model folder 2. add roberta option in train_reward_model.py 3. add some test in testci Update test_ci.sh Revert "Update test_ci.sh" This reverts commit 9c7352b81766f3177d31eeec0ec178a301df966a. Add RoBERTa for RLHF Stage 2 & 3 (test) RoBERTa for RLHF Stage 2 & 3 (still in testing) Revert "Add RoBERTa for RLHF Stage 2 & 3 (test)" This reverts commit06741d894d
. Add RoBERTa for RLHF stage 2 & 3 1. add roberta folder under model folder 2. add roberta option in train_reward_model.py 3. add some test in testci Update test_ci.sh Revert "Update test_ci.sh" This reverts commit 9c7352b81766f3177d31eeec0ec178a301df966a. update roberta with coati chat ci update Revert "chat ci update" This reverts commit 17ae7ae01fa752bd3289fc39069868fde99cf846. * Update README.md Update README.md * update readme * Update test_ci.sh
parent
2a951955ad
commit
8bccb72c8d
|
@ -146,11 +146,15 @@ torchrun --standalone --nproc_per_node=4 train_prompts.py \
|
|||
--pretrain "/path/to/LLaMa-7B/" \
|
||||
--model 'llama' \
|
||||
--strategy colossalai_zero2 \
|
||||
--prompt_path /path/to/your/prompt_dataset \
|
||||
--prompt_dataset /path/to/your/prompt_dataset \
|
||||
--pretrain_dataset /path/to/your/pretrain_dataset \
|
||||
--rm_pretrain /your/pretrain/rm/defination \
|
||||
--rm_path /your/rm/model/path
|
||||
```
|
||||
|
||||
Prompt dataset: the instruction dataset mentioned in the above figure which includes the instructions, e.g. you can use [seed_prompts_ch.jsonl](https://github.com/XueFuzhao/InstructionWild/blob/main/data/seed_prompts_ch.jsonl) or [seed_prompts_en.jsonl](https://github.com/XueFuzhao/InstructionWild/blob/main/data/seed_prompts_en.jsonl) in InstructionWild.
|
||||
Pretrain dataset: the pretrain dataset including the instruction and corresponding response, e.g. you can use the [InstructWild Data](https://github.com/XueFuzhao/InstructionWild/tree/main/data) in stage 1 supervised instructs tuning.
|
||||
|
||||
### Arg List
|
||||
- --strategy: the strategy using for training, choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'], default='naive'
|
||||
- --model: model type of actor, choices=['gpt2', 'bloom', 'opt', 'llama'], default='bloom'
|
||||
|
@ -159,7 +163,7 @@ torchrun --standalone --nproc_per_node=4 train_prompts.py \
|
|||
- --rm_pretrain: pretrain model for reward model, type=str, default=None
|
||||
- --rm_path: the path of rm model, type=str, default=None
|
||||
- --save_path: path to save the model, type=str, default='output'
|
||||
- --prompt_path: path of the prompt dataset, type=str, default=None
|
||||
- --prompt_dataset: path of the prompt dataset, type=str, default=None
|
||||
- --pretrain_dataset: path of the ptx dataset, type=str, default=None
|
||||
- --need_optim_ckpt: whether to save optim ckpt, type=bool, default=False
|
||||
- --num_episodes: num of episodes for training, type=int, default=10
|
||||
|
|
|
@ -99,7 +99,7 @@ torchrun --standalone --nproc_per_node=2 ${BASE}/train_reward_model.py \
|
|||
|
||||
rm -rf ${BASE}/rm_ckpt.pt
|
||||
|
||||
torchrun --standalone --nproc_per_node=2 ${BASE}/train_prompts.py --prompt_path $PROMPT_PATH --pretrain_dataset $PRETRAIN_DATASET \
|
||||
torchrun --standalone --nproc_per_node=2 ${BASE}/train_prompts.py --prompt_dataset $PROMPT_PATH --pretrain_dataset $PRETRAIN_DATASET \
|
||||
--strategy colossalai_zero2 --num_episodes 1 --max_timesteps 2 \
|
||||
--update_timesteps 2 --max_epochs 1 --train_batch_size 2 \
|
||||
--pretrain 'facebook/opt-350m' --model opt \
|
||||
|
@ -108,7 +108,7 @@ torchrun --standalone --nproc_per_node=2 ${BASE}/train_prompts.py --prompt_path
|
|||
--save_path ${BASE}/actor_checkpoint_prompts.pt
|
||||
rm -rf ${BASE}/rm_ckpt_opt.pt
|
||||
|
||||
torchrun --standalone --nproc_per_node=2 ${BASE}/train_prompts.py --prompt_path $PROMPT_PATH --pretrain_dataset $PRETRAIN_DATASET \
|
||||
torchrun --standalone --nproc_per_node=2 ${BASE}/train_prompts.py --prompt_dataset $PROMPT_PATH --pretrain_dataset $PRETRAIN_DATASET \
|
||||
--strategy colossalai_zero2 --num_episodes 1 --max_timesteps 2 \
|
||||
--update_timesteps 2 --max_epochs 1 --train_batch_size 2 \
|
||||
--pretrain 'gpt2' --model gpt2 \
|
||||
|
|
|
@ -139,7 +139,7 @@ def main(args):
|
|||
|
||||
data_collator = DataCollatorForSupervisedDataset(tokenizer=tokenizer)
|
||||
|
||||
prompt_dataset = PromptDataset(tokenizer=tokenizer, data_path=args.prompt_path, max_datasets_size=16384)
|
||||
prompt_dataset = PromptDataset(tokenizer=tokenizer, data_path=args.prompt_dataset, max_datasets_size=16384)
|
||||
if dist.is_initialized() and dist.get_world_size() > 1:
|
||||
prompt_sampler = DistributedSampler(prompt_dataset, shuffle=True, seed=42, drop_last=True)
|
||||
else:
|
||||
|
@ -204,7 +204,7 @@ def main(args):
|
|||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--prompt_path', type=str, default=None, help='path to the prompt dataset')
|
||||
parser.add_argument('--prompt_dataset', type=str, default=None, help='path to the prompt dataset')
|
||||
parser.add_argument('--pretrain_dataset', type=str, default=None, help='path to the pretrained dataset')
|
||||
parser.add_argument('--strategy',
|
||||
choices=['naive', 'ddp', 'colossalai_gemini', 'colossalai_zero2'],
|
||||
|
|
|
@ -17,4 +17,4 @@ set_n_least_used_CUDA_VISIBLE_DEVICES 2
|
|||
|
||||
# torchrun --standalone --nproc_per_node=2 train_prompts.py prompts.csv --strategy colossalai_zero2
|
||||
|
||||
torchrun --standalone --nproc_per_node=2 train_prompts.py --prompt_path /path/to/data.json --strategy colossalai_zero2
|
||||
torchrun --standalone --nproc_per_node=2 train_prompts.py --prompt_dataset /path/to/data.json --strategy colossalai_zero2
|
||||
|
|
Loading…
Reference in New Issue