Mirror of https://github.com/THUDM/ChatGLM-6B
Commit 47a5ec121e (parent cbb9f44e30): Add deepspeed finetuning scripts
@@ -11,6 +11,9 @@ class ModelArguments:
     model_name_or_path: str = field(
         metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"}
     )
     ptuning_checkpoint: str = field(
         default=None, metadata={"help": "Path to p-tuning v2 checkpoints"}
     )
     config_name: Optional[str] = field(
         default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
     )
|
{
  "train_micro_batch_size_per_gpu": "auto",
  "zero_allow_untested_optimizer": true,
  "fp16": {
    "enabled": "auto",
    "loss_scale": 0,
    "initial_scale_power": 16,
    "loss_scale_window": 1000,
    "hysteresis": 2,
    "min_loss_scale": 1
  },
  "zero_optimization": {
    "stage": 2,
    "allgather_partitions": true,
    "allgather_bucket_size": 5e8,
    "overlap_comm": false,
    "reduce_scatter": true,
    "reduce_bucket_size": 5e8,
    "contiguous_gradients": true
  }
}
|
#!/usr/bin/env bash
# Full-parameter finetuning of ChatGLM-6B on the AdvertiseGen dataset
# via DeepSpeed ZeRO (config in deepspeed.json), fp16, 3 GPUs.

LR=1e-4

# Launcher rendezvous port. Original script computed a random port with
# `shuf -n 1 -i 10000-65535` and then immediately overwrote it with 50003
# (dead code) — keep the effective fixed default, but allow an override
# from the environment so concurrent runs on one host don't collide.
MASTER_PORT=${MASTER_PORT:-50003}

deepspeed --num_gpus=3 --master_port $MASTER_PORT main.py \
    --deepspeed deepspeed.json \
    --do_train \
    --train_file AdvertiseGen/train.json \
    --test_file AdvertiseGen/dev.json \
    --prompt_column content \
    --response_column summary \
    --overwrite_cache \
    --model_name_or_path THUDM/chatglm-6b \
    --output_dir ./output/adgen-chatglm-6b-ft-$LR \
    --overwrite_output_dir \
    --max_source_length 64 \
    --max_target_length 64 \
    --per_device_train_batch_size 4 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 4 \
    --predict_with_generate \
    --max_steps 5000 \
    --logging_steps 10 \
    --save_steps 1000 \
    --learning_rate $LR \
    --fp16
Loading…
Reference in New Issue