Mirror of https://github.com/THUDM/ChatGLM-6B
Commit 47a5ec121e (parent cbb9f44e30): Add deepspeed finetuning scripts
@@ -11,6 +11,9 @@ class ModelArguments:
     model_name_or_path: str = field(
         metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"}
     )
     ptuning_checkpoint: str = field(
         default=None, metadata={"help": "Path to p-tuning v2 checkpoints"}
     )
     config_name: Optional[str] = field(
         default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
     )
|
{
  "train_micro_batch_size_per_gpu": "auto",
  "zero_allow_untested_optimizer": true,
  "fp16": {
    "enabled": "auto",
    "loss_scale": 0,
    "initial_scale_power": 16,
    "loss_scale_window": 1000,
    "hysteresis": 2,
    "min_loss_scale": 1
  },
  "zero_optimization": {
    "stage": 2,
    "allgather_partitions": true,
    "allgather_bucket_size": 5e8,
    "overlap_comm": false,
    "reduce_scatter": true,
    "reduce_bucket_size": 5e8,
    "contiguous_gradients": true
  }
}
|
#!/usr/bin/env bash
# Full-parameter finetuning of ChatGLM-6B on the AdvertiseGen dataset
# via DeepSpeed ZeRO (config in deepspeed.json), fp16, 3 GPUs.

LR=1e-4

# Launcher rendezvous port. Original script computed a random port with
# `shuf -n 1 -i 10000-65535` and then immediately overwrote it with 50003
# (dead code) — keep the effective fixed default, but allow an override
# from the environment so concurrent runs on one host don't collide.
MASTER_PORT=${MASTER_PORT:-50003}

deepspeed --num_gpus=3 --master_port $MASTER_PORT main.py \
    --deepspeed deepspeed.json \
    --do_train \
    --train_file AdvertiseGen/train.json \
    --test_file AdvertiseGen/dev.json \
    --prompt_column content \
    --response_column summary \
    --overwrite_cache \
    --model_name_or_path THUDM/chatglm-6b \
    --output_dir ./output/adgen-chatglm-6b-ft-$LR \
    --overwrite_output_dir \
    --max_source_length 64 \
    --max_target_length 64 \
    --per_device_train_batch_size 4 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 4 \
    --predict_with_generate \
    --max_steps 5000 \
    --logging_steps 10 \
    --save_steps 1000 \
    --learning_rate $LR \
    --fp16
Loading…
Reference in New Issue