@ -126,8 +126,8 @@ gradient_accumulation_steps=1
```
learning_rate=5e-4
per_device_train_batch_size=1
gradient_accumulation_steps=16
per_device_train_batch_size=16
gradient_accumulation_steps=1
@ -2,7 +2,6 @@
LR=1e-4
MASTER_PORT=$(shuf -n 1 -i 10000-65535)
MASTER_PORT=50003
deepspeed --num_gpus=4 --master_port $MASTER_PORT main.py \
--deepspeed deepspeed.json \