mirror of https://github.com/hpcaitech/ColossalAI
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
41 lines
809 B
41 lines
809 B
1 year ago
|
#!/bin/bash
#
# Launches train.py through `torchrun --standalone` with the "ep_zero" plugin.
# Edit the settings below to change the number of processes, the model name,
# the batch size, or the learning rate.

# -x: trace every command, -u: error on unset variables, -e: stop on failure.
set -xue

NUM_GPU=8      # forwarded to torchrun as --nproc_per_node
MODEL="8b"     # forwarded to train.py as --model_name
# NOTE(review): SEQ_LENGTH is not passed to any command visible in this
# script -- confirm whether train.py should receive it or it can be removed.
SEQ_LENGTH=2048
BATCH_SIZE=1   # forwarded to train.py as --batch_size
LR=0.00001     # forwarded to train.py as --lr

# ep zero
# Expansions are quoted so an edited value containing whitespace or glob
# characters is still passed to train.py as a single argument.
torchrun --standalone --nproc_per_node "$NUM_GPU" train.py \
    --num_epoch 1 \
    --model_name "$MODEL" \
    --plugin "ep_zero" \
    --batch_size "$BATCH_SIZE" \
    --lr "$LR" \
    --zero_stage 1 \
    --extra_dp_size 2

# ep
# torchrun --standalone --nproc_per_node $NUM_GPU train.py \
#     --num_epoch 1 \
#     --model_name $MODEL \
#     --plugin "ep_zero" \
#     --batch_size $BATCH_SIZE \
#     --lr $LR \
#     --zero_stage 1

# hybrid
|
||
|
# torchrun --standalone --nproc_per_node $NUM_GPU train.py \
|
||
|
# --num_epoch 1 \
|
||
|
# --model_name $MODEL \
|
||
|
# --plugin "hybrid" \
|
||
|
# --batch_size $BATCH_SIZE \
|
||
|
# --lr $LR \
|
||
|
# --zero_stage 1 \
|
||
|
# --pp_size 2 \
|
||
|
# --dp_size 1 \
|
||
|
# --ep_size 2 \
|