#!/usr/bin/env bash
# ColossalAI/applications/ColossalMoE/train.sh
#
# Launches train.py through torchrun on a single node (--standalone),
# starting $NUM_GPU worker processes. Exactly one launch command is active
# (plugin "ep"); the "ep_zero" and "hybrid" variants are kept commented out
# below for reference.
#
# train.py is given as a relative path, so run this script from the
# directory that contains it.

# Abort on a reference to an unset variable so that a mistyped name cannot
# silently turn into an empty command-line argument.
set -u

NUM_GPU=8        # passed to torchrun --nproc_per_node
MODEL="8b"       # passed to --model_name
SEQ_LENGTH=2048  # not passed to any of the commands in this script
BATCH_SIZE=1     # passed to --batch_size
LR=0.00001       # passed to --lr

# ep zero
# torchrun --standalone --nproc_per_node "$NUM_GPU" train.py \
#   --num_epoch 1 \
#   --model_name "$MODEL" \
#   --plugin "ep_zero" \
#   --batch_size "$BATCH_SIZE" \
#   --lr "$LR" \
#   --zero_stage 1 \
#   --extra_dp_size 2

# ep
torchrun --standalone --nproc_per_node "$NUM_GPU" train.py \
  --num_epoch 1 \
  --model_name "$MODEL" \
  --plugin "ep" \
  --batch_size "$BATCH_SIZE" \
  --lr "$LR" \
  --zero_stage 2

# hybrid
# torchrun --standalone --nproc_per_node "$NUM_GPU" train.py \
#   --num_epoch 1 \
#   --model_name "$MODEL" \
#   --plugin "hybrid" \
#   --batch_size "$BATCH_SIZE" \
#   --lr "$LR" \
#   --zero_stage 1 \
#   --pp_size 2 \
#   --dp_size 1 \
#   --ep_size 2 \