ColossalAI/applications/ColossalMoE/train.sh

# Launch configuration
NUM_GPU=8
MODEL="mistralai/Mixtral-8x7B-v0.1"

# Training hyperparameters
SEQ_LENGTH=2048
BATCH_SIZE=1
LR=0.00001
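
# Parallel layout sketch (assuming the hybrid plugin composes the parallel
# groups multiplicatively): pp_size * dp_size * ep_size = 2 * 1 * 8 = 16 ranks,
# i.e. two nodes at NUM_GPU=8 GPUs each, which is why a hostfile is needed.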

# Hybrid-parallel launch: ZeRO stage-1 optimizer sharding, pipeline
# parallelism, and expert parallelism for the MoE layers.
# (The trailing --max_length $SEQ_LENGTH is an assumed completion; the
# captured listing ended mid-command and SEQ_LENGTH was otherwise unused.)
# For a single node without a hostfile, torchrun works as well:
# torchrun --standalone --nproc_per_node $NUM_GPU \
colossalai run --nproc_per_node $NUM_GPU --hostfile "hostfile" \
    train.py \
    --num_epoch 1 \
    --model_name $MODEL \
    --plugin "hybrid" \
    --batch_size $BATCH_SIZE \
    --lr $LR \
    --zero_stage 1 \
    --pp_size 2 \
    --dp_size 1 \
    --ep_size 8 \
    --max_length $SEQ_LENGTH
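
# The colossalai launcher dispatches to the machines listed in the file passed
# via --hostfile. A minimal sketch of its contents, with hypothetical
# hostnames (one reachable host per line):
#
#   node01
#   node02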