mirror of https://github.com/hpcaitech/ColossalAI
update script
parent f66469e209
commit ebd8cc579a
@@ -1,6 +1,6 @@
 
 
-NUM_GPU=4
+NUM_GPU=8
 MODEL="8b"
 SEQ_LENGTH=2048
 BATCH_SIZE=1
@@ -17,13 +17,13 @@ LR=0.00001
 # --extra_dp_size 2
 
 # ep
-CUDA_LAUNCH_BLOCKING=1 torchrun --standalone --nproc_per_node $NUM_GPU train.py \
+torchrun --standalone --nproc_per_node $NUM_GPU train.py \
     --num_epoch 1 \
     --model_name $MODEL \
-    --plugin "ep_zero" \
+    --plugin "ep" \
     --batch_size $BATCH_SIZE \
     --lr $LR \
-    --zero_stage 1
+    --zero_stage 2
 
 # hybrid
 # torchrun --standalone --nproc_per_node $NUM_GPU train.py \
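For context on what the changed flags drive: train.py selects a ColossalAI booster plugin based on --plugin and --zero_stage. The sketch below is a hypothetical illustration of that mapping using ColossalAI's Booster API; the plugin class, its import path, the argument names, and the ep_size/precision values are assumptions for illustration, not code taken from this commit.

# Hypothetical sketch (not from this commit): how "--plugin ep --zero_stage 2"
# could be wired to a ColossalAI booster plugin inside train.py.
import colossalai
from colossalai.booster import Booster
from colossalai.booster.plugin import MoeHybridParallelPlugin  # assumed import path

colossalai.launch_from_torch()  # picks up the process group created by torchrun

# Expert parallelism only (no TP/PP), with ZeRO stage 2 sharding of optimizer
# states and gradients across the ranks started by torchrun.
plugin = MoeHybridParallelPlugin(
    tp_size=1,
    pp_size=1,
    ep_size=8,         # assumed to follow NUM_GPU=8 in the updated script
    zero_stage=2,
    precision="bf16",  # assumed precision setting
)
booster = Booster(plugin=plugin)
# model, optimizer, criterion, dataloader, lr_scheduler = booster.boost(
#     model, optimizer, criterion=criterion, dataloader=dataloader
# )

On the other two changes in the hunk: dropping CUDA_LAUNCH_BLOCKING=1 removes kernel-launch serialization that is only useful while debugging, and switching from the "ep_zero" variant at ZeRO stage 1 to plain "ep" at stage 2 presumably gives up the extra data-parallel grouping in exchange for sharding gradients as well as optimizer states.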