mirror of https://github.com/hpcaitech/ColossalAI
[example] fix benchmark.sh for gpt example (#2229)
parent
78483a9fdd
commit
31fe84237b
|
@ -1,6 +1,6 @@
|
|||
for MODEL_NAME in "GPT2small"
|
||||
for MODEL_TYPE in "gpt2_medium"
|
||||
do
|
||||
for BATCH_SIZE in 8
|
||||
for BATCH_SIZE in 16
|
||||
do
|
||||
for GPUNUM in 1 2 4 8
|
||||
do
|
||||
|
@ -11,8 +11,8 @@ then
|
|||
continue
|
||||
fi
|
||||
echo "****************** Begin ***************************"
|
||||
echo "* benchmrking MODEL_NAME ${MODEL_NAME} BS ${BATCH_SIZE} BS ${BS} GPUNUM ${GPUNUM} TPDEGREE ${TPDEGREE}"
|
||||
bash ./run.sh
|
||||
# Print the configuration that is about to be benchmarked.
# NOTE(review): ${BS} is not assigned in any visible line, so the second "BS"
# field probably prints empty — confirm and drop it if so.
echo "* benchmarking MODEL_TYPE ${MODEL_TYPE} BS ${BATCH_SIZE} BS ${BS} GPUNUM ${GPUNUM} TPDEGREE ${TPDEGREE}"
|
||||
MODEL_TYPE=${MODEL_TYPE} BATCH_SIZE=${BATCH_SIZE} GPUNUM=${GPUNUM} TPDEGREE=${TPDEGREE} bash ./run.sh
|
||||
echo "****************** Finished ***************************"
|
||||
echo ""
|
||||
echo ""
|
||||
|
|
|
@ -1,13 +1,13 @@
|
|||
# distplan in ["colossalai", "zero1", "zero2", "torch_ddp", "torch_zero"]
|
||||
export DISTPAN={$DISTPAN:-"colossalai"}
|
||||
export DISTPAN=${DISTPAN:-"colossalai"}
|
||||
|
||||
# The following options are only valid when DISTPAN="colossalai"
|
||||
export TPDEGREE=${TPDEGREE:-1}
|
||||
export GPUNUM=${GPUNUM:-1}
|
||||
export PLACEMENT=${PLACEMENT:'const'}
|
||||
export USE_SHARD_INIT=${USE_SHARD_INIT:False}
|
||||
export BATCH_SIZE=${BATCH_SIZE:-8}
|
||||
export MODEL_TYPE=${MODEL_TYPE:"gpt2_medium"}
|
||||
export TPDEGREE=${TPDEGREE:-1}
|
||||
export PLACEMENT=${PLACEMENT:-"const"}
|
||||
export USE_SHARD_INIT=${USE_SHARD_INIT:-False}
|
||||
export BATCH_SIZE=${BATCH_SIZE:-16}
|
||||
export MODEL_TYPE=${MODEL_TYPE:-"gpt2_medium"}
|
||||
|
||||
mkdir -p logs
|
||||
# Run train_gpt_demo.py through torchrun with the settings exported above and
# copy its combined stdout/stderr into a per-configuration file under ./logs.
# pipefail makes the pipeline return torchrun's exit status instead of tee's,
# so a failed training run is no longer reported as success (requires bash).
# Every expansion is quoted so an unexpected value cannot be word-split or
# glob-expanded into extra arguments or a different log path.
set -o pipefail
torchrun --standalone --nproc_per_node="${GPUNUM}" train_gpt_demo.py \
  --tp_degree="${TPDEGREE}" \
  --model_type="${MODEL_TYPE}" \
  --batch_size="${BATCH_SIZE}" \
  --placement "${PLACEMENT}" \
  --shardinit "${USE_SHARD_INIT}" \
  --distplan "${DISTPAN}" 2>&1 \
  | tee "./logs/${MODEL_TYPE}_${DISTPAN}_gpu_${GPUNUM}_bs_${BATCH_SIZE}_tp_${TPDEGREE}.log"
|
||||
|
|
Loading…
Reference in New Issue