[hotfix] fix gpt gemini example (#2404)

* [hotfix] fix gpt gemini example

* [example] add new assertions
pull/2405/head
HELSON 2023-01-09 15:52:17 +08:00 committed by GitHub
parent 9880fd2cd8
commit 498b5ca993
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 18 additions and 14 deletions

View File

@@ -1,4 +1,5 @@
for MODEL_TYPE in "gpt2_medium"; do for MODEL_TYPE in "gpt2_medium"; do
for DISPAN in "colossalai"; do
for BATCH_SIZE in 16; do for BATCH_SIZE in 16; do
for GPUNUM in 1 2 4 8; do for GPUNUM in 1 2 4 8; do
for TPDEGREE in 1 2 4 8; do for TPDEGREE in 1 2 4 8; do
@@ -7,9 +8,9 @@ for MODEL_TYPE in "gpt2_medium"; do
fi fi
for PLACEMENT in "cpu" "auto"; do for PLACEMENT in "cpu" "auto"; do
echo "****************** Begin ***************************" echo "****************** Begin ***************************"
echo "* benchmrking MODEL_TYPE ${MODEL_TYPE} BS ${BATCH_SIZE} BS ${BS} GPUNUM ${GPUNUM} TPDEGREE ${TPDEGREE} PLACEMENT ${PLACEMENT}" echo "+ benchmrking MODEL ${MODEL_TYPE} DISPAN ${DISPAN} GPU ${GPUNUM} BS ${BATCH_SIZE} TP ${TPDEGREE} POLICY ${PLACEMENT}"
MODEL_TYPE=${MODEL_TYPE} BATCH_SIZE=${BATCH_SIZE} GPUNUM=${GPUNUM} TPDEGREE=${TPDEGREE} PLACEMENT=${PLACEMENT} \ MODEL_TYPE=${MODEL_TYPE} DISPAN=${DISPAN} BATCH_SIZE=${BATCH_SIZE} GPUNUM=${GPUNUM} TPDEGREE=${TPDEGREE} PLACEMENT=${PLACEMENT} \
bash ./gemini/run_gemini.sh bash ./run_gemini.sh
echo "****************** Finished ***************************" echo "****************** Finished ***************************"
echo "" echo ""
echo "" echo ""
@@ -17,4 +18,5 @@ for MODEL_TYPE in "gpt2_medium"; do
done done
done done
done done
done
done done

View File

@@ -270,6 +270,7 @@ def main():
tp_pg = ProcessGroup(tp_degree=args.tp_degree) tp_pg = ProcessGroup(tp_degree=args.tp_degree)
# Tensor Parallelism (TP) # Tensor Parallelism (TP)
# You should notice that v0.1.10 is not compatible with TP degree > 1
tensor_parallelize(model, tp_pg) tensor_parallelize(model, tp_pg)
# build a Gemini model and a highly optimized cpu optimizer # build a Gemini model and a highly optimized cpu optimizer
@@ -278,6 +279,7 @@ def main():
logger.info(get_mem_info(prefix='After init optim, '), ranks=[0]) logger.info(get_mem_info(prefix='After init optim, '), ranks=[0])
else: else:
assert args.tp_degree == 1, "The degree of TP should be 1 for DDP examples."
model = model_builder(args.model_type)(checkpoint=True).cuda() model = model_builder(args.model_type)(checkpoint=True).cuda()
if args.distplan.startswith("torch"): if args.distplan.startswith("torch"):