ColossalAI/examples/language/openmoe/benchmark/benchmark_cai_dist.sh

#!/bin/bash
set -xue
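
# Usage sketch (assumption): bash benchmark/benchmark_cai_dist.sh
# Requires NUM_GPU GPUs per node and a hostfile.txt in the working directory.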
NUM_GPU=8
MODEL="8b"
SEQ_LENGTH=2048
WARMUP=20
ACTIVE=4
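
# NOTE (assumption): WARMUP and ACTIVE are forwarded to benchmark_cai.py as the
# number of warm-up steps (excluded from measurement) and actively timed steps.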
# HACK: make the model package importable by putting the example root on PYTHONPATH
example_dir=$(dirname "$(realpath "$(dirname "$0")")")
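# e.g. for .../openmoe/benchmark/benchmark_cai_dist.sh this resolves to the
# openmoe example root, .../openmoe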
if [ -z "${PYTHONPATH+x}" ]; then
    export PYTHONPATH=$example_dir
else
    export PYTHONPATH=$example_dir:$PYTHONPATH
fi
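
# NOTE: both runs below expect "hostfile.txt" in the current working directory
# (not next to this script), listing one reachable host per line, e.g.
# (hypothetical hostnames):
#   node-01
#   node-02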

# ep: naive expert parallelism with ZeRO stage 2
echo -e "\n\n Naive EP \n\n"
colossalai run --nproc_per_node $NUM_GPU --hostfile "hostfile.txt" \
    $example_dir/benchmark/benchmark_cai.py \
    --model_name $MODEL \
    --batch_size 12 \
    --seq_length $SEQ_LENGTH \
    --warmup $WARMUP \
    --active $ACTIVE \
    --plugin ep \
    --zero_stage 2
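
# NOTE (assumption): with the ep_zero plugin, --extra_dp_size 2 adds a
# data-parallel dimension of 2 for the experts (each expert is replicated
# across 2 ranks, lowering per-GPU expert memory); --load_balance rebalances
# experts across devices during training, and --overlap_alltoall overlaps
# all-to-all communication with computation.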

# ep_zero: expert parallelism with ZeRO stage 1, an extra data-parallel
# dimension for experts, fused kernels, load balancing, and overlapped all-to-all
echo -e "\n\n EP-ZERO \n\n"
colossalai run --nproc_per_node $NUM_GPU --hostfile "hostfile.txt" \
    $example_dir/benchmark/benchmark_cai.py \
    --model_name $MODEL \
    --batch_size 20 \
    --seq_length $SEQ_LENGTH \
    --warmup $WARMUP \
    --active $ACTIVE \
    --plugin ep_zero \
    --use_kernel \
    --extra_dp_size 2 \
    --zero_stage 1 \
    --load_balance \
    --overlap_alltoall