#!/usr/bin/env bash
# ColossalAI/examples/language/gpt/gemini/run_gemini.sh
#
# Launches ./train_gpt_demo.py via `torchrun --standalone` with GPUNUM
# processes per node, and mirrors the combined stdout/stderr of the run into
# a log file under ./gemini_logs whose name encodes the chosen settings.
#
# Every setting below may be overridden from the environment; the value after
# `:-` is the default used when the variable is unset or empty. Example:
#   GPUNUM=4 BATCH_SIZE=32 bash run_gemini.sh

# -e: abort on the first failing command (e.g. mkdir).
# -u: treat a reference to an unset variable as an error.
# pipefail: without it the status of the final pipeline is that of `tee`, so a
#           failed torchrun run would still make this script exit with 0.
set -euo pipefail
# Trace each command so the exact launch line is visible in the console output.
set -x

# distplan in ["colossalai", "zero1", "zero2", "torch_ddp", "torch_zero"]
export DISTPLAN="${DISTPLAN:-colossalai}"

# The following options are only valid when DISTPLAN="colossalai"
export GPUNUM="${GPUNUM:-1}"
export TPDEGREE="${TPDEGREE:-1}"
export PLACEMENT="${PLACEMENT:-cpu}"
export USE_SHARD_INIT="${USE_SHARD_INIT:-False}"
export BATCH_SIZE="${BATCH_SIZE:-16}"
export MODEL_TYPE="${MODEL_TYPE:-gpt2_medium}"

# export PYTHONPATH=$PWD:$PYTHONPATH

mkdir -p gemini_logs

# One log per configuration: the file name is built from every tunable above.
log_file="./gemini_logs/${MODEL_TYPE}_${DISTPLAN}_gpu_${GPUNUM}_bs_${BATCH_SIZE}_tp_${TPDEGREE}_${PLACEMENT}.log"

# All expansions are quoted so that each option reaches the training script as
# exactly one argument regardless of the characters in the value.
torchrun --standalone --nproc_per_node="${GPUNUM}" ./train_gpt_demo.py \
  --tp_degree="${TPDEGREE}" \
  --model_type="${MODEL_TYPE}" \
  --batch_size="${BATCH_SIZE}" \
  --placement="${PLACEMENT}" \
  --shardinit="${USE_SHARD_INIT}" \
  --distplan="${DISTPLAN}" \
  2>&1 | tee "${log_file}"
# ---------------------------------------------------------------------------
# Git blame view of the script above (commit message, age, and the line it
# touched). Kept for reference only; not part of the executable script.
# ---------------------------------------------------------------------------
# [example] make gpt example directory more clear (#2353) 2 years ago			`set -x`
# [example] add zero1, zero2 example in GPT examples (#2146) * [example] add zero1 and zero2 for GPT * update readme in gpt example * polish code * change init value * update readme 2 years ago			`# distplan in ["colossalai", "zero1", "zero2", "torch_ddp", "torch_zero"]`
# [hotfix] add DISTPAN argument for benchmark (#2412) * change the benchmark config file * change config * revert config file * rename distpan to distplan 2 years ago			`export DISTPLAN=${DISTPLAN:-"colossalai"}`
# [example] enhance GPT demo (#1959) * [example] enhence GPT demo * Update README.md Co-authored-by: binmakeswell <binmakeswell@gmail.com> 2 years ago
# [hotfix] add DISTPAN argument for benchmark (#2412) * change the benchmark config file * change config * revert config file * rename distpan to distplan 2 years ago			`# The following options only valid when DISTPLAN="colossalai"`
# [example] add benchmark.sh for gpt (#2226) 2 years ago			`export GPUNUM=${GPUNUM:-1}`
# [example] fix benchmark.sh for gpt example (#2229) 2 years ago			`export TPDEGREE=${TPDEGREE:-1}`
# [example] fix gpt example with 0.1.10 (#2265) 2 years ago			`export PLACEMENT=${PLACEMENT:-"cpu"}`
# [example] fix benchmark.sh for gpt example (#2229) 2 years ago			`export USE_SHARD_INIT=${USE_SHARD_INIT:-False}`
# 			`export BATCH_SIZE=${BATCH_SIZE:-16}`
# 			`export MODEL_TYPE=${MODEL_TYPE:-"gpt2_medium"}`
# [example] enhance GPT demo (#1959) * [example] enhence GPT demo * Update README.md Co-authored-by: binmakeswell <binmakeswell@gmail.com> 2 years ago
# [example] make gpt example directory more clear (#2353) 2 years ago			`# export PYTHONPATH=$PWD:$PYTHONPATH`

# [example] GPT polish readme (#2274) 2 years ago			`mkdir -p gemini_logs`
# [example] make gpt example directory more clear (#2353) 2 years ago
# 			`torchrun --standalone --nproc_per_node=${GPUNUM} ./train_gpt_demo.py \`
# [example] update gemini benchmark bash (#2306) 2 years ago			`--tp_degree=${TPDEGREE} \`
# 			`--model_type=${MODEL_TYPE} \`
# 			`--batch_size=${BATCH_SIZE} \`
# 			`--placement=${PLACEMENT} \`
# 			`--shardinit=${USE_SHARD_INIT} \`
# [hotfix] add DISTPAN argument for benchmark (#2412) * change the benchmark config file * change config * revert config file * rename distpan to distplan 2 years ago			`--distplan=${DISTPLAN} \`
# 			`2>&1 \| tee ./gemini_logs/${MODEL_TYPE}_${DISTPLAN}_gpu_${GPUNUM}_bs_${BATCH_SIZE}_tp_${TPDEGREE}_${PLACEMENT}.log`