ColossalAI/applications/ColossalChat/benchmarks/benchmark_ppo.sh

120 lines
3.9 KiB
Bash
Raw Normal View History

[ColossalChat] Update RLHF V2 (#5286) * Add dpo. Fix sft, ppo, lora. Refactor all * fix and tested ppo * 2 nd round refactor * add ci tests * fix ci * fix ci * fix readme, style * fix readme style * fix style, fix benchmark * reproduce benchmark result, remove useless files * rename to ColossalChat * use new image * fix ci workflow * fix ci * use local model/tokenizer for ci tests * fix ci * fix ci * fix ci * fix ci timeout * fix rm progress bar. fix ci timeout * fix ci * fix ci typo * remove 3d plugin from ci temporary * test environment * cannot save optimizer * support chat template * fix readme * fix path * test ci locally * restore build_or_pr * fix ci data path * fix benchmark * fix ci, move ci tests to 3080, disable fast tokenizer * move ci to 85 * support flash attention 2 * add all-in-one data preparation script. Fix colossal-llama2-chat chat template * add hardware requirements * move ci test data * fix save_model, add unwrap * fix missing bos * fix missing bos; support grad accumulation with gemini * fix ci * fix ci * fix ci * fix llama2 chat template config * debug sft * debug sft * fix colossalai version requirement * fix ci * add sanity check to prevent NaN loss * fix requirements * add dummy data generation script * add dummy data generation script * add dummy data generation script * add dummy data generation script * update readme * update readme * update readme and ignore * fix logger bug * support parallel_output * modify data preparation logic * fix tokenization * update lr * fix inference * run pre-commit --------- Co-authored-by: Tong Li <tong.li352711588@gmail.com>
2024-03-29 06:12:29 +00:00
#!/usr/bin/env bash
#######################################
# Restrict CUDA to the N GPUs that currently have the least memory in use.
# Globals:
#   CUDA_VISIBLE_DEVICES (written, exported): comma-separated row numbers of
#     the selected GPUs, ordered from least to most used memory.
# Arguments:
#   $1 - maximum number of GPUs to select (default: 9999).
# Outputs:
#   Writes the numbered per-GPU memory table and the resulting setting to
#   stdout; writes an error message to stderr if the nvidia-smi query fails.
# Returns:
#   0 on success; 1 if nvidia-smi fails, in which case CUDA_VISIBLE_DEVICES
#   is left untouched.
#######################################
set_n_least_used_CUDA_VISIBLE_DEVICES() {
    local n=${1:-"9999"}
    local smi_csv usage_table gpu_ids

    # Assigned separately from `local` so the exit status of nvidia-smi is
    # not masked by the (always successful) declaration.
    if ! smi_csv=$(nvidia-smi --query-gpu=memory.used --format=csv); then
        echo "set_n_least_used_CUDA_VISIBLE_DEVICES: nvidia-smi query failed" >&2
        return 1
    fi

    # Drop the CSV header line and number the remaining rows starting at 0;
    # the row number is what ends up in CUDA_VISIBLE_DEVICES.
    usage_table=$(printf '%s\n' "$smi_csv" | tail -n +2 | nl -v 0)

    echo "GPU Memory Usage:"
    # Printed to stdout instead of /dev/tty so that it also works (and is
    # captured in logs) when there is no controlling terminal.
    printf '%s\n' "$usage_table"

    # Sort rows by used memory (column 2, general numeric), keep the row
    # numbers of the first N, and join them with commas.
    gpu_ids=$(printf '%s\n' "$usage_table" |
        sort -g -k 2 |
        awk '{print $1}' |
        head -n "$n" |
        paste -sd ',' -)

    export CUDA_VISIBLE_DEVICES=$gpu_ids
    echo "Now CUDA_VISIBLE_DEVICES is set to:"
    echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"
}
# Restrict this run to (at most) the 8 GPUs with the least memory in use.
set_n_least_used_CUDA_VISIBLE_DEVICES 8

# From here on: trace every command (-x) and treat unset variables as errors (-u).
set -xu

# Maximum number of attempts per benchmark configuration.
NUM_RETRY=3

# BASE_DIR is the parent of the directory that contains this script.
BASE_DIR=$(dirname "$(dirname "$(realpath "${BASH_SOURCE[0]}")")")
EXAMPLES_DIR=$BASE_DIR/examples
TEMP_DIR=$BASE_DIR/temp
MODEL_SAVE_PATH=$TEMP_DIR/rlhf_models
MODELS_DIR=$TEMP_DIR/models_config

# To benchmark different models, change the following line
# MODELS=('125m' '350m' '700m' '1.3b' '2.7b' '3.5b' '5.5b' '6.7b' '10b' '13b')
MODELS=('125m')

# To benchmark different strategies, change the following line
# (Bash array elements are separated by whitespace, not commas.)
# PLUGINS=('zero2' 'zero2_cpu' '3d')
PLUGINS=('zero2')

LORA_RANK=('0')

export OMP_NUM_THREADS=8

# Remove result files left over from a previous run. -f keeps a first run
# (where the files do not exist yet) from printing an error.
rm -f ./benchmark_memory_consumption.txt
rm -f ./benchmark_performance_summarization.txt

# install requirements
pip install -r "$EXAMPLES_DIR/requirements.txt"
#######################################
# Print one of the given arguments, chosen uniformly via $RANDOM.
# Arguments:
#   $@ - candidate values (empty strings are valid candidates).
# Outputs:
#   Writes the chosen candidate, followed by a newline, to stdout.
# Returns:
#   0 on success; 1 (with no output) when called without candidates.
#######################################
random_choice() {
    local -a candidates=("$@")
    local count=$#

    # Without this guard `RANDOM % 0` aborts with a division-by-zero error.
    if ((count == 0)); then
        return 1
    fi

    # printf with a quoted expansion prints the value verbatim: no word
    # splitting, no globbing, and no special handling of values like "-n".
    printf '%s\n' "${candidates[RANDOM % count]}"
}
echo "[Test]: testing ppo ..."

# Configurations to skip, written as "<model>-<plugin>-<lora_rank>" or
# "<model>-<plugin>".
SKIPPED_TESTS=(
)

# Candidate gradient-checkpointing flags; one is picked at random for each
# configuration. Use ('' '--grad_checkpoint') to also exercise checkpointing.
GRAD_CKPTS=('')

# Prompt dataset shards part-00000 .. part-00009. The list is the same for
# every run, so it is built once up front.
declare -a prompt_dataset=()
for split in $(seq -f "%05g" 0 9); do
    prompt_dataset+=("$TEMP_DIR/benchmark/arrow/part-$split")
done

for lora_rank in "${LORA_RANK[@]}"; do
    for model in "${MODELS[@]}"; do
        # Visit the plugins in random order. Word splitting of shuf's output
        # is intentional: plugin names contain no whitespace.
        plugins=($(shuf -e "${PLUGINS[@]}"))
        for plugin in "${plugins[@]}"; do
            # The right-hand side of =~ is quoted, so it is matched as a
            # literal substring; the surrounding spaces anchor whole entries.
            if [[ " ${SKIPPED_TESTS[*]} " =~ " $model-$plugin-$lora_rank " ]]; then
                echo "[Test]: Skipped $model-$plugin-$lora_rank"
                continue
            elif [[ " ${SKIPPED_TESTS[*]} " =~ " $model-$plugin " ]]; then
                echo "[Test]: Skipped $model-$plugin"
                continue
            fi

            pretrain=$model
            tokenizer_dir="facebook/opt-125m"
            grad_ckpt=$(random_choice "${GRAD_CKPTS[@]}")

            # The '3d' plugin is run with --tp 4; every other plugin with 1.
            tp='1'
            if [[ $plugin == "3d" ]]; then
                tp='4'
            fi

            # Initialised as "failed" so the check after the loop is
            # well-defined under `set -u` even if no attempt is made.
            passed=1
            for ((i = 1; i <= NUM_RETRY; i++)); do
                echo "[Test]: $model-$plugin-$lora_rank, attempt $i"
                # --lr is passed exactly once. This command line used to pass
                # `--lr 9e-6` and then `--lr 2e-5`; the later value presumably
                # took effect (argparse keeps the last occurrence -- confirm
                # against benchmark_ppo.py), so 2e-5 is the one kept.
                # ${grad_ckpt:+...} adds the flag only when it is non-empty.
                colossalai run --nproc_per_node 8 --master_port 28547 "$BASE_DIR/benchmarks/benchmark_ppo.py" \
                    --pretrain "$pretrain" \
                    --tokenizer_dir "$tokenizer_dir" \
                    --prompt_dataset "${prompt_dataset[@]}" \
                    --ptx_coef 0 \
                    --save_path "$MODEL_SAVE_PATH" \
                    --conversation_template_config ./Opt.json \
                    --lora_rank "$lora_rank" \
                    --plugin "$plugin" \
                    --num_episodes 5 \
                    --num_collect_steps 1 \
                    --num_update_steps 1 \
                    --max_seq_len 128 \
                    --max_length 512 \
                    --experience_batch_size 32 \
                    --train_batch_size 32 \
                    --accumulation_steps 1 \
                    --mixed_precision "bf16" \
                    --grad_clip 1.0 \
                    --use_flash_attn \
                    --tp "$tp" \
                    --lr 2e-5 \
                    ${grad_ckpt:+"$grad_ckpt"}
                passed=$?
                if [[ $passed -eq 0 ]]; then
                    # Clean up artifacts of the successful run. ${VAR:?}
                    # aborts instead of expanding to "/*" if a path variable
                    # is ever empty.
                    rm -rf -- "${MODEL_SAVE_PATH:?}"/*
                    rm -rf -- "${MODELS_DIR:?}"/*
                    break
                fi
            done

            # Every attempt failed: abort the whole benchmark.
            if [[ $passed -ne 0 ]]; then
                echo "[Test]: Failed $model-$plugin-$lora_rank"
                exit 1
            fi
        done
    done
done