ColossalAI/applications/ColossalChat/tests/test_templating.sh


# Directory layout used by this test. Everything is derived from BASE_DIR,
# the parent of the directory that contains this script, so the test can be
# started from any working directory. The nested command substitutions are
# quoted so a checkout path containing whitespace is not word-split.
BASE_DIR=$(dirname "$(dirname "$(realpath "${BASH_SOURCE[0]}")")")
BASE_TEMP_DIR=$BASE_DIR/temp             # scratch area for generated files
EXAMPLES_DIR=$BASE_DIR/examples          # holds data_preparation_scripts/
TEST_DATA_DIR=$BASE_DIR/tests/test_data  # input fixtures (sft/, dpo/)
DATA_SAVE_PATH=$BASE_TEMP_DIR/tests      # per-model output root
CONFIG_DIR=$BASE_DIR/config              # holds conversation_template/*.json

# Model aliases exercised by the loops below. Each alias must be known to both
# get_pretrain and get_conversation_template_config.
# MODELS=("colossal-llama2" "llama2" "mistral" "chatGLM2" "chatGLM3" "deepseek" "Yi" "baichuan")  # for local test
MODELS=("colossal-llama2" "llama2" "chatGLM2" "chatGLM3" "deepseek" "Yi")

#######################################
# Translate a short model alias into the tokenizer/model identifier that the
# test loops pass to prepare_dataset.py as --tokenizer_dir.
# Arguments: $1 - model alias (e.g. "llama2", "chatGLM3", "Yi")
# Outputs:   the identifier on stdout; for an unknown alias nothing is written
#            to stdout and the diagnostic goes to stderr, so callers that
#            capture the result with $(...) never mistake the error text for
#            an identifier.
# Returns:   exits with status 1 on an unknown alias. Inside $(...) this only
#            terminates the substitution subshell, so callers must check the
#            substitution's exit status themselves.
#######################################
get_pretrain() {
    local model=$1
    case "$model" in
        colossal-llama2) echo "hpcai-tech/Colossal-LLaMA-2-7b-base" ;;
        llama2)          echo "hf-internal-testing/llama-tokenizer" ;;
        phi)             echo "microsoft/phi-2" ;;
        mistral)         echo "mistralai/Mistral-7B-Instruct-v0.3" ;;
        chatGLM2)        echo "THUDM/chatglm2-6b" ;;
        chatGLM3)        echo "THUDM/chatglm3-6b" ;;
        deepseek)        echo "deepseek-ai/DeepSeek-V2-Lite" ;;
        Yi)              echo "01-ai/Yi-1.5-9B-Chat" ;;
        baichuan)        echo "baichuan-inc/Baichuan2-13B-Chat" ;;
        *)
            echo "Unknown model $model" >&2
            exit 1
            ;;
    esac
}


#######################################
# Translate a short model alias into the path of its conversation-template
# JSON file under $CONFIG_DIR/conversation_template.
# Globals:   CONFIG_DIR (read)
# Arguments: $1 - model alias (e.g. "llama2", "chatGLM3", "Yi")
# Outputs:   the template path on stdout; for an unknown alias nothing is
#            written to stdout and the diagnostic goes to stderr, so callers
#            that capture the result with $(...) never mistake the error text
#            for a path.
# Returns:   exits with status 1 on an unknown alias. Inside $(...) this only
#            terminates the substitution subshell, so callers must check the
#            substitution's exit status themselves.
#######################################
get_conversation_template_config() {
    local model=$1
    local template_dir=$CONFIG_DIR/conversation_template
    case "$model" in
        colossal-llama2) echo "$template_dir/colossal-llama2.json" ;;
        llama2)          echo "$template_dir/llama2.json" ;;
        deepseek)        echo "$template_dir/deepseek-ai_DeepSeek-V2-Lite.json" ;;
        # NOTE(review): the "mistral" alias points at the Mixtral-8x7B template
        # file; this mapping is reproduced as-is from the original.
        mistral)         echo "$template_dir/mistralai_Mixtral-8x7B-Instruct-v0.1.json" ;;
        chatGLM2)        echo "$template_dir/THUDM_chatglm2-6b.json" ;;
        chatGLM3)        echo "$template_dir/THUDM_chatglm3-6b.json" ;;
        phi)             echo "$template_dir/microsoft_phi-2.json" ;;
        Yi)              echo "$template_dir/01-ai_Yi-1.5-9B-Chat.json" ;;
        baichuan)        echo "$template_dir/baichuan-inc_Baichuan2-13B-Chat.json" ;;
        *)
            echo "Unknown model $model" >&2
            exit 1
            ;;
    esac
}

# Test SFT data Preparation
# For every alias in MODELS: clear the previous outputs, run prepare_dataset.py
# in "sft" mode on the bundled SFT fixtures, then let verify_chat_data.py
# compare the generated part-00005.jsonl shard against the source data.
# The script stops with status 1 at the first failing step.
for model in "${MODELS[@]}"; do
    echo "Testing SFT data templating for $model"
    SAVE_DIR=$DATA_SAVE_PATH/sft/$model
    # Remove stale outputs so a previous run cannot mask a failure.
    rm -rf -- "$SAVE_DIR/cache" "$SAVE_DIR/jsonl" "$SAVE_DIR/arrow"
    # The lookup helpers run inside $(...), so their `exit 1` only ends the
    # substitution subshell; the status has to be checked here explicitly.
    if ! pretrain=$(get_pretrain "$model"); then
        echo "[Test]: Failed to resolve the tokenizer for $model"
        exit 1
    fi
    if ! conversation_template_config=$(get_conversation_template_config "$model"); then
        echo "[Test]: Failed to resolve the conversation template config for $model"
        exit 1
    fi
    if ! python "$EXAMPLES_DIR/data_preparation_scripts/prepare_dataset.py" --type sft \
        --data_input_dirs "$TEST_DATA_DIR/sft" \
        --tokenizer_dir "$pretrain" \
        --conversation_template_config "$conversation_template_config" \
        --data_cache_dir "$SAVE_DIR/cache" \
        --data_jsonl_output_dir "$SAVE_DIR/jsonl" \
        --data_arrow_output_dir "$SAVE_DIR/arrow"; then
        echo "[Test]: Failed in the SFT data templating for $model"
        exit 1
    fi
    if ! python "$BASE_DIR/tests/verify_chat_data.py" \
        --data_source "$TEST_DATA_DIR/sft/test_sft_data.jsonl" \
        --to_verify_file "$SAVE_DIR/jsonl/part-00005.jsonl" \
        --data_type sft; then
        echo "[Test]: Failed in the SFT data templating test for $model"
        exit 1
    fi
done


# Test DPO/PPO data Preparation
# For every alias in MODELS: clear the previous outputs, run prepare_dataset.py
# in "preference" mode on the bundled DPO fixtures, then let
# verify_chat_data.py compare the generated part-00005.jsonl shard against the
# source data. The script stops with status 1 at the first failing step.
for model in "${MODELS[@]}"; do
    echo "Testing DPO/PPO data templating for $model"
    SAVE_DIR=$DATA_SAVE_PATH/dpo/$model
    # Remove stale outputs so a previous run cannot mask a failure.
    rm -rf -- "$SAVE_DIR/cache" "$SAVE_DIR/jsonl" "$SAVE_DIR/arrow"
    # The lookup helpers run inside $(...), so their `exit 1` only ends the
    # substitution subshell; the status has to be checked here explicitly.
    if ! pretrain=$(get_pretrain "$model"); then
        echo "[Test]: Failed to resolve the tokenizer for $model"
        exit 1
    fi
    if ! conversation_template_config=$(get_conversation_template_config "$model"); then
        echo "[Test]: Failed to resolve the conversation template config for $model"
        exit 1
    fi
    if ! python "$EXAMPLES_DIR/data_preparation_scripts/prepare_dataset.py" --type preference \
        --data_input_dirs "$TEST_DATA_DIR/dpo" \
        --tokenizer_dir "$pretrain" \
        --conversation_template_config "$conversation_template_config" \
        --data_cache_dir "$SAVE_DIR/cache" \
        --data_jsonl_output_dir "$SAVE_DIR/jsonl" \
        --data_arrow_output_dir "$SAVE_DIR/arrow"; then
        echo "[Test]: Failed in the DPO data templating for $model"
        exit 1
    fi
    if ! python "$BASE_DIR/tests/verify_chat_data.py" \
        --data_source "$TEST_DATA_DIR/dpo/test_dpo_data.jsonl" \
        --to_verify_file "$SAVE_DIR/jsonl/part-00005.jsonl" \
        --data_type dpo; then
        echo "[Test]: Failed in the DPO data templating test for $model"
        exit 1
    fi
done
[ColossalChat] Update RLHF V2 (#5286) * Add dpo. Fix sft, ppo, lora. Refactor all * fix and tested ppo * 2 nd round refactor * add ci tests * fix ci * fix ci * fix readme, style * fix readme style * fix style, fix benchmark * reproduce benchmark result, remove useless files * rename to ColossalChat * use new image * fix ci workflow * fix ci * use local model/tokenizer for ci tests * fix ci * fix ci * fix ci * fix ci timeout * fix rm progress bar. fix ci timeout * fix ci * fix ci typo * remove 3d plugin from ci temporary * test environment * cannot save optimizer * support chat template * fix readme * fix path * test ci locally * restore build_or_pr * fix ci data path * fix benchmark * fix ci, move ci tests to 3080, disable fast tokenizer * move ci to 85 * support flash attention 2 * add all-in-one data preparation script. Fix colossal-llama2-chat chat template * add hardware requirements * move ci test data * fix save_model, add unwrap * fix missing bos * fix missing bos; support grad accumulation with gemini * fix ci * fix ci * fix ci * fix llama2 chat template config * debug sft * debug sft * fix colossalai version requirement * fix ci * add sanity check to prevent NaN loss * fix requirements * add dummy data generation script * add dummy data generation script * add dummy data generation script * add dummy data generation script * update readme * update readme * update readme and ignore * fix logger bug * support parallel_output * modify data preparation logic * fix tokenization * update lr * fix inference * run pre-commit --------- Co-authored-by: Tong Li <tong.li352711588@gmail.com> 2024-03-29 06:12:29 +00:00
			`BASE_DIR=$(dirname $(dirname $(realpath $BASH_SOURCE)))`
			`BASE_TEMP_DIR=$BASE_DIR/temp`
			`EXAMPLES_DIR=$BASE_DIR/examples`
			`TEST_DATA_DIR=$BASE_DIR/tests/test_data`
			`DATA_SAVE_PATH=$BASE_TEMP_DIR/tests`
			`CONFIG_DIR=$BASE_DIR/config`

replace the customized dataloader setup with the build-in one 2024-06-07 09:44:25 +00:00			`# MODELS=("colossal-llama2" "llama2" "mistral" "chatGLM2" "chatGLM3" "deepseek" "Yi" "baichuan") # for local test`
			`MODELS=("colossal-llama2" "llama2" "chatGLM2" "chatGLM3" "deepseek" "Yi")`
remove models that require huggingface auth from ci 2024-05-29 02:10:37 +00:00
[ColossalChat] Update RLHF V2 (#5286) * Add dpo. Fix sft, ppo, lora. Refactor all * fix and tested ppo * 2 nd round refactor * add ci tests * fix ci * fix ci * fix readme, style * fix readme style * fix style, fix benchmark * reproduce benchmark result, remove useless files * rename to ColossalChat * use new image * fix ci workflow * fix ci * use local model/tokenizer for ci tests * fix ci * fix ci * fix ci * fix ci timeout * fix rm progress bar. fix ci timeout * fix ci * fix ci typo * remove 3d plugin from ci temporary * test environment * cannot save optimizer * support chat template * fix readme * fix path * test ci locally * restore build_or_pr * fix ci data path * fix benchmark * fix ci, move ci tests to 3080, disable fast tokenizer * move ci to 85 * support flash attention 2 * add all-in-one data preparation script. Fix colossal-llama2-chat chat template * add hardware requirements * move ci test data * fix save_model, add unwrap * fix missing bos * fix missing bos; support grad accumulation with gemini * fix ci * fix ci * fix ci * fix llama2 chat template config * debug sft * debug sft * fix colossalai version requirement * fix ci * add sanity check to prevent NaN loss * fix requirements * add dummy data generation script * add dummy data generation script * add dummy data generation script * add dummy data generation script * update readme * update readme * update readme and ignore * fix logger bug * support parallel_output * modify data preparation logic * fix tokenization * update lr * fix inference * run pre-commit --------- Co-authored-by: Tong Li <tong.li352711588@gmail.com> 2024-03-29 06:12:29 +00:00			`get_pretrain() {`
			`local model=$1`
			`if [[ $model == "colossal-llama2" ]]; then`
			`echo "hpcai-tech/Colossal-LLaMA-2-7b-base"`
			`elif [[ $model == "llama2" ]]; then`
			`echo "hf-internal-testing/llama-tokenizer"`
upgrade ppo dpo rm script 2024-05-28 03:04:39 +00:00			`elif [[ $model == "phi" ]]; then`
			`echo "microsoft/phi-2"`
[ColossalChat] Update RLHF V2 (#5286) * Add dpo. Fix sft, ppo, lora. Refactor all * fix and tested ppo * 2 nd round refactor * add ci tests * fix ci * fix ci * fix readme, style * fix readme style * fix style, fix benchmark * reproduce benchmark result, remove useless files * rename to ColossalChat * use new image * fix ci workflow * fix ci * use local model/tokenizer for ci tests * fix ci * fix ci * fix ci * fix ci timeout * fix rm progress bar. fix ci timeout * fix ci * fix ci typo * remove 3d plugin from ci temporary * test environment * cannot save optimizer * support chat template * fix readme * fix path * test ci locally * restore build_or_pr * fix ci data path * fix benchmark * fix ci, move ci tests to 3080, disable fast tokenizer * move ci to 85 * support flash attention 2 * add all-in-one data preparation script. Fix colossal-llama2-chat chat template * add hardware requirements * move ci test data * fix save_model, add unwrap * fix missing bos * fix missing bos; support grad accumulation with gemini * fix ci * fix ci * fix ci * fix llama2 chat template config * debug sft * debug sft * fix colossalai version requirement * fix ci * add sanity check to prevent NaN loss * fix requirements * add dummy data generation script * add dummy data generation script * add dummy data generation script * add dummy data generation script * update readme * update readme * update readme and ignore * fix logger bug * support parallel_output * modify data preparation logic * fix tokenization * update lr * fix inference * run pre-commit --------- Co-authored-by: Tong Li <tong.li352711588@gmail.com> 2024-03-29 06:12:29 +00:00			`elif [[ $model == "mistral" ]]; then`
upgrade ppo dpo rm script 2024-05-28 03:04:39 +00:00			`echo "mistralai/Mistral-7B-Instruct-v0.3"`
[ColossalChat] Update RLHF V2 (#5286) * Add dpo. Fix sft, ppo, lora. Refactor all * fix and tested ppo * 2 nd round refactor * add ci tests * fix ci * fix ci * fix readme, style * fix readme style * fix style, fix benchmark * reproduce benchmark result, remove useless files * rename to ColossalChat * use new image * fix ci workflow * fix ci * use local model/tokenizer for ci tests * fix ci * fix ci * fix ci * fix ci timeout * fix rm progress bar. fix ci timeout * fix ci * fix ci typo * remove 3d plugin from ci temporary * test environment * cannot save optimizer * support chat template * fix readme * fix path * test ci locally * restore build_or_pr * fix ci data path * fix benchmark * fix ci, move ci tests to 3080, disable fast tokenizer * move ci to 85 * support flash attention 2 * add all-in-one data preparation script. Fix colossal-llama2-chat chat template * add hardware requirements * move ci test data * fix save_model, add unwrap * fix missing bos * fix missing bos; support grad accumulation with gemini * fix ci * fix ci * fix ci * fix llama2 chat template config * debug sft * debug sft * fix colossalai version requirement * fix ci * add sanity check to prevent NaN loss * fix requirements * add dummy data generation script * add dummy data generation script * add dummy data generation script * add dummy data generation script * update readme * update readme * update readme and ignore * fix logger bug * support parallel_output * modify data preparation logic * fix tokenization * update lr * fix inference * run pre-commit --------- Co-authored-by: Tong Li <tong.li352711588@gmail.com> 2024-03-29 06:12:29 +00:00			`elif [[ $model == "chatGLM2" ]]; then`
			`echo "THUDM/chatglm2-6b"`
upgrade ppo dpo rm script 2024-05-28 03:04:39 +00:00			`elif [[ $model == "chatGLM3" ]]; then`
			`echo "THUDM/chatglm3-6b"`
			`elif [[ $model == "deepseek" ]]; then`
			`echo "deepseek-ai/DeepSeek-V2-Lite"`
[ColossalChat] Update RLHF V2 (#5286) * Add dpo. Fix sft, ppo, lora. Refactor all * fix and tested ppo * 2 nd round refactor * add ci tests * fix ci * fix ci * fix readme, style * fix readme style * fix style, fix benchmark * reproduce benchmark result, remove useless files * rename to ColossalChat * use new image * fix ci workflow * fix ci * use local model/tokenizer for ci tests * fix ci * fix ci * fix ci * fix ci timeout * fix rm progress bar. fix ci timeout * fix ci * fix ci typo * remove 3d plugin from ci temporary * test environment * cannot save optimizer * support chat template * fix readme * fix path * test ci locally * restore build_or_pr * fix ci data path * fix benchmark * fix ci, move ci tests to 3080, disable fast tokenizer * move ci to 85 * support flash attention 2 * add all-in-one data preparation script. Fix colossal-llama2-chat chat template * add hardware requirements * move ci test data * fix save_model, add unwrap * fix missing bos * fix missing bos; support grad accumulation with gemini * fix ci * fix ci * fix ci * fix llama2 chat template config * debug sft * debug sft * fix colossalai version requirement * fix ci * add sanity check to prevent NaN loss * fix requirements * add dummy data generation script * add dummy data generation script * add dummy data generation script * add dummy data generation script * update readme * update readme * update readme and ignore * fix logger bug * support parallel_output * modify data preparation logic * fix tokenization * update lr * fix inference * run pre-commit --------- Co-authored-by: Tong Li <tong.li352711588@gmail.com> 2024-03-29 06:12:29 +00:00			`elif [[ $model == "Yi" ]]; then`
upgrade ppo dpo rm script 2024-05-28 03:04:39 +00:00			`echo "01-ai/Yi-1.5-9B-Chat"`
			`elif [[ $model == "baichuan" ]]; then`
			`echo "baichuan-inc/Baichuan2-13B-Chat"`
[ColossalChat] Update RLHF V2 (#5286) * Add dpo. Fix sft, ppo, lora. Refactor all * fix and tested ppo * 2 nd round refactor * add ci tests * fix ci * fix ci * fix readme, style * fix readme style * fix style, fix benchmark * reproduce benchmark result, remove useless files * rename to ColossalChat * use new image * fix ci workflow * fix ci * use local model/tokenizer for ci tests * fix ci * fix ci * fix ci * fix ci timeout * fix rm progress bar. fix ci timeout * fix ci * fix ci typo * remove 3d plugin from ci temporary * test environment * cannot save optimizer * support chat template * fix readme * fix path * test ci locally * restore build_or_pr * fix ci data path * fix benchmark * fix ci, move ci tests to 3080, disable fast tokenizer * move ci to 85 * support flash attention 2 * add all-in-one data preparation script. Fix colossal-llama2-chat chat template * add hardware requirements * move ci test data * fix save_model, add unwrap * fix missing bos * fix missing bos; support grad accumulation with gemini * fix ci * fix ci * fix ci * fix llama2 chat template config * debug sft * debug sft * fix colossalai version requirement * fix ci * add sanity check to prevent NaN loss * fix requirements * add dummy data generation script * add dummy data generation script * add dummy data generation script * add dummy data generation script * update readme * update readme * update readme and ignore * fix logger bug * support parallel_output * modify data preparation logic * fix tokenization * update lr * fix inference * run pre-commit --------- Co-authored-by: Tong Li <tong.li352711588@gmail.com> 2024-03-29 06:12:29 +00:00			`else`
			`echo "Unknown model $model"`
			`exit 1`
			`fi`
			`}`

upgrade ppo dpo rm script 2024-05-28 03:04:39 +00:00
[ColossalChat] Update RLHF V2 (#5286) * Add dpo. Fix sft, ppo, lora. Refactor all * fix and tested ppo * 2 nd round refactor * add ci tests * fix ci * fix ci * fix readme, style * fix readme style * fix style, fix benchmark * reproduce benchmark result, remove useless files * rename to ColossalChat * use new image * fix ci workflow * fix ci * use local model/tokenizer for ci tests * fix ci * fix ci * fix ci * fix ci timeout * fix rm progress bar. fix ci timeout * fix ci * fix ci typo * remove 3d plugin from ci temporary * test environment * cannot save optimizer * support chat template * fix readme * fix path * test ci locally * restore build_or_pr * fix ci data path * fix benchmark * fix ci, move ci tests to 3080, disable fast tokenizer * move ci to 85 * support flash attention 2 * add all-in-one data preparation script. Fix colossal-llama2-chat chat template * add hardware requirements * move ci test data * fix save_model, add unwrap * fix missing bos * fix missing bos; support grad accumulation with gemini * fix ci * fix ci * fix ci * fix llama2 chat template config * debug sft * debug sft * fix colossalai version requirement * fix ci * add sanity check to prevent NaN loss * fix requirements * add dummy data generation script * add dummy data generation script * add dummy data generation script * add dummy data generation script * update readme * update readme * update readme and ignore * fix logger bug * support parallel_output * modify data preparation logic * fix tokenization * update lr * fix inference * run pre-commit --------- Co-authored-by: Tong Li <tong.li352711588@gmail.com> 2024-03-29 06:12:29 +00:00			`get_conversation_template_config() {`
			`local model=$1`
upgrade ppo dpo rm script 2024-05-28 03:04:39 +00:00			`if [[ $model == "colossal-llama2" ]]; then`
			`echo "$CONFIG_DIR/conversation_template/colossal-llama2.json"`
			`elif [[ $model == "llama2" ]]; then`
			`echo "$CONFIG_DIR/conversation_template/llama2.json"`
			`elif [[ $model == "deepseek" ]]; then`
			`echo "$CONFIG_DIR/conversation_template/deepseek-ai_DeepSeek-V2-Lite.json"`
			`elif [[ $model == "mistral" ]]; then`
			`echo "$CONFIG_DIR/conversation_template/mistralai_Mixtral-8x7B-Instruct-v0.1.json"`
			`elif [[ $model == "chatGLM2" ]]; then`
			`echo "$CONFIG_DIR/conversation_template/THUDM_chatglm2-6b.json"`
			`elif [[ $model == "chatGLM3" ]]; then`
			`echo "$CONFIG_DIR/conversation_template/THUDM_chatglm3-6b.json"`
			`elif [[ $model == "phi" ]]; then`
			`echo "$CONFIG_DIR/conversation_template/microsoft_phi-2.json"`
			`elif [[ $model == "Yi" ]]; then`
			`echo "$CONFIG_DIR/conversation_template/01-ai_Yi-1.5-9B-Chat.json"`
			`elif [[ $model == "baichuan" ]]; then`
			`echo "$CONFIG_DIR/conversation_template/baichuan-inc_Baichuan2-13B-Chat.json"`
			`else`
			`echo "Unknown model $model"`
			`exit 1`
			`fi`
[ColossalChat] Update RLHF V2 (#5286) * Add dpo. Fix sft, ppo, lora. Refactor all * fix and tested ppo * 2 nd round refactor * add ci tests * fix ci * fix ci * fix readme, style * fix readme style * fix style, fix benchmark * reproduce benchmark result, remove useless files * rename to ColossalChat * use new image * fix ci workflow * fix ci * use local model/tokenizer for ci tests * fix ci * fix ci * fix ci * fix ci timeout * fix rm progress bar. fix ci timeout * fix ci * fix ci typo * remove 3d plugin from ci temporary * test environment * cannot save optimizer * support chat template * fix readme * fix path * test ci locally * restore build_or_pr * fix ci data path * fix benchmark * fix ci, move ci tests to 3080, disable fast tokenizer * move ci to 85 * support flash attention 2 * add all-in-one data preparation script. Fix colossal-llama2-chat chat template * add hardware requirements * move ci test data * fix save_model, add unwrap * fix missing bos * fix missing bos; support grad accumulation with gemini * fix ci * fix ci * fix ci * fix llama2 chat template config * debug sft * debug sft * fix colossalai version requirement * fix ci * add sanity check to prevent NaN loss * fix requirements * add dummy data generation script * add dummy data generation script * add dummy data generation script * add dummy data generation script * update readme * update readme * update readme and ignore * fix logger bug * support parallel_output * modify data preparation logic * fix tokenization * update lr * fix inference * run pre-commit --------- Co-authored-by: Tong Li <tong.li352711588@gmail.com> 2024-03-29 06:12:29 +00:00			`}`

			`# Test SFT data Preparation`
			`for model in ${MODELS[@]}; do`
			`echo "Testing SFT data templating for $model"`
			`SAVE_DIR=$DATA_SAVE_PATH/sft/$model`
			`rm -rf $SAVE_DIR/cache`
			`rm -rf $SAVE_DIR/jsonl`
			`rm -rf $SAVE_DIR/arrow`
			`pretrain=$(get_pretrain $model)`
			`conversation_template_config=$(get_conversation_template_config $model)`
			`python $EXAMPLES_DIR/data_preparation_scripts/prepare_dataset.py --type sft --data_input_dirs $TEST_DATA_DIR/sft \`
			`--tokenizer_dir $pretrain \`
			`--conversation_template_config $conversation_template_config \`
			`--data_cache_dir $SAVE_DIR/cache \`
			`--data_jsonl_output_dir $SAVE_DIR/jsonl \`
			`--data_arrow_output_dir $SAVE_DIR/arrow`
			`passed=$?`
			`if [ $passed -ne 0 ]; then`
			`echo "[Test]: Failed in the SFT data templating for $model"`
			`exit 1`
			`fi`
			`python $BASE_DIR/tests/verify_chat_data.py --data_source $TEST_DATA_DIR/sft/test_sft_data.jsonl \`
			`--to_verify_file $SAVE_DIR/jsonl/part-00005.jsonl --data_type sft`
			`passed=$?`
			`if [ $passed -ne 0 ]; then`
			`echo "[Test]: Failed in the SFT data templating test for $model"`
			`exit 1`
			`fi`
			`done`


			`# Test DPO/PPO data Preparation`
			`for model in ${MODELS[@]}; do`
			`echo "Testing DPO/PPO data templating for $model"`
			`SAVE_DIR=$DATA_SAVE_PATH/dpo/$model`
			`rm -rf $SAVE_DIR/cache`
			`rm -rf $SAVE_DIR/jsonl`
			`rm -rf $SAVE_DIR/arrow`
			`pretrain=$(get_pretrain $model)`
			`conversation_template_config=$(get_conversation_template_config $model)`
			`python $EXAMPLES_DIR/data_preparation_scripts/prepare_dataset.py --type preference --data_input_dirs $TEST_DATA_DIR/dpo \`
			`--tokenizer_dir $pretrain \`
			`--conversation_template_config $conversation_template_config \`
			`--data_cache_dir $SAVE_DIR/cache \`
			`--data_jsonl_output_dir $SAVE_DIR/jsonl \`
			`--data_arrow_output_dir $SAVE_DIR/arrow`
			`passed=$?`
			`if [ $passed -ne 0 ]; then`
			`echo "[Test]: Failed in the DPO data templating for $model"`
			`exit 1`
			`fi`
			`python $BASE_DIR/tests/verify_chat_data.py --data_source $TEST_DATA_DIR/dpo/test_dpo_data.jsonl \`
			`--to_verify_file $SAVE_DIR/jsonl/part-00005.jsonl --data_type dpo`
			`passed=$?`
			`if [ $passed -ne 0 ]; then`
			`echo "[Test]: Failed in the DPO data templating test for $model"`
			`exit 1`
			`fi`
			`done`