diff --git a/ci_scripts/common/basic_func.sh b/ci_scripts/common/basic_func.sh
index f9bb196..8ce1c54 100644
--- a/ci_scripts/common/basic_func.sh
+++ b/ci_scripts/common/basic_func.sh
@@ -1,14 +1,18 @@
 #!/bin/bash
 
-export exit_code=0
-
-function if_exist() {
-ls -l $file_path
-exit_code_now=$?
-exit_code=$(($exit_code + $exit_code_now))
-}
-
-function num_files() {
-file_num=$(ls -l $file_dir |wc -l)
-echo "there are $file_num files in $file_dir"
+#######################################
+# Print the number of regular files listed for a directory or glob pattern.
+# Arguments:
+#   $1: directory path or glob pattern (quoted by the caller)
+# Outputs:
+#   count of `ls -l` lines starting with '-' (regular files), on stdout
+# Returns:
+#   1 when not called with exactly one argument, 0 otherwise
+#######################################
+num_files() {
+    [[ $# -eq 1 ]] || return 1
+    local file_num
+    # $1 stays unquoted on purpose: callers pass glob patterns to expand here.
+    file_num=$(ls -l $1 | grep -c '^-')
+    echo "${file_num}"
 }
diff --git a/ci_scripts/common/variables.sh b/ci_scripts/common/variables.sh
new file mode 100644
index 0000000..5dcc5fa
--- /dev/null
+++ b/ci_scripts/common/variables.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+# TODO: variable definitions should live in the repo configuration.
+readonly DATA_VOLUME=${DATA_VOLUME:-"/mnt/petrelfs/qa-caif-cicd/data"}
\ No newline at end of file
diff --git a/ci_scripts/data/tokenizer_alpaca.sh b/ci_scripts/data/tokenizer_alpaca.sh
index e8ccac7..0d06455 100644
--- a/ci_scripts/data/tokenizer_alpaca.sh
+++ b/ci_scripts/data/tokenizer_alpaca.sh
@@ -1,22 +1,50 @@
 #!/bin/bash
+set -x
 
-rm -rf /mnt/petrelfs/qa-caif-cicd/data/lm_data/alpaca_data/result/*
+source ./ci_scripts/common/variables.sh
+[[ -n ${DATA_VOLUME} ]] || { echo "should set DATA_VOLUME first before ci, exit."; exit 1; }
 
-python tools/alpaca_tokenizer.py /mnt/petrelfs/qa-caif-cicd/data/lm_data/alpaca_data/alpaca_data.json /mnt/petrelfs/qa-caif-cicd/data/lm_data/alpaca_data/result  tools/V7_sft.model --split_ratio 0.1
+readonly SRC_DATASET_META=${DATA_VOLUME}/lm_data/alpaca_data/alpaca_data.json
+readonly RESULTS=${DATA_VOLUME}/lm_data/alpaca_data/result
+readonly TRAIN_DATASET=${RESULTS}/train/en/dataset.bin
+readonly TRAIN_DATASET_META=${RESULTS}/train/en/dataset.bin.meta
+readonly VALID_DATASET=${RESULTS}/valid/en/dataset.bin
+readonly VALID_DATASET_META=${RESULTS}/valid/en/dataset.bin.meta
 
-file_one="/mnt/petrelfs/qa-caif-cicd/data/lm_data/alpaca_data/result/train/en/dataset.bin"
-file_two="/mnt/petrelfs/qa-caif-cicd/data/lm_data/alpaca_data/result/train/en/dataset.bin.meta"
-file_three="/mnt/petrelfs/qa-caif-cicd/data/lm_data/alpaca_data/result/valid/en/dataset.bin"
-file_four="/mnt/petrelfs/qa-caif-cicd/data/lm_data/alpaca_data/result/valid/en/dataset.bin.meta"
-file_list=($file_one $file_two $file_three $file_four)
+split_ratio=0.1
+exit_code=0
 
 source ./ci_scripts/common/basic_func.sh
-for file_path in ${file_list[@]};
-do
-if_exist $file_path
+
+echo "start to test alpaca_tokenizer.py."
+# Start from an empty result directory so stale outputs cannot satisfy the checks.
+if [[ -d "${RESULTS}" ]]; then
+    if ! rm -rf -- "${RESULTS:?}"/*; then
+       echo "cleaning test data in ${RESULTS} failed, exit."
+       exit 1
+    fi
+fi
+
+if [[ ! -f "${SRC_DATASET_META}" ]]; then
+   echo "${SRC_DATASET_META} should be exist, exit."
+   exit 1
+fi
+# Run the tokenizer; a failure is recorded instead of aborting so the file checks still run.
+python tools/alpaca_tokenizer.py "${SRC_DATASET_META}" "${RESULTS}" tools/V7_sft.model --split_ratio "${split_ratio}" \
+    || { echo "test alpaca_tokenizer.py failed."; exit_code=$((exit_code + 1)); }
+# Every expected output file must exist; each missing one bumps the exit code.
+file_list=("${TRAIN_DATASET}" "${TRAIN_DATASET_META}" "${VALID_DATASET}" "${VALID_DATASET_META}")
+for file in "${file_list[@]}"; do
+    if [[ ! -f "${file}" ]]; then
+        echo "expect: ${file} exists, actual: not exist."
+        exit_code=$((exit_code + 1))
+    fi
 done
 
-if [ $exit_code -ne 0 ]
-then
-    exit 1
+# Remove the generated outputs; a failed cleanup counts as a test failure.
+if ! rm -rf -- "${RESULTS:?}"/*; then
+    echo "cleaning test data in ${RESULTS} failed."
+    exit_code=$((exit_code + 1))
 fi
+
+exit "${exit_code}"
diff --git a/ci_scripts/data/tokenizer_chinese.sh b/ci_scripts/data/tokenizer_chinese.sh
index 99241e7..d427c0b 100644
--- a/ci_scripts/data/tokenizer_chinese.sh
+++ b/ci_scripts/data/tokenizer_chinese.sh
@@ -1,19 +1,42 @@
 #!/bin/bash
+set -x
 
-rm -rf /mnt/petrelfs/qa-caif-cicd/data/lm_data/cn_data/result.*
-srun -p llm python tools/tokenizer.py --text_input_path /mnt/petrelfs/qa-caif-cicd/data/lm_data/cn_data/raw_data.txt --bin_output_path /mnt/petrelfs/qa-caif-cicd/data/lm_data/cn_data/result.bin
+source ./ci_scripts/common/variables.sh
+[[ -n ${DATA_VOLUME} ]] || { echo "should set DATA_VOLUME first before ci."; exit 1; }
 
-file_one="/mnt/petrelfs/qa-caif-cicd/data/lm_data/cn_data/result.bin"
-file_two="/mnt/petrelfs/qa-caif-cicd/data/lm_data/cn_data/result.bin.meta"
-file_list=($file_one $file_two)
+readonly DATA=${DATA_VOLUME}/lm_data/cn_data/raw_data.txt
+readonly RESULT=${DATA_VOLUME}/lm_data/cn_data/result.bin
+readonly RESULT_META=${DATA_VOLUME}/lm_data/cn_data/result.bin.meta
+readonly RESULTS=${DATA_VOLUME}/lm_data/cn_data/result.*
+exit_code=0
 
 source ./ci_scripts/common/basic_func.sh
-for file_path in ${file_list[@]};
-do
-if_exist $file_path
+
+echo "start to test tokenizer.py."
+# RESULTS holds a glob pattern, so it is expanded unquoted by num_files and rm.
+num=$(num_files "${RESULTS}")
+if [[ ${num} -gt 0 ]]; then
+    if ! rm -rf ${RESULTS}; then
+       echo "cleaning test data ${RESULTS} failed, exit."
+       exit 1
+    fi
+fi
+# Run the tokenizer; a failure is recorded so the output checks below still run.
+srun -p llm python tools/tokenizer.py --text_input_path "${DATA}" --bin_output_path "${RESULT}" \
+    || { echo "test tokenizer.py failed."; exit_code=$((exit_code + 1)); }
+# Both the binary and its meta file must have been produced.
+file_list=("${RESULT}" "${RESULT_META}")
+for file in "${file_list[@]}"; do
+    if [[ ! -f "${file}" ]]; then
+        echo "expect: ${file} exists, actual: not exist."
+        exit_code=$((exit_code + 1))
+    fi
 done
 
-if [ $exit_code -ne 0 ]
-then
-    exit 1
+# Remove the generated result.* files; RESULTS is already the glob pattern, so no "/*" suffix is needed.
+if ! rm -rf ${RESULTS}; then
+   echo "cleaning cached file in ${RESULTS} failed."
+   exit_code=$((exit_code + 1))
 fi
+
+exit "${exit_code}"
diff --git a/ci_scripts/model/convert_to_hf.sh b/ci_scripts/model/convert_to_hf.sh
index 385bba5..162946d 100644
--- a/ci_scripts/model/convert_to_hf.sh
+++ b/ci_scripts/model/convert_to_hf.sh
@@ -1,33 +1,47 @@
 #!/bin/bash
+set -x
 
-rm -rf ./hf_ckpt/*
-python ./tools/transformers/convert2hf.py --src_folder /mnt/petrelfs/qa-caif-cicd/data/lm_data/alpaca_data/llm_ckpts/20 --tgt_folder hf_ckpt/ --tokenizer ./tools/V7_sft.model
+source ./ci_scripts/common/variables.sh
+[[ -n ${DATA_VOLUME} ]] || { echo "should set DATA_VOLUME first before ci, exit."; exit 1; }
+[[ -n ${GITHUB_WORKSPACE} ]] || { echo "should set GITHUB_WORKSPACE first before ci, exit."; exit 1; }
 
-#assert exists model
-file_one="$GITHUB_WORKSPACE/hf_ckpt/tokenizer.model"
-file_two="$GITHUB_WORKSPACE/hf_ckpt/config.json"
-file_three="$GITHUB_WORKSPACE/hf_ckpt/modeling_internlm.py"
-file_list=($file_one $file_two $file_three)
-file_dir="$GITHUB_WORKSPACE/hf_ckpt/*"
+readonly CKPTS_INPUT="${DATA_VOLUME}/lm_data/alpaca_data/llm_ckpts/20"
+readonly CKPTS_OUTPUT="${GITHUB_WORKSPACE}/hf_ckpt"
+readonly TOKENIZER="${GITHUB_WORKSPACE}/hf_ckpt/tokenizer.model"
+readonly CONFIG="${GITHUB_WORKSPACE}/hf_ckpt/config.json"
+readonly INERNLM="${GITHUB_WORKSPACE}/hf_ckpt/modeling_internlm.py"
+exit_code=0
+expected_num=9
 
 source ./ci_scripts/common/basic_func.sh
 
-for file_path in ${file_list[@]};
-do
-if_exist $file_path
+echo "start to test convert2hf.py."
+# Empty the output folder first so stale files from a previous run cannot affect the checks.
+if [[ -d "${CKPTS_OUTPUT}" ]]; then
+    if ! rm -rf -- "${CKPTS_OUTPUT:?}"/*; then
+       echo "cleaning cached file in ${CKPTS_OUTPUT} failed, exit."
+       exit 1
+    fi
+fi
+# Run the conversion; a failure is recorded so the checks below still report details.
+python ./tools/transformers/convert2hf.py --src_folder "${CKPTS_INPUT}" --tgt_folder "${CKPTS_OUTPUT}" --tokenizer ./tools/V7_sft.model \
+    || { echo "test convert2hf.py failed."; exit_code=$((exit_code + 1)); }
+
+# Assert that the key files of the converted model exist.
+file_list=("${TOKENIZER}" "${CONFIG}" "${INERNLM}")
+for file in "${file_list[@]}"; do
+    if [[ ! -f "${file}" ]]; then
+        echo "file ${file} does not exist."
+        exit_code=$((exit_code + 1))
+    fi
 done
 
+num=$(num_files "${CKPTS_OUTPUT}")
 
-num_files ${file_dir}
-
-if [ $file_num -ne 9 ]
-then
-    echo "The num of files is not right"
-    ls -l $file_dir
+if [[ ${num} -ne ${expected_num} ]]; then
+    echo "expect: ${expected_num} files, actual: ${num} files."
     exit_code=$(($exit_code + 1)) 
 fi
 
-if [ $exit_code -ne 0 ]
-then
-    exit 1
-fi
+# NOTICE: should not remove the cached files, because the cached files will be used in the next test case.
+exit $exit_code
diff --git a/ci_scripts/model/demo_load_7B_chat_model.py b/ci_scripts/model/demo_load_7B_chat_model.py
index 61cec0d..695be27 100644
--- a/ci_scripts/model/demo_load_7B_chat_model.py
+++ b/ci_scripts/model/demo_load_7B_chat_model.py
@@ -1,4 +1,5 @@
-
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
 tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
diff --git a/ci_scripts/model/loaded_as_transformer.py b/ci_scripts/model/loaded_as_transformer.py
index 5e3d28d..5254fb9 100644
--- a/ci_scripts/model/loaded_as_transformer.py
+++ b/ci_scripts/model/loaded_as_transformer.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
 from transformers import AutoModel
 
 model = AutoModel.from_pretrained("../hf_ckpt/", trust_remote_code=True).cuda()
diff --git a/ci_scripts/train/slurm_train.sh b/ci_scripts/train/slurm_train.sh
index 9759c0e..2ece5e5 100644
--- a/ci_scripts/train/slurm_train.sh
+++ b/ci_scripts/train/slurm_train.sh
@@ -1,20 +1,37 @@
 #!/bin/bash
+set -x
 
-rm -rf $GITHUB_WORKSPACE/llm_ckpts/20
+[[ -n ${GITHUB_WORKSPACE} ]] || { echo "should set GITHUB_WORKSPACE first before ci, exit."; exit 1; }
+readonly CKPTS_PATH="$GITHUB_WORKSPACE/llm_ckpts"
+readonly CKPTS20_PATH="$GITHUB_WORKSPACE/llm_ckpts/20"
+readonly CKPTS20_OUTPUT="${CKPTS20_PATH}/*.pt"
+expected_num=21
+exit_code=0
 
-srun -p llm --quotatype=spot -n 8 --ntasks-per-node=8 --gpus-per-task=1 python train.py --config ./ci_scripts/train/ci_7B_sft.py
-
-file_dir="$GITHUB_WORKSPACE/llm_ckpts/20/*.pt"
 source ./ci_scripts/common/basic_func.sh
 
-num_files ${file_dir}
+echo "start to test slurm training."
 
-if [ $file_num -ne 21 ]
-then
-    echo "The num of files is not right"
-    ls -l $file_dir
-    rm -rf $GITHUB_WORKSPACE/llm_ckpts
-    exit 1
+# Empty the llm_ckpts/20 folder so stale *.pt files are not counted below.
+# The :? guard aborts instead of expanding to "/*" if the path were ever empty.
+if [[ -d "${CKPTS20_PATH}" ]] && ! rm -rf -- "${CKPTS20_PATH:?}"/*; then
+    echo "cleaning cached file in ${CKPTS20_PATH} failed, exit."
+    exit 1
 fi
 
+srun -p llm --quotatype=spot -n 8 --ntasks-per-node=8 --gpus-per-task=1 python train.py --config ./ci_scripts/train/ci_7B_sft.py \
+    || { echo "test slurm training failed."; exit_code=$((exit_code + 1)); }
 
+# CKPTS20_OUTPUT is a glob pattern; num_files expands it and counts the matches.
+num=$(num_files "${CKPTS20_OUTPUT}")
+if [[ ${num} -ne ${expected_num} ]]; then
+    echo "expect: ${expected_num} files, actual: ${num} files."
+    exit_code=$((exit_code + 1))
+fi
+# Clean the test files; a failed cleanup counts as a test failure.
+if ! rm -rf -- "${CKPTS_PATH:?}"/*; then
+    echo "cleaning cached file in ${CKPTS_PATH} failed."
+    exit_code=$((exit_code + 1))
+fi
+
+exit "${exit_code}"
diff --git a/ci_scripts/train/torchrun.sh b/ci_scripts/train/torchrun.sh
index db781f0..9eb4220 100644
--- a/ci_scripts/train/torchrun.sh
+++ b/ci_scripts/train/torchrun.sh
@@ -1,17 +1,37 @@
 #!/bin/bash
+set -x
 
-rm -rf $GITHUB_WORKSPACE/llm_ckpts/20
-srun -p llm -N 1 torchrun --nnodes=1 --nproc_per_node=8 --master_port=29501 train.py --config ./ci_scripts/train/ci_7B_sft.py --launcher "torch"
+[[ -n ${GITHUB_WORKSPACE} ]] || { echo "should set GITHUB_WORKSPACE first before ci, exit."; exit 1; }
+readonly CKPTS_PATH="$GITHUB_WORKSPACE/llm_ckpts"
+readonly CKPTS20_PATH="$GITHUB_WORKSPACE/llm_ckpts/20"
+readonly CKPTS_OUTPUT="${CKPTS20_PATH}/*.pt"
+expected_num=21
+exit_code=0
 
-file_dir="$GITHUB_WORKSPACE/llm_ckpts/20/*.pt"
 source ./ci_scripts/common/basic_func.sh
 
-num_files ${file_dir}
+echo "start to test torch training."
 
-if [ $file_num -ne 21 ]
-then
-    echo "The num of files is not right"
-    ls -l $file_dir
-    rm -rf $GITHUB_WORKSPACE/llm_ckpts
-    exit 1
+# Empty the llm_ckpts/20 folder so stale *.pt files are not counted below.
+# The :? guard aborts instead of expanding to "/*" if the path were ever empty.
+if [[ -d "${CKPTS20_PATH}" ]] && ! rm -rf -- "${CKPTS20_PATH:?}"/*; then
+    echo "cleaning cached file in ${CKPTS20_PATH} failed, exit."
+    exit 1
 fi
+
+srun -p llm -N 1 torchrun --nnodes=1 --nproc_per_node=8 --master_port=29501 train.py --config ./ci_scripts/train/ci_7B_sft.py --launcher torch \
+    || { echo "test torch training failed."; exit_code=$((exit_code + 1)); }
+
+# CKPTS_OUTPUT is a glob pattern; num_files expands it and counts the matches.
+num=$(num_files "${CKPTS_OUTPUT}")
+if [[ ${num} -ne ${expected_num} ]]; then
+    echo "expect: ${expected_num} files, actual: ${num} files."
+    exit_code=$((exit_code + 1))
+fi
+# Clean the test files; a failed cleanup counts as a test failure.
+if ! rm -rf -- "${CKPTS_PATH:?}"/*; then
+    echo "cleaning cached file in ${CKPTS_PATH} failed."
+    exit_code=$((exit_code + 1))
+fi
+
+exit "${exit_code}"