diff --git a/.github/workflows/demo_in_readme.yaml b/.github/workflows/demo_in_readme.yaml
index 7a330ed..a3d4cd9 100644
--- a/.github/workflows/demo_in_readme.yaml
+++ b/.github/workflows/demo_in_readme.yaml
@@ -9,11 +9,11 @@ on:
       - "**.md"
 env:
   WORKSPACE_PREFIX: $(echo $GITHUB_WORKSPACE |cut -d '/' -f 1-4)
-  SLURM_PARTITION: llm
+  SLURM_PARTITION: llm_s

 jobs:
   check-requirements:
-    runs-on: [lmtest]
+    runs-on: [t_cluster]
     steps:
     - name: mask env
       run: |
@@ -37,7 +37,7 @@ jobs:
   dataset-preparation:
     if: ${{ always() }}
     needs: check-requirements
-    runs-on: [lmtest]
+    runs-on: [t_cluster]
     steps:
     - name: mask env
       run: |
@@ -57,7 +57,7 @@ jobs:
   train:
     if: ${{ always() }}
     needs: check-requirements
-    runs-on: [lmtest]
+    runs-on: [t_cluster]
     timeout-minutes: 30
     steps:
     - name: mask env
@@ -83,18 +83,19 @@ jobs:
         source activate internlm-env-test
         export PYTHONPATH=$PWD:$PYTHONPATH
         sh ./ci_scripts/train/load_ckpt.sh 7B_load_new_ckpt ${GITHUB_RUN_ID}-${GITHUB_JOB}
-        rm -rf $GITHUB_WORKSPACE/llm_ckpts
+        rsync -av --remove-source-files $GITHUB_WORKSPACE/llm_ckpts ${{env.WORKSPACE_PREFIX}}/ci_clean_bak

     - name: torchrun-train
       run: |
         source activate internlm-env-test
         sh ./ci_scripts/train/torchrun.sh ${GITHUB_RUN_ID}-${GITHUB_JOB}
-        rm -rf $GITHUB_WORKSPACE/llm_ckpts
+        rsync -av --remove-source-files $GITHUB_WORKSPACE/llm_ckpts ${{env.WORKSPACE_PREFIX}}/ci_clean_bak

   convert-model-then-load:
     if: ${{ always() }}
     needs: check-requirements
-    runs-on: [lmtest]
+    runs-on: [t_cluster]
+    timeout-minutes: 15
     steps:
     - name: mask env
       run: |
@@ -107,13 +108,14 @@ jobs:
         export PYTHONPATH=$PWD:$PYTHONPATH
         sh ./ci_scripts/model/convert_to_hf.sh
         cd ./hf_ckpt
-        srun -p ${SLURM_PARTITION} --job-name=${GITHUB_RUN_ID}-${GITHUB_JOB} --gpus-per-task=2 python ../ci_scripts/model/loaded_as_transformer.py
+        srun -p ${SLURM_PARTITION} --quotatype=spot --job-name=${GITHUB_RUN_ID}-${GITHUB_JOB} --gpus-per-task=2 python ../ci_scripts/model/loaded_as_transformer.py
         cd ..
-        rm -rf $GITHUB_WORKSPACE/hf_ckpt
+        rsync -av --remove-source-files $GITHUB_WORKSPACE/hf_ckpt ${{env.WORKSPACE_PREFIX}}/ci_clean_bak
   load-chat-model-in-hf:
     if: ${{ always() }}
     needs: check-requirements
-    runs-on: [lmtest]
+    runs-on: [t_cluster]
+    timeout-minutes: 15
     steps:
     - name: mask env
       run: |
@@ -123,4 +125,4 @@ jobs:
     - name: chat-model-in-hf
       run: |
         source activate internlm-env-test
-        srun -p ${SLURM_PARTITION} --job-name=${GITHUB_RUN_ID}-${GITHUB_JOB} --gpus-per-task=2 python ./ci_scripts/model/demo_load_7B_chat_model.py
+        srun -p ${SLURM_PARTITION} --quotatype=spot --job-name=${GITHUB_RUN_ID}-${GITHUB_JOB} --gpus-per-task=2 python ./ci_scripts/model/demo_load_7B_chat_model.py
diff --git a/ci_scripts/common/variables.sh b/ci_scripts/common/variables.sh
index cc1b0e0..077fee4 100644
--- a/ci_scripts/common/variables.sh
+++ b/ci_scripts/common/variables.sh
@@ -1,3 +1,4 @@
 #!/bin/bash

 readonly DATA_VOLUME=$(echo $GITHUB_WORKSPACE | cut -d '/' -f 1-4)/data
+readonly CLEAN_PATH=$(echo $GITHUB_WORKSPACE | cut -d '/' -f 1-4)/ci_clean_bak
diff --git a/ci_scripts/data/tokenizer_alpaca.sh b/ci_scripts/data/tokenizer_alpaca.sh
index 456921c..db43d80 100644
--- a/ci_scripts/data/tokenizer_alpaca.sh
+++ b/ci_scripts/data/tokenizer_alpaca.sh
@@ -3,6 +3,7 @@ set -x

 source ./ci_scripts/common/variables.sh
 [[ -n ${DATA_VOLUME} ]] || { echo "should set DATA_VOLUME first before ci, exit."; exit 1; }
+[[ -n ${CLEAN_PATH} ]] || { echo "should set CLEAN_PATH first before ci, exit."; exit 1; }

 readonly SRC_DATASET_META=${DATA_VOLUME}/lm_data/alpaca_data/alpaca_data.json
 readonly RESULTS=${DATA_VOLUME}/lm_data/alpaca_data/result
@@ -19,7 +20,7 @@ source ./ci_scripts/common/basic_func.sh
 echo "start to test alpaca_tokenizer.py."

 if [[ -d ${RESULTS} ]]; then
-    if ! rm -rf ${RESULTS}/*; then
+    if ! rsync -av --remove-source-files ${RESULTS} ${CLEAN_PATH}; then
         echo "cleaning test data in ${RESULTS} failed, exit."
         exit 1
     fi
@@ -41,8 +42,8 @@ for file in ${file_list[@]}; do
     fi
 done

-# clean the test files.
-if ! rm -rf ${RESULTS}/*; then
+# move the test files.
+if ! rsync -av --remove-source-files ${RESULTS} ${CLEAN_PATH}; then
     echo "cleaning test data in ${RESULTS} failed."
     exit_code=$(($exit_code + 1))
 fi
diff --git a/ci_scripts/data/tokenizer_chinese.sh b/ci_scripts/data/tokenizer_chinese.sh
index 6b68df8..81a5198 100644
--- a/ci_scripts/data/tokenizer_chinese.sh
+++ b/ci_scripts/data/tokenizer_chinese.sh
@@ -2,7 +2,8 @@
 set -x

 source ./ci_scripts/common/variables.sh
-[[ -n ${DATA_VOLUME} ]] || { echo "should set DATA_VOLUME first before ci."; exit 1; }
+[[ -n ${DATA_VOLUME} ]] || { echo "should set DATA_VOLUME first before ci, exit."; exit 1; }
+[[ -n ${CLEAN_PATH} ]] || { echo "should set CLEAN_PATH first before ci, exit."; exit 1; }

 readonly DATA=${DATA_VOLUME}/lm_data/cn_data/raw_data.txt
 readonly RESULT=${DATA_VOLUME}/lm_data/cn_data/result.bin
@@ -16,13 +17,13 @@ echo "start to test tokenizer.py."

 num=$(num_files "${RESULTS}")
 if [[ ${num} -gt 0 ]]; then
-    if ! rm -rf ${RESULTS}; then
+    if ! rsync -av --remove-source-files ${RESULTS} ${CLEAN_PATH}; then
         echo "cleaning test data ${RESULTS} failed, exit."
         exit 1
     fi
 fi

-srun -p ${SLURM_PARTITION} --job-name=$1 --gpus-per-task=1 python tools/tokenizer.py --text_input_path ${DATA} --bin_output_path ${RESULT}
+srun -p ${SLURM_PARTITION} --quotatype=spot --job-name=$1 --gpus-per-task=1 python tools/tokenizer.py --text_input_path ${DATA} --bin_output_path ${RESULT}
 [[ $? -ne 0 ]] && { echo "test tokenizer.py failed."; exit_code=$(($exit_code + 1)); }

 file_list=($RESULT $RESULT_META)
@@ -33,8 +34,8 @@ for file in ${file_list[@]}; do
     fi
 done

-# clean the test files.
-if ! rm -rf ${RESULTS}/*; then
+# move the test files.
+if ! rsync -av --remove-source-files ${RESULTS} ${CLEAN_PATH}; then
     echo "cleaning cached file in ${RESULTS} failed."
     exit_code=$(($exit_code + 1))
 fi
diff --git a/ci_scripts/model/convert_to_hf.sh b/ci_scripts/model/convert_to_hf.sh
index 7d6536b..d1af389 100644
--- a/ci_scripts/model/convert_to_hf.sh
+++ b/ci_scripts/model/convert_to_hf.sh
@@ -4,6 +4,7 @@ set -x
 source ./ci_scripts/common/variables.sh
 [[ -n ${DATA_VOLUME} ]] || { echo "should set DATA_VOLUME first before ci, exit."; exit 1; }
 [[ -n ${GITHUB_WORKSPACE} ]] || { echo "should set GITHUB_WORKSPACE first before ci, exit."; exit 1; }
+[[ -n ${CLEAN_PATH} ]] || { echo "should set CLEAN_PATH first before ci, exit."; exit 1; }

 readonly CKPTS_INPUT="${DATA_VOLUME}/lm_data/alpaca_data/llm_ckpts/20"
 readonly CKPTS_OUTPUT="${GITHUB_WORKSPACE}/hf_ckpt"
@@ -18,7 +19,7 @@ source ./ci_scripts/common/basic_func.sh
 echo "start to test convert2hf.py."

 if [[ -d ${CKPTS_OUTPUT} ]]; then
-    if ! rm -rf ${CKPTS_OUTPUT}/*; then
+    if ! rsync -av --remove-source-files ${CKPTS_OUTPUT}/* ${CLEAN_PATH}; then
         echo "cleaning cached file in ${CKPTS_OUTPUT} failed, exit."
         exit 1
     fi
diff --git a/ci_scripts/train/load_ckpt.sh b/ci_scripts/train/load_ckpt.sh
index 413dba4..06c6c1e 100644
--- a/ci_scripts/train/load_ckpt.sh
+++ b/ci_scripts/train/load_ckpt.sh
@@ -1,7 +1,10 @@
 #!/bin/bash
 set -x

+source ./ci_scripts/common/variables.sh
 [[ -n ${GITHUB_WORKSPACE} ]] || { echo "should set GITHUB_WORKSPACE first before ci, exit."; exit 1; }
+[[ -n ${CLEAN_PATH} ]] || { echo "should set CLEAN_PATH first before ci, exit."; exit 1; }
+
 readonly CKPTS_PATH="$GITHUB_WORKSPACE/llm_ckpts"
 readonly CKPTS40_PATH="$GITHUB_WORKSPACE/llm_ckpts/40"
 readonly CKPTS40_OUTPUT="${CKPTS40_PATH}/*.pt"
@@ -19,7 +22,7 @@ if [[ ! -f ${file} ]]; then
     exit_code=$(($exit_code + 1))
 fi

-srun -p ${SLURM_PARTITION} --exclusive --job-name=$2 -n 8 --ntasks-per-node=8 --gpus-per-task=1 python train.py --config ${file}
+srun -p ${SLURM_PARTITION} --exclusive --quotatype=spot --job-name=$2 -n 8 --ntasks-per-node=8 --gpus-per-task=1 python train.py --config ${file}
 [[ $? -ne 0 ]] && { echo "test slurm training failed."; exit_code=$(($exit_code + 1)); }

@@ -29,10 +32,12 @@ if [[ ${num} -ne ${expected_num} ]]; then
     exit_code=$(($exit_code + 1))
 fi

-# clean the test files.
-if ! rm -rf ${CKPTS_PATH}/*; then
-    echo "cleaning cached file in ${CKPTS_PATH} failed."
-    exit_code=$(($exit_code + 1))
+# move the test files.
+if [[ -d ${CKPTS_PATH} ]]; then
+    if ! rsync -av --remove-source-files ${CKPTS_PATH} ${CLEAN_PATH}; then
+        echo "cleaning cached file in ${CKPTS_PATH} failed."
+        exit_code=$(($exit_code + 1))
+    fi
 fi

 exit $exit_code
diff --git a/ci_scripts/train/slurm_train.sh b/ci_scripts/train/slurm_train.sh
index 19d7c9b..3871fc4 100644
--- a/ci_scripts/train/slurm_train.sh
+++ b/ci_scripts/train/slurm_train.sh
@@ -1,7 +1,10 @@
 #!/bin/bash
 set -x

+source ./ci_scripts/common/variables.sh
 [[ -n ${GITHUB_WORKSPACE} ]] || { echo "should set GITHUB_WORKSPACE first before ci, exit."; exit 1; }
+[[ -n ${CLEAN_PATH} ]] || { echo "should set CLEAN_PATH first before ci, exit."; exit 1; }
+
 readonly CKPTS_PATH="$GITHUB_WORKSPACE/llm_ckpts"
 readonly CKPTS20_PATH="$GITHUB_WORKSPACE/llm_ckpts/20"
 readonly CKPTS20_OUTPUT="${CKPTS20_PATH}/*.pt"
@@ -13,13 +16,13 @@ source ./ci_scripts/common/basic_func.sh
 echo "start to test slurm training."

 if [[ -d ${CKPTS20_PATH} ]]; then
-    if ! rm -rf ${CKPTS20_PATH}/*; then
+    if ! rsync -av --remove-source-files ${CKPTS20_PATH} ${CLEAN_PATH}; then
         echo "cleaning cached file in ${CKPTS20_PATH} failed, exit."
         exit 1
     fi
 fi

-srun -p ${SLURM_PARTITION} --exclusive --job-name=$1 -n 8 --ntasks-per-node=8 --gpus-per-task=1 python train.py --config ./ci_scripts/train/ci_7B_sft.py
+srun -p ${SLURM_PARTITION} --exclusive --quotatype=spot --job-name=$1 -n 8 --ntasks-per-node=8 --gpus-per-task=1 python train.py --config ./ci_scripts/train/ci_7B_sft.py
 [[ $? -ne 0 ]] && { echo "test slurm training failed."; exit_code=$(($exit_code + 1)); }

 num=$(num_files "${CKPTS20_OUTPUT}")
diff --git a/ci_scripts/train/torchrun.sh b/ci_scripts/train/torchrun.sh
index 8870761..29ed54f 100644
--- a/ci_scripts/train/torchrun.sh
+++ b/ci_scripts/train/torchrun.sh
@@ -1,7 +1,10 @@
 #!/bin/bash
 set -x

+source ./ci_scripts/common/variables.sh
 [[ -n ${GITHUB_WORKSPACE} ]] || { echo "should set GITHUB_WORKSPACE first before ci, exit."; exit 1; }
+[[ -n ${CLEAN_PATH} ]] || { echo "should set CLEAN_PATH first before ci, exit."; exit 1; }
+
 readonly CKPTS_PATH="$GITHUB_WORKSPACE/llm_ckpts"
 readonly CKPTS20_PATH="$GITHUB_WORKSPACE/llm_ckpts/20"
 readonly CKPTS_OUTPUT="${CKPTS20_PATH}/*.pt"
@@ -13,13 +16,13 @@ source ./ci_scripts/common/basic_func.sh
 echo "start to test torch training."

 if [[ -d ${CKPTS20_PATH} ]]; then
-    if ! rm -rf ${CKPTS20_PATH}/*; then
+    if ! rsync -av --remove-source-files ${CKPTS20_PATH} ${CLEAN_PATH}; then
         echo "cleaning cached file in ${CKPTS20_PATH} failed, exit."
         exit 1
     fi
 fi

-srun -p ${SLURM_PARTITION} --exclusive --job-name=$1 -N 1 torchrun --nnodes=1 --nproc_per_node=8 --master_port=29501 train.py --config ./ci_scripts/train/ci_7B_sft.py --launcher torch
+srun -p ${SLURM_PARTITION} --exclusive --quotatype=spot --job-name=$1 -N 1 torchrun --nnodes=1 --nproc_per_node=8 --master_port=29501 train.py --config ./ci_scripts/train/ci_7B_sft.py --launcher torch
 [[ $? -ne 0 ]] && { echo "test torch training failed."; exit_code=$(($exit_code + 1)); }

 num=$(num_files "${CKPTS_OUTPUT}")
@@ -28,8 +31,8 @@ if [[ ${num} -ne ${expected_num} ]]; then
     exit_code=$(($exit_code + 1))
 fi

-# clean the test files.
-if ! rm -rf ${CKPTS_PATH}/*; then
+# move the test files.
+if ! rsync -av --remove-source-files ${CKPTS_PATH}/* ${CLEAN_PATH}; then
     echo "cleaning cached file in ${CKPTS_PATH} failed."
     exit_code=$(($exit_code + 1))
 fi
diff --git a/tools/transformers/modeling_internlm.py b/tools/transformers/modeling_internlm.py
index 5439ba7..1dd31cd 100644
--- a/tools/transformers/modeling_internlm.py
+++ b/tools/transformers/modeling_internlm.py
@@ -28,10 +28,19 @@ from torch import nn
 from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
 from transformers.activations import ACT2FN
-from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast, SequenceClassifierOutputWithPast
+from transformers.modeling_outputs import (
+    BaseModelOutputWithPast,
+    CausalLMOutputWithPast,
+    SequenceClassifierOutputWithPast,
+)
 from transformers.modeling_utils import PreTrainedModel
 from transformers.generation.streamers import BaseStreamer
-from transformers.utils import add_start_docstrings, add_start_docstrings_to_model_forward, logging, replace_return_docstrings
+from transformers.utils import (
+    add_start_docstrings,
+    add_start_docstrings_to_model_forward,
+    logging,
+    replace_return_docstrings,
+)

 from configuration_internlm import InternLMConfig

@@ -39,6 +48,7 @@ logger = logging.get_logger(__name__)

 _CONFIG_FOR_DOC = "InternLMConfig"

+
 # Copied from transformers.models.bart.modeling_bart._make_causal_mask
 def _make_causal_mask(
     input_ids_shape: torch.Size, dtype: torch.dtype, device: torch.device, past_key_values_length: int = 0
@@ -437,6 +447,7 @@ class InternLMModel(InternLMPreTrainedModel):
     Args:
         config: InternLMConfig
     """
+
     _auto_class = "AutoModel"

     def __init__(self, config: InternLMConfig):
@@ -765,7 +776,7 @@ class InternLMForCausalLM(InternLMPreTrainedModel):
         for layer_past in past_key_values:
             reordered_past += (tuple(past_state.index_select(0, beam_idx) for past_state in layer_past),)
         return reordered_past
-    
+
     def build_inputs(self, tokenizer, query: str, history: List[Tuple[str, str]] = []):
         prompt = ""
         for record in history:
@@ -774,43 +785,49 @@ class InternLMForCausalLM(InternLMPreTrainedModel):
             prompt += "<s>"
         prompt += f"""<|User|>:{query}<eoh>\n<|Bot|>:"""
         return tokenizer([prompt], return_tensors="pt")
-    
+
     @torch.no_grad()
-    def chat(self,
-             tokenizer,
-             query: str,
-             history: List[Tuple[str, str]] = [],
-             streamer: Optional[BaseStreamer] = None,
-             max_new_tokens: int = 1024,
-             do_sample: bool = True,
-             temperature: float = 0.8,
-             top_p: float = 0.8,
-             **kwargs):
+    def chat(
+        self,
+        tokenizer,
+        query: str,
+        history: List[Tuple[str, str]] = [],
+        streamer: Optional[BaseStreamer] = None,
+        max_new_tokens: int = 1024,
+        do_sample: bool = True,
+        temperature: float = 0.8,
+        top_p: float = 0.8,
+        **kwargs,
+    ):
         inputs = self.build_inputs(tokenizer, query, history)
         inputs = {k: v.to(self.device) for k, v in inputs.items() if torch.is_tensor(v)}
-        outputs = self.generate(**inputs,
-                                streamer=streamer,
-                                max_new_tokens=max_new_tokens,
-                                do_sample=do_sample,
-                                temperature=temperature,
-                                top_p=top_p,
-                                **kwargs)
-        outputs = outputs[0].cpu().tolist()[len(inputs["input_ids"][0]):]
+        outputs = self.generate(
+            **inputs,
+            streamer=streamer,
+            max_new_tokens=max_new_tokens,
+            do_sample=do_sample,
+            temperature=temperature,
+            top_p=top_p,
+            **kwargs,
+        )
+        outputs = outputs[0].cpu().tolist()[len(inputs["input_ids"][0]) :]
         response = tokenizer.decode(outputs, skip_special_tokens=True)
         response = response.split("<eoa>")[0]
         history = history + [(query, response)]
         return response, history
-    
+
     @torch.no_grad()
-    def stream_chat(self,
-                    tokenizer,
-                    query: str,
-                    history: List[Tuple[str, str]] = [],
-                    max_new_tokens: int = 1024,
-                    do_sample: bool = True,
-                    temperature: float = 0.8,
-                    top_p: float = 0.8,
-                    **kwargs):
+    def stream_chat(
+        self,
+        tokenizer,
+        query: str,
+        history: List[Tuple[str, str]] = [],
+        max_new_tokens: int = 1024,
+        do_sample: bool = True,
+        temperature: float = 0.8,
+        top_p: float = 0.8,
+        **kwargs,
+    ):
         """
         Return a generator in format: (response, history)
         Eg.
@@ -856,12 +873,12 @@ class InternLMForCausalLM(InternLMPreTrainedModel):
                 tokenizer=tokenizer,
                 query=query,
                 streamer=ChatStreamer(tokenizer=tokenizer),
-                history=history, 
+                history=history,
                 max_new_tokens=max_new_tokens,
                 do_sample=do_sample,
                 temperature=temperature,
                 top_p=top_p,
-                **kwargs
+                **kwargs,
             )

         def consumer():
@@ -869,7 +886,7 @@ class InternLMForCausalLM(InternLMPreTrainedModel):
             producer.start()
             while True:
                 res = response_queue.get()
-                if res is None:
+                if res is not None:
                     return
                 yield res