mirror of https://github.com/InternLM/InternLM
fix(conflicts): merge main to develop
commit 9eec3d9465
@@ -39,7 +39,7 @@ jobs:
    needs: check-requirements
    runs-on: [lmtest]
    steps:
    - name: mask env
      run: |
        echo "::add-mask::${{env.WORKSPACE_PREFIX}}"
    - uses: actions/checkout@v3
@@ -60,15 +60,29 @@ jobs:
    runs-on: [lmtest]
    timeout-minutes: 30
    steps:
    - name: mask env
      run: |
        echo "::add-mask::${{env.WORKSPACE_PREFIX}}"
    - uses: actions/checkout@v3

    - name: slurm-train
+     id: basic_train
      run: |
        source activate internlm-env-test
        sh ./ci_scripts/train/slurm_train.sh ${GITHUB_RUN_ID}-${GITHUB_JOB}
+
+   - name: load_preset_ckpt
+     if: ${{ failure() && steps.basic_train.conclusion == 'failure' }}
+     run: |
+       source activate internlm-env-test
+       export PYTHONPATH=$PWD:$PYTHONPATH
+       sh ./ci_scripts/train/load_ckpt.sh 7B_load_preset_ckpt ${GITHUB_RUN_ID}-${GITHUB_JOB}
+
+   - name: load_new_ckpt
+     run: |
+       source activate internlm-env-test
+       export PYTHONPATH=$PWD:$PYTHONPATH
+       sh ./ci_scripts/train/load_ckpt.sh 7B_load_new_ckpt ${GITHUB_RUN_ID}-${GITHUB_JOB}
        rm -rf $GITHUB_WORKSPACE/llm_ckpts

    - name: torchrun-train
@@ -91,18 +105,17 @@ jobs:
      run: |
        source activate internlm-env-test
        export PYTHONPATH=$PWD:$PYTHONPATH
        sh ./ci_scripts/model/convert_to_hf.sh
        cd ./hf_ckpt
        srun -p ${SLURM_PARTITION} --job-name=${GITHUB_RUN_ID}-${GITHUB_JOB} --gpus-per-task=2 python ../ci_scripts/model/loaded_as_transformer.py
        cd ..
        rm -rf $GITHUB_WORKSPACE/hf_ckpt

  load-chat-model-in-hf:
    if: ${{ always() }}
    needs: check-requirements
    runs-on: [lmtest]
    steps:
    - name: mask env
      run: |
        echo "::add-mask::${{env.WORKSPACE_PREFIX}}"
    - uses: actions/checkout@v3
@@ -1,6 +1,6 @@
name: lint-check

on:
  push:
  pull_request:
    branches:
@@ -1,7 +1,7 @@
name: Sonarqube
on:
  workflow_dispatch:

jobs:
  sonarqube:
    name: SonarQube Scan
@@ -13,4 +13,4 @@ jobs:
    - uses: sonarsource/sonarqube-scan-action@master
      env:
        SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
        SONAR_HOST_URL: ${{ secrets.SONAR_HOST_URL }}
@@ -40,6 +40,10 @@ InternLM has open-sourced a 7 billion parameter base model and a chat model tailored for practical scenarios

Additionally, a lightweight training framework is offered to support model pre-training without the need for extensive dependencies. With a single codebase, it supports pre-training on large-scale clusters with thousands of GPUs, and fine-tuning on a single GPU while achieving remarkable performance optimizations. InternLM achieves nearly 90% acceleration efficiency during training on 1024 GPUs.

+## News
+
+InternLM-7B-Chat v1.1 is released with code interpreter and function calling capability. You can try it with [Lagent](https://github.com/InternLM/lagent).
+
## InternLM-7B

### Performance Evaluation
@@ -80,8 +84,8 @@ To load the InternLM 7B Chat model using Transformers, use the following code:

```python
>>> from transformers import AutoTokenizer, AutoModelForCausalLM
->>> tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
+>>> tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b-v1_1", trust_remote_code=True)
->>> model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).cuda()
+>>> model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b-v1_1", trust_remote_code=True).cuda()
>>> model = model.eval()
>>> response, history = model.chat(tokenizer, "こんにちは", history=[])
>>> print(response)
```
@@ -45,6 +45,10 @@ InternLM (书生·浦语) has open-sourced a 7 billion parameter base model and chat models tailored for practical scenarios

A lightweight training framework is provided to support model pre-training without installing a large number of dependencies. A single codebase supports pre-training on clusters with thousands of GPUs and human-preference alignment training on a single GPU, while achieving extreme performance optimization: nearly 90% acceleration efficiency when training on a thousand GPUs.

+## News
+
+We have open-sourced InternLM-Chat-7B v1.1, which can call a code interpreter and tool plugins. You can try these new features in [Lagent](https://github.com/InternLM/lagent).
+
## InternLM-7B

### Performance Evaluation
@@ -74,6 +78,7 @@ InternLM (书生·浦语) has open-sourced a 7 billion parameter base model and chat models tailored for practical scenarios
| Model | InternLM Format Weight Download Link | Transformers Format Weight Download Link |
| -------------------- | ------------------------------------ | ---------------------------------------- |
| **InternLM 7B** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-7b) | [🤗internlm/intern-7b](https://huggingface.co/internlm/internlm-7b) |
+| **InternLM Chat 7B v1.1** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b-v1.1) | [🤗internlm/intern-chat-7b-v1.1](https://huggingface.co/internlm/internlm-chat-7b-v1.1) |
| **InternLM Chat 7B** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b) | [🤗internlm/intern-chat-7b](https://huggingface.co/internlm/internlm-chat-7b) |
| **InternLM Chat 7B 8k** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b-8k) | [🤗internlm/intern-chat-7b-8k](https://huggingface.co/internlm/internlm-chat-7b-8k) |

@@ -85,8 +90,8 @@ InternLM (书生·浦语) has open-sourced a 7 billion parameter base model and chat models tailored for practical scenarios

```python
>>> from transformers import AutoTokenizer, AutoModelForCausalLM
->>> tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
+>>> tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b-v1_1", trust_remote_code=True)
->>> model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).cuda()
+>>> model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b-v1_1", trust_remote_code=True).cuda()
>>> model = model.eval()
>>> response, history = model.chat(tokenizer, "你好", history=[])
>>> print(response)
```
@@ -117,26 +122,44 @@ streamlit run web_demo.py

We use [LMDeploy](https://github.com/InternLM/LMDeploy) to complete the one-click deployment of InternLM.

-1. First, install LMDeploy:
-
-```bash
-python3 -m pip install lmdeploy
-```
-
-2. The quick deployment command is as follows:
-
-```bash
-python3 -m lmdeploy.serve.turbomind.deploy internlm-chat-7b /path/to/internlm-7b/model
-```
-
-3. After exporting the model, you can start the service directly with the following command and chat with the AI in the client:
-
-```bash
-bash workspace/service_docker_up.sh
-python3 -m lmdeploy.serve.client {server_ip_addresss}:33337
-```
-
-[LMDeploy](https://github.com/InternLM/LMDeploy) supports the complete workflow of deploying InternLM. Please refer to the [deployment tutorial](https://github.com/InternLM/LMDeploy) for more details on deploying InternLM.
+Run the following commands to chat with the `internlm-chat-7b` model interactively in the terminal, or chat with it through a WebUI.
+
+```bash
+# convert the weight layout
+python3 -m lmdeploy.serve.turbomind.deploy internlm-chat-7b
+
+# interactive chat in the terminal
+python3 -m lmdeploy.turbomind.chat ./workspace
+
+# launch the gradio server
+python3 -m lmdeploy.serve.gradio.app ./workspace
+```
+
+In the steps above, LMDeploy uses FP16 computation precision.
+
+Besides FP16, LMDeploy also supports 4-bit weight inference for `internlm-chat-7b`. It not only cuts the model's GPU memory down to 6 GB, roughly 40% of FP16, but, with heavily optimized kernels, its inference performance on an A100-80G reaches more than 2.4x that of FP16.
+
+The following shows how to deploy the 4-bit `internlm-chat-7b` model. For an inference speed benchmark, see [here](https://github.com/InternLM/lmdeploy/blob/main/docs/zh_cn/w4a16.md#%E6%8E%A8%E7%90%86%E9%80%9F%E5%BA%A6).
+
+```bash
+# download prequnantized internlm-chat-7b model from huggingface
+git-lfs install
+git clone https://huggingface.co/lmdeploy/llama2-chat-7b-w4
+
+# Convert the model's layout and store it in the default path, ./workspace.
+python3 -m lmdeploy.serve.turbomind.deploy internlm-chat-7b ./llama2-chat-7b-w4 awq --group-size 128
+
+# inference lmdeploy's turbomind engine
+python3 -m lmdeploy.turbomind.chat ./workspace
+
+# serving with gradio
+python3 -m lmdeploy.serve.gradio.app ./workspace
+```
+
+LMDeploy is a complete toolkit for compressing, deploying, and serving LLMs. Please refer to the [deployment tutorial](https://github.com/InternLM/LMDeploy) for more details on deploying InternLM.

## Fine-tuning & Training
README.md (58 changes)
@@ -45,6 +45,10 @@ InternLM has open-sourced a 7 billion parameter base model and a chat model tail

Additionally, a lightweight training framework is offered to support model pre-training without the need for extensive dependencies. With a single codebase, it supports pre-training on large-scale clusters with thousands of GPUs, and fine-tuning on a single GPU while achieving remarkable performance optimizations. InternLM achieves nearly 90% acceleration efficiency during training on 1024 GPUs.

+## News
+
+InternLM-7B-Chat v1.1 is released with code interpreter and function calling capability. You can try it with [Lagent](https://github.com/InternLM/lagent).
+
## InternLM-7B

### Performance Evaluation
@@ -74,6 +78,7 @@ InternLM 7B and InternLM 7B Chat, trained using InternLM, have been open-sourced
| Model | InternLM Format Weight Download Link | Transformers Format Weight Download Link |
| ----------------------------- | ------------------------------------ | ----------------------------------------- |
| **InternLM 7B** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-7b) | [🤗internlm/intern-7b](https://huggingface.co/internlm/internlm-7b) |
+| **InternLM Chat 7B v1.1** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b-v1.1) | [🤗internlm/intern-chat-7b-v1.1](https://huggingface.co/internlm/internlm-chat-7b-v1.1) |
| **InternLM Chat 7B** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b) | [🤗internlm/intern-chat-7b](https://huggingface.co/internlm/internlm-chat-7b) |
| **InternLM Chat 7B 8k** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b-8k) | [🤗internlm/intern-chat-7b-8k](https://huggingface.co/internlm/internlm-chat-7b-8k) |

@@ -85,8 +90,8 @@ To load the InternLM 7B Chat model using Transformers, use the following code:

```python
>>> from transformers import AutoTokenizer, AutoModelForCausalLM
->>> tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
+>>> tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b-v1_1", trust_remote_code=True)
->>> model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).cuda()
+>>> model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b-v1_1", trust_remote_code=True).cuda()
>>> model = model.eval()
>>> response, history = model.chat(tokenizer, "hello", history=[])
>>> print(response)
```
@@ -118,28 +123,45 @@ The effect is as follows

### Deployment

-We use [LMDeploy](https://github.com/InternLM/LMDeploy) to complete the one-click deployment of InternLM.
+We use [LMDeploy](https://github.com/InternLM/LMDeploy) to complete the workflow of InternLM deployment.

-1. First, install LMDeploy:
-
-```bash
-python3 -m pip install lmdeploy
-```
-
-2. Use the following command for quick deployment:
-
-```bash
-python3 -m lmdeploy.serve.turbomind.deploy internlm-chat-7b /path/to/internlm-chat-7b/model
-```
-
-3. After exporting the model, you can start a server and have a conversation with the deployed model using the following command:
-
-```bash
-bash workspace/service_docker_up.sh
-python3 -m lmdeploy.serve.client {server_ip_addresss}:33337
-```
-
-[LMDeploy](https://github.com/InternLM/LMDeploy) provides a complete workflow for deploying InternLM. Please refer to the [deployment tutorial](https://github.com/InternLM/LMDeploy) for more details on deploying InternLM.
+You can utilize the following commands to conduct `internlm-chat-7b` FP16 inference, serve it and interact with AI assistant via WebUI:
+
+```bash
+# convert weight layout
+python3 -m lmdeploy.serve.turbomind.deploy internlm-chat-7b
+
+# inference lmdeploy's turbomind engine
+python3 -m lmdeploy.turbomind.chat ./workspace
+
+# serving with gradio
+python3 -m lmdeploy.serve.gradio.app ./workspace
+```
+
+You can also deploy 4-bit quantized `internlm-chat-7b` model via LMDeploy. It greatly trims down the model's memory overhead to 6G, just 40% of what FP16 inference would take. More importantly, with extreme optimized kernel, the inference performance achieves 2.4x faster than FP16 inference on A100-80G.
+
+Try the followings to enjoy 4-bit `internlm-chat-7b` on a Geforce RTX 30x GPU card. You can find the inference benchmark from [here](https://github.com/InternLM/lmdeploy/blob/main/docs/en/w4a16.md#inference-performance).
+
+```bash
+# download prequnantized internlm-chat-7b model from huggingface
+git-lfs install
+git clone https://huggingface.co/lmdeploy/llama2-chat-7b-w4
+
+# Convert the model's layout and store it in the default path, ./workspace.
+python3 -m lmdeploy.serve.turbomind.deploy internlm-chat-7b ./llama2-chat-7b-w4 awq --group-size 128
+
+# inference lmdeploy's turbomind engine
+python3 -m lmdeploy.turbomind.chat ./workspace
+
+# serving with gradio
+python3 -m lmdeploy.serve.gradio.app ./workspace
+```
+
+LMDeploy is an efficient toolkit for compressing, deploying, and serving LLM models. Please refer to the [deployment tutorial](https://github.com/InternLM/LMDeploy) for more details on deploying InternLM.

## Fine-tuning & Training
@@ -0,0 +1,29 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+
+
+def merge_dicts(dict_a: dict, dict_b: dict):
+    for key in dict_b.keys():
+        if isinstance(dict_b[key], dict):
+            dict_b[key] = {**dict_a[key], **dict_b[key]}
+            merge_dicts(dict_a[key], dict_b[key])
+    dict_c = {**dict_a, **dict_b}
+    return dict_c
+
+
+def format_dict_to_py_string(data: dict, indent=0, is_nested=False):
+    result = ""
+    for key, value in data.items():
+        if isinstance(value, dict):
+            result += f"{' ' * indent}{key} = dict(\n"
+            result += format_dict_to_py_string(value, indent + 4, is_nested=True)
+            result += f"{' ' * indent})"
+        else:
+            result += f"{' ' * indent}{key} = {repr(value)}"
+        if is_nested:
+            result += ","
+        result += "\n"
+    result = f"""\
+{result}
+"""
+    return result
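A minimal usage sketch for the two helpers above (illustrative, not part of the patch), assuming they are importable as `ci_scripts.common.com_func`, the path used by `generate_config.py` later in this diff:

```python
from ci_scripts.common import com_func

base = {"SEQ_LEN": 2048, "ckpt": {"checkpoint_every": 50, "save_ckpt_folder": "local:llm_ckpts"}}
override = {"SEQ_LEN": 1024, "ckpt": {"checkpoint_every": 20}}

# Nested keys from `override` win; keys only present in `base` survive:
# {'SEQ_LEN': 1024, 'ckpt': {'checkpoint_every': 20, 'save_ckpt_folder': 'local:llm_ckpts'}}
merged = com_func.merge_dicts(base, override)
print(merged)

# Render the merged dict back into `key = value` / `key = dict(...)` lines,
# i.e. the shape of a training config .py file.
print(com_func.format_dict_to_py_string(merged))
```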
@@ -16,7 +16,7 @@ exit_code=0

source ./ci_scripts/common/basic_func.sh

echo "start to test alpaca_tokenizer.py."

if [[ -d ${RESULTS} ]]; then
    if ! rm -rf ${RESULTS}/*; then
@@ -12,7 +12,7 @@ exit_code=0

source ./ci_scripts/common/basic_func.sh

echo "start to test tokenizer.py."

num=$(num_files "${RESULTS}")
if [[ ${num} -gt 0 ]]; then
@@ -40,7 +40,7 @@ num=$(num_files "${CKPTS_OUTPUT}")

if [[ ${num} -ne ${expected_num} ]]; then
    echo "expect: ${expected_num} files, actual: ${num} files."
    exit_code=$(($exit_code + 1))
fi

# NOTICE: should not remove the cached files, because the cached files will be used in the next test case.
@@ -1,6 +1,6 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
-from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).cuda()
@@ -10,12 +10,11 @@ VOCAB_SIZE = 103168
# Ckpt folder format:
# fs: 'local:/mnt/nfs/XXX'
# oss: 'boto3:s3://model_weights/XXX'
-MODEL_ONLY_FOLDER = "local:llm_ckpts/xxxx"
+# MODEL_ONLY_FOLDER = "local:llm_ckpts/xxxx"
# SAVE_CKPT_FOLDER = "local:llm_ckpts"
SAVE_CKPT_FOLDER = "local:llm_ckpts"
# LOAD_CKPT_FOLDER = "local:llm_ckpts/49"
ckpt = dict(
-    enable_save_ckpt=True,
    # Path to save training ckpt.
    save_ckpt_folder=SAVE_CKPT_FOLDER,
    # Path to continue training ckpt (load model weights and scheduler/context states).
@@ -27,7 +26,7 @@ ckpt = dict(
    load_optimizer=True,
)

-TRAIN_FOLDER = "/mnt/petrelfs/qa-caif-cicd/data/lm_data/alpaca_data/train/en"
+TRAIN_FOLDER = "local:../lm_data/alpaca_data/train/en"
data = dict(
    seq_len=SEQ_LEN,
    # micro_num means the number of micro_batch contained in one gradient update
@@ -120,8 +119,8 @@ zero1 parallel:
    2. if zero1 == 1, zero is not used, and all dp groups retain the full amount of model parameters.
    3. zero1 > 1 and zero1 <= dp world size, the world size of zero is a subset of dp world size.
    For smaller models, it is usually a better choice to split the parameters within nodes with a setting <= 8.
-pipeline parallel: pipeline parallel size.
+pipeline parallel: pipeline parallel size, only 1 is accepted currently.
-tensor parallel: tensor parallel size, usually the number of GPUs per node.
+tensor parallel: tensor parallel size, usually the number of GPUs per node, only 1 is accepted currently.
"""
parallel = dict(
    zero1=8,
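A toy illustration (not from the patch) of how the `zero1` cases described in the docstring above map to the world size of the zero process group; the numbers are made up, and case 1 of the docstring is cut off above this hunk, so its handling here is only an assumption:

```python
def zero1_world_size(zero1: int, dp_world_size: int) -> int:
    """World size of the zero process group implied by the `zero1` setting."""
    if zero1 == 1:
        return 1                  # case 2: zero is not used, each dp rank keeps full parameters
    if 1 < zero1 <= dp_world_size:
        return zero1              # case 3: the zero group is a subset of the dp group
    # assumed behaviour for the docstring's case 1: span the whole dp group
    return dp_world_size


# this CI config sets zero1=8, i.e. parameters are split within an 8-rank (one-node) group
print(zero1_world_size(8, dp_world_size=32))  # -> 8
```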
@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+import argparse
+import json
+import os
+
+from ci_scripts.common import com_func
+from internlm.core.context import Config
+
+
+def generate_new_config(config_py_file, test_config_json, case_name):
+    # generate path of the new config py
+    config_path = os.path.split(config_py_file)
+    new_config_py_file = os.path.join(config_path[0], case_name + ".py")
+
+    # merge dict
+    origin_config = Config.from_file(config_py_file)
+    with open(test_config_json) as f:
+        test_config = json.load(f)
+    if test_config:
+        if case_name not in test_config.keys():
+            raise KeyError(f"the {case_name} doesn't exist.Please check {test_config} again!")
+        new_config = com_func.merge_dicts(origin_config, test_config[case_name])
+    print(f"new config is:\n{new_config}")
+
+    # write new config to py file
+    file_content = com_func.format_dict_to_py_string(new_config)
+    with open(new_config_py_file, "w") as f:
+        f.write(file_content)
+    print(f"The new test train config file is {new_config_py_file}")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--origin_config",
+        type=str,
+        default="./ci_scripts/train/ci_7B_sft.py",
+        help="path to the origin train config file",
+    )
+    parser.add_argument(
+        "--test_config",
+        type=str,
+        default="./ci_scripts/train/test_config.json",
+        help="path to the test train config file",
+    )
+    parser.add_argument("--case_name", type=str, help="name of the case which will be runned ")
+    args = parser.parse_args()
+    generate_new_config(args.origin_config, args.test_config, args.case_name)
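An illustrative invocation (not from the patch), using the argparse defaults above together with a case name from the `test_config.json` added further down; under those assumptions it writes `./ci_scripts/train/7B_load_new_ckpt.py` by merging that case's overrides into `ci_7B_sft.py`:

```bash
python ./ci_scripts/train/generate_config.py \
    --origin_config ./ci_scripts/train/ci_7B_sft.py \
    --test_config ./ci_scripts/train/test_config.json \
    --case_name 7B_load_new_ckpt
```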
@@ -0,0 +1,38 @@
+#!/bin/bash
+set -x
+
+[[ -n ${GITHUB_WORKSPACE} ]] || { echo "should set GITHUB_WORKSPACE first before ci, exit."; exit 1; }
+readonly CKPTS_PATH="$GITHUB_WORKSPACE/llm_ckpts"
+readonly CKPTS40_PATH="$GITHUB_WORKSPACE/llm_ckpts/40"
+readonly CKPTS40_OUTPUT="${CKPTS40_PATH}/*.pt"
+expected_num=21
+exit_code=0
+
+source ./ci_scripts/common/basic_func.sh
+
+echo "start to test slurm training with loading checkpoint."
+
+python ./ci_scripts/train/generate_config.py --case_name $1
+file="./ci_scripts/train/$1.py"
+if [[ ! -f ${file} ]]; then
+    echo "expect: ${file} exists, actual: not exist."
+    exit_code=$(($exit_code + 1))
+fi
+
+srun -p ${SLURM_PARTITION} --exclusive --job-name=$2 -n 8 --ntasks-per-node=8 --gpus-per-task=1 python train.py --config ${file}
+[[ $? -ne 0 ]] && { echo "test slurm training failed."; exit_code=$(($exit_code + 1)); }
+
+
+num=$(num_files "${CKPTS40_OUTPUT}")
+if [[ ${num} -ne ${expected_num} ]]; then
+    echo "expect: ${expected_num} files, actual: ${num} files."
+    exit_code=$(($exit_code + 1))
+fi
+
+# clean the test files.
+if ! rm -rf ${CKPTS_PATH}/*; then
+    echo "cleaning cached file in ${CKPTS_PATH} failed."
+    exit_code=$(($exit_code + 1))
+fi
+
+exit $exit_code
@@ -25,12 +25,6 @@ srun -p ${SLURM_PARTITION} --exclusive --job-name=$1 -n 8 --ntasks-per-node=8 --
num=$(num_files "${CKPTS20_OUTPUT}")
if [[ ${num} -ne ${expected_num} ]]; then
    echo "expect: ${expected_num} files, actual: ${num} files."
-    exit_code=$(($exit_code + 1))
-fi
-
-# clean the test files.
-if ! rm -rf ${CKPTS_PATH}/*; then
-    echo "cleaning cached file in ${CKPTS_PATH} failed."
    exit_code=$(($exit_code + 1))
fi
@@ -0,0 +1,45 @@
+{
+    "7B_basic_train": {
+        "SEQ_LEN": 1024,
+        "HIDDEN_SIZE": 2048,
+        "NUM_ATTENTION_HEAD": 16,
+        "NUM_LAYER": 16,
+        "TRAIN_FOLDER": "local:../lm_data/alpaca_data/train/en",
+        "ckpt": {
+            "checkpoint_every": 20
+        },
+        "data": {
+            "total_steps": 20
+        }
+    },
+    "7B_load_new_ckpt": {
+        "SEQ_LEN": 1024,
+        "HIDDEN_SIZE": 2048,
+        "NUM_ATTENTION_HEAD": 16,
+        "NUM_LAYER": 16,
+        "TRAIN_FOLDER": "local:../lm_data/alpaca_data/train/en",
+        "LOAD_CKPT_FOLDER": "local:llm_ckpts/20",
+        "ckpt": {
+            "load_ckpt_folder": "local:llm_ckpts/20",
+            "checkpoint_every": 20
+        },
+        "data": {
+            "total_steps": 40
+        }
+    },
+    "7B_load_preset_ckpt": {
+        "SEQ_LEN": 1024,
+        "HIDDEN_SIZE": 2048,
+        "NUM_ATTENTION_HEAD": 16,
+        "NUM_LAYER": 16,
+        "TRAIN_FOLDER": "local:../lm_data/alpaca_data/train/en",
+        "LOAD_CKPT_FOLDER": "local:../lm_data/alpaca_data/llm_ckpts/20",
+        "ckpt": {
+            "load_ckpt_folder": "local:../lm_data/alpaca_data/llm_ckpts/20",
+            "checkpoint_every": 20
+        },
+        "data": {
+            "total_steps": 40
+        }
+    }
+}
@@ -25,7 +25,7 @@ srun -p ${SLURM_PARTITION} --exclusive --job-name=$1 -N 1 torchrun --nnodes=1 --
num=$(num_files "${CKPTS_OUTPUT}")
if [[ ${num} -ne ${expected_num} ]]; then
    echo "expect: ${expected_num} files, actual: ${num} files."
    exit_code=$(($exit_code + 1))
fi

# clean the test files.