fix(conflicts): merge main to develop

2023-08-24 14:26:10 +08:00 · 2023-08-24 14:26:10 +08:00 · 9eec3d9465
parent eee93b5a68 e1cefaef6b
commit 9eec3d9465
17 changed files with 276 additions and 60 deletions
--- a/.github/workflows/demo_in_readme.yaml
+++ b/.github/workflows/demo_in_readme.yaml
@ -39,7 +39,7 @@ jobs:
    needs: check-requirements
    runs-on: [lmtest]
    steps:
-    - name: mask env 
+    - name: mask env
      run: |
        echo "::add-mask::${{env.WORKSPACE_PREFIX}}"
    - uses: actions/checkout@v3
@ -60,15 +60,29 @@ jobs:
    runs-on: [lmtest]
    timeout-minutes: 30
    steps:
-    - name: mask env 
+    - name: mask env
      run: |
        echo "::add-mask::${{env.WORKSPACE_PREFIX}}"
    - uses: actions/checkout@v3

    - name: slurm-train
+      id: basic_train
      run: |
        source activate internlm-env-test
        sh ./ci_scripts/train/slurm_train.sh ${GITHUB_RUN_ID}-${GITHUB_JOB}
+
+    - name: load_preset_ckpt
+      if: ${{ failure() && steps.basic_train.conclusion == 'failure' }}
+      run: |
+        source activate internlm-env-test
+        export PYTHONPATH=$PWD:$PYTHONPATH
+        sh ./ci_scripts/train/load_ckpt.sh 7B_load_preset_ckpt ${GITHUB_RUN_ID}-${GITHUB_JOB}
+
+    - name: load_new_ckpt
+      run: |
+        source activate internlm-env-test
+        export PYTHONPATH=$PWD:$PYTHONPATH
+        sh ./ci_scripts/train/load_ckpt.sh 7B_load_new_ckpt ${GITHUB_RUN_ID}-${GITHUB_JOB}
        rm -rf $GITHUB_WORKSPACE/llm_ckpts

    - name: torchrun-train
@ -91,18 +105,17 @@ jobs:
      run: |
        source activate internlm-env-test
        export PYTHONPATH=$PWD:$PYTHONPATH
-        sh ./ci_scripts/model/convert_to_hf.sh 
+        sh ./ci_scripts/model/convert_to_hf.sh
        cd ./hf_ckpt
        srun -p ${SLURM_PARTITION} --job-name=${GITHUB_RUN_ID}-${GITHUB_JOB} --gpus-per-task=2 python ../ci_scripts/model/loaded_as_transformer.py
        cd ..
        rm -rf $GITHUB_WORKSPACE/hf_ckpt
-  
  load-chat-model-in-hf:
    if: ${{ always() }}
    needs: check-requirements
    runs-on: [lmtest]
    steps:
-    - name: mask env 
+    - name: mask env
      run: |
        echo "::add-mask::${{env.WORKSPACE_PREFIX}}"
    - uses: actions/checkout@v3
--- a/.github/workflows/lint_check.yaml
+++ b/.github/workflows/lint_check.yaml
@ -1,6 +1,6 @@
 name: lint-check

-on: 
+on:
  push:
  pull_request:
    branches:
--- a/.github/workflows/sonar.yaml
+++ b/.github/workflows/sonar.yaml
@ -1,7 +1,7 @@
 name: Sonarqube
 on:
  workflow_dispatch:
-     
+
 jobs:
  sonarqube:
    name: SonarQube Scan
@ -13,4 +13,4 @@ jobs:
      - uses: sonarsource/sonarqube-scan-action@master
        env:
          SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
-          SONAR_HOST_URL: ${{ secrets.SONAR_HOST_URL }}
+          SONAR_HOST_URL: ${{ secrets.SONAR_HOST_URL }}
--- a/README-ja-JP.md
+++ b/README-ja-JP.md
@ -40,6 +40,10 @@ InternLM は、70 億のパラメータを持つベースモデルと、実用

 さらに、大規模な依存関係を必要とせずにモデルの事前学習をサポートする軽量な学習フレームワークが提供されます。単一のコードベースで、数千の GPU を持つ大規模クラスタでの事前学習と、単一の GPU での微調整をサポートし、顕著な性能最適化を達成します。InternLM は、1024GPU でのトレーニングにおいて 90% 近いアクセラレーション効率を達成しています。

+## ニュース
+
+InternLM-7B-Chat v1.1 は、コード インタプリタと関数呼び出し機能を備えてリリースされました。 [Lagent](https://github.com/InternLM/lagent) で試すことができます。
+
 ## InternLM-7B

 ### パフォーマンス評価
@ -80,8 +84,8 @@ Transformers を使用して InternLM 7B チャットモデルをロードする

 ```python
 >>> from transformers import AutoTokenizer, AutoModelForCausalLM
->>> tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
->>> model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).cuda()
+>>> tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b-v1_1", trust_remote_code=True)
+>>> model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b-v1_1", trust_remote_code=True).cuda()
 >>> model = model.eval()
 >>> response, history = model.chat(tokenizer, "こんにちは", history=[])
 >>> print(response)
--- a/README-zh-Hans.md
+++ b/README-zh-Hans.md
@ -45,6 +45,10 @@ InternLM ，即书生·浦语大模型，包含面向实用场景的70亿参数

 提供了支持模型预训练的轻量级训练框架，无需安装大量依赖包，一套代码支持千卡预训练和单卡人类偏好对齐训练，同时实现了极致的性能优化，实现千卡训练下近90%加速效率。

+## 新闻
+
+我们开源了 InternLM-Chat-7B v1.1。该模型能够调用代码解释器和工具插件。你可以在 [Lagent](https://github.com/InternLM/lagent) 中体验这些新功能。
+
 ## InternLM-7B

 ### 性能评测
@ -74,6 +78,7 @@ InternLM ，即书生·浦语大模型，包含面向实用场景的70亿参数
 | 模型                 | InternLM 格式权重下载地址                                                                                                                      | Transformers 格式权重下载地址                    |
 | -------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------ |
 | **InternLM 7B**      | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-7b) | [🤗internlm/intern-7b](https://huggingface.co/internlm/internlm-7b) |
+| **InternLM Chat 7B v1.1**    | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b-v1.1)    | [🤗internlm/internlm-chat-7b-v1_1](https://huggingface.co/internlm/internlm-chat-7b-v1_1)       |
 | **InternLM Chat 7B** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b) | [🤗internlm/intern-chat-7b](https://huggingface.co/internlm/internlm-chat-7b)
 | **InternLM Chat 7B 8k** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b-8k) | [🤗internlm/intern-chat-7b-8k](https://huggingface.co/internlm/internlm-chat-7b-8k)

@ -85,8 +90,8 @@ InternLM ，即书生·浦语大模型，包含面向实用场景的70亿参数

 ```python
 >>> from transformers import AutoTokenizer, AutoModelForCausalLM
->>> tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
->>> model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).cuda()
+>>> tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b-v1_1", trust_remote_code=True)
+>>> model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b-v1_1", trust_remote_code=True).cuda()
 >>> model = model.eval()
 >>> response, history = model.chat(tokenizer, "你好", history=[])
 >>> print(response)
@ -117,26 +122,44 @@ streamlit run web_demo.py

 我们使用 [LMDeploy](https://github.com/InternLM/LMDeploy) 完成 InternLM 的一键部署。

-1. 首先安装 LMDeploy:
+```bash
+python3 -m pip install lmdeploy
+```

-   ```bash
-   python3 -m pip install lmdeploy
-   ```
+执行以下命令，可以在终端与 `internlm-chat-7b` 模型进行交互式对话，或者通过 WebUI 与它聊天。

-2. 快速的部署命令如下：
+```bash
+# 转换权重格式
+python3 -m lmdeploy.serve.turbomind.deploy internlm-chat-7b

-   ```bash
-   python3 -m lmdeploy.serve.turbomind.deploy internlm-chat-7b /path/to/internlm-7b/model
-   ```
+# 在终端进行交互式对话
+python3 -m lmdeploy.turbomind.chat ./workspace

-3. 在导出模型后，你可以直接通过如下命令启动服务，并在客户端与AI对话
+# 启动 gradio 服务
+python3 -m lmdeploy.serve.gradio.app ./workspace
+```
+以上过程中，LMDeploy 使用的是 FP16 的计算精度。

-   ```bash
-   bash workspace/service_docker_up.sh
-   python3 -m lmdeploy.serve.client {server_ip_addresss}:33337
-   ```
+除了 FP16 精度，LMDeploy 还支持 `internlm-chat-7b` 4bit 权重模型推理。它不仅把模型的显存减少到 6G，大约只有 FP16 的 40%，更重要的是，经过 kernel 层面的极致优化，其推理性能在 A100-80G 上可达到 FP16 的 2.4 倍以上。
+
+以下是 `internlm-chat-7b` 4bit 权重模型的部署方法。推理速度的 benchmark 请参考[这里](https://github.com/InternLM/lmdeploy/blob/main/docs/zh_cn/w4a16.md#%E6%8E%A8%E7%90%86%E9%80%9F%E5%BA%A6)。
+
+```bash
+# download prequantized internlm-chat-7b model from huggingface
+git-lfs install
+git clone https://huggingface.co/lmdeploy/llama2-chat-7b-w4
+
+# Convert the model's layout and store it in the default path, ./workspace.
+python3 -m lmdeploy.serve.turbomind.deploy internlm-chat-7b ./llama2-chat-7b-w4 awq --group-size 128
+
+# inference lmdeploy's turbomind engine
+python3 -m lmdeploy.turbomind.chat ./workspace
+
+# serving with gradio
+python3 -m lmdeploy.serve.gradio.app ./workspace
+```
+LMDeploy 是涵盖了 LLM 任务的全套轻量化、部署和服务的工具箱。请参考 [部署教程](https://github.com/InternLM/LMDeploy) 了解 InternLM 的更多部署细节。

-[LMDeploy](https://github.com/InternLM/LMDeploy) 支持了 InternLM 部署的完整流程，请参考 [部署教程](https://github.com/InternLM/LMDeploy) 了解 InternLM 的更多部署细节。

 ## 微调&训练

--- a/README.md
+++ b/README.md
@ -45,6 +45,10 @@ InternLM has open-sourced a 7 billion parameter base model and a chat model tail

 Additionally, a lightweight training framework is offered to support model pre-training without the need for extensive dependencies. With a single codebase, it supports pre-training on large-scale clusters with thousands of GPUs, and fine-tuning on a single GPU while achieving remarkable performance optimizations. InternLM achieves nearly 90% acceleration efficiency during training on 1024 GPUs.

+## News
+
+InternLM-7B-Chat v1.1 is released with code interpreter and function calling capability. You can try it with [Lagent](https://github.com/InternLM/lagent).
+
 ## InternLM-7B

 ### Performance Evaluation
@ -74,6 +78,7 @@ InternLM 7B and InternLM 7B Chat, trained using InternLM, have been open-sourced
 | Model                         | InternLM Format Weight Download Link                                                                                                                 | Transformers Format Weight Download Link                                         |
 | ----------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- |
 | **InternLM 7B**         | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-7b)         | [🤗internlm/intern-7b](https://huggingface.co/internlm/internlm-7b)                 |
+| **InternLM Chat 7B v1.1**    | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b-v1.1)    | [🤗internlm/internlm-chat-7b-v1_1](https://huggingface.co/internlm/internlm-chat-7b-v1_1)       |
 | **InternLM Chat 7B**    | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b)    | [🤗internlm/intern-chat-7b](https://huggingface.co/internlm/internlm-chat-7b)       |
 | **InternLM Chat 7B 8k** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b-8k) | [🤗internlm/intern-chat-7b-8k](https://huggingface.co/internlm/internlm-chat-7b-8k) |

@ -85,8 +90,8 @@ To load the InternLM 7B Chat model using Transformers, use the following code:

 ```python
 >>> from transformers import AutoTokenizer, AutoModelForCausalLM
->>> tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
->>> model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).cuda()
+>>> tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b-v1_1", trust_remote_code=True)
+>>> model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b-v1_1", trust_remote_code=True).cuda()
 >>> model = model.eval()
 >>> response, history = model.chat(tokenizer, "hello", history=[])
 >>> print(response)
@ -118,28 +123,45 @@ The effect is as follows

 ### Deployment

-We use [LMDeploy](https://github.com/InternLM/LMDeploy) to complete the one-click deployment of InternLM.
+We use [LMDeploy](https://github.com/InternLM/LMDeploy) to complete the workflow of InternLM deployment.

-1. First, install LMDeploy:
+```bash
+python3 -m pip install lmdeploy
+```

-    ```bash
-    python3 -m pip install lmdeploy
-    ```
+You can use the following commands to run FP16 inference with `internlm-chat-7b`, serve it, and interact with the AI assistant via WebUI:

-2. Use the following command for quick deployment:
+```bash
+# convert weight layout
+python3 -m lmdeploy.serve.turbomind.deploy internlm-chat-7b

-    ```bash
-    python3 -m lmdeploy.serve.turbomind.deploy internlm-chat-7b /path/to/internlm-chat-7b/model
-    ```
+# inference lmdeploy's turbomind engine
+python3 -m lmdeploy.turbomind.chat ./workspace

-3. After exporting the model, you can start a server and have a conversation with the deployed model using the following command:
-   
-    ```bash
-    bash workspace/service_docker_up.sh
-    python3 -m lmdeploy.serve.client {server_ip_addresss}:33337
-    ```
+# serving with gradio
+python3 -m lmdeploy.serve.gradio.app ./workspace
+```

-[LMDeploy](https://github.com/InternLM/LMDeploy) provides a complete workflow for deploying InternLM. Please refer to the [deployment tutorial](https://github.com/InternLM/LMDeploy) for more details on deploying InternLM.
+You can also deploy the 4-bit quantized `internlm-chat-7b` model via LMDeploy. It greatly trims down the model's memory overhead to 6G, just 40% of what FP16 inference would take. More importantly, with extremely optimized kernels, inference is more than 2.4x faster than FP16 inference on A100-80G.
+
+Try the following to run 4-bit `internlm-chat-7b` on a GeForce RTX 30x GPU card. You can find the inference benchmark [here](https://github.com/InternLM/lmdeploy/blob/main/docs/en/w4a16.md#inference-performance).
+
+```bash
+# download prequantized internlm-chat-7b model from huggingface
+git-lfs install
+git clone https://huggingface.co/lmdeploy/llama2-chat-7b-w4
+
+# Convert the model's layout and store it in the default path, ./workspace.
+python3 -m lmdeploy.serve.turbomind.deploy internlm-chat-7b ./llama2-chat-7b-w4 awq --group-size 128
+
+# inference lmdeploy's turbomind engine
+python3 -m lmdeploy.turbomind.chat ./workspace
+
+# serving with gradio
+python3 -m lmdeploy.serve.gradio.app ./workspace
+```
+
+LMDeploy is an efficient toolkit for compressing, deploying, and serving LLM models. Please refer to the [deployment tutorial](https://github.com/InternLM/LMDeploy) for more details on deploying InternLM.

 ## Fine-tuning & Training

--- a/ci_scripts/common/com_func.py
+++ b/ci_scripts/common/com_func.py
@ -0,0 +1,29 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+
+
+def merge_dicts(dict_a: dict, dict_b: dict):
+    for key in dict_b.keys():
+        if isinstance(dict_b[key], dict):
+            dict_b[key] = {**dict_a[key], **dict_b[key]}
+            merge_dicts(dict_a[key], dict_b[key])
+    dict_c = {**dict_a, **dict_b}
+    return dict_c
+
+
+def format_dict_to_py_string(data: dict, indent=0, is_nested=False):
+    result = ""
+    for key, value in data.items():
+        if isinstance(value, dict):
+            result += f"{' ' * indent}{key} = dict(\n"
+            result += format_dict_to_py_string(value, indent + 4, is_nested=True)
+            result += f"{' ' * indent})"
+        else:
+            result += f"{' ' * indent}{key} = {repr(value)}"
+        if is_nested:
+            result += ","
+        result += "\n"
+    result = f"""\
+{result}
+"""
+    return result
--- a/ci_scripts/data/tokenizer_alpaca.sh
+++ b/ci_scripts/data/tokenizer_alpaca.sh
@ -16,7 +16,7 @@ exit_code=0

 source ./ci_scripts/common/basic_func.sh

-echo "start to test alpaca_tokenizer.py." 
+echo "start to test alpaca_tokenizer.py."

 if [[ -d ${RESULTS} ]]; then
    if ! rm -rf ${RESULTS}/*; then
--- a/ci_scripts/data/tokenizer_chinese.sh
+++ b/ci_scripts/data/tokenizer_chinese.sh
@ -12,7 +12,7 @@ exit_code=0

 source ./ci_scripts/common/basic_func.sh

-echo "start to test tokenizer.py." 
+echo "start to test tokenizer.py."

 num=$(num_files "${RESULTS}")
 if [[ ${num} -gt 0 ]]; then
--- a/ci_scripts/model/convert_to_hf.sh
+++ b/ci_scripts/model/convert_to_hf.sh
@ -40,7 +40,7 @@ num=$(num_files "${CKPTS_OUTPUT}")

 if [[ ${num} -ne ${expected_num} ]]; then
    echo "expect: ${expected_num} files, actual: ${num} files."
-    exit_code=$(($exit_code + 1)) 
+    exit_code=$(($exit_code + 1))
 fi

 # NOTICE: should not remove the cached files, because the cached files will be used in the next test case.
--- a/ci_scripts/model/demo_load_7B_chat_model.py
+++ b/ci_scripts/model/demo_load_7B_chat_model.py
@ -1,6 +1,6 @@
 #!/usr/bin/env python
 # -*- encoding: utf-8 -*-
-from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import AutoModelForCausalLM, AutoTokenizer

 tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).cuda()
--- a/ci_scripts/train/ci_7B_sft.py
+++ b/ci_scripts/train/ci_7B_sft.py
@ -10,12 +10,11 @@ VOCAB_SIZE = 103168
 # Ckpt folder format:
 # fs: 'local:/mnt/nfs/XXX'
 # oss: 'boto3:s3://model_weights/XXX'
-MODEL_ONLY_FOLDER = "local:llm_ckpts/xxxx"
+# MODEL_ONLY_FOLDER = "local:llm_ckpts/xxxx"
 # SAVE_CKPT_FOLDER = "local:llm_ckpts"
 SAVE_CKPT_FOLDER = "local:llm_ckpts"
 # LOAD_CKPT_FOLDER = "local:llm_ckpts/49"
 ckpt = dict(
-    enable_save_ckpt=True,
    # Path to save training ckpt.
    save_ckpt_folder=SAVE_CKPT_FOLDER,
    # Path to continue training ckpt (load model weights and scheduler/context states).
@ -27,7 +26,7 @@ ckpt = dict(
    load_optimizer=True,
 )

-TRAIN_FOLDER = "/mnt/petrelfs/qa-caif-cicd/data/lm_data/alpaca_data/train/en"
+TRAIN_FOLDER = "local:../lm_data/alpaca_data/train/en"
 data = dict(
    seq_len=SEQ_LEN,
    # micro_num means the number of micro_batch contained in one gradient update
@ -120,8 +119,8 @@ zero1 parallel:
    2. if zero1 == 1, zero is not used, and all dp groups retain the full amount of model parameters.
    3. zero1 > 1 and zero1 <= dp world size, the world size of zero is a subset of dp world size.
        For smaller models, it is usually a better choice to split the parameters within nodes with a setting <= 8.
-pipeline parallel: pipeline parallel size.
-tensor parallel: tensor parallel size, usually the number of GPUs per node.
+pipeline parallel: pipeline parallel size, only 1 is accepted currently.
+tensor parallel: tensor parallel size, usually the number of GPUs per node, only 1 is accepted currently.
 """
 parallel = dict(
    zero1=8,
--- a/ci_scripts/train/generate_config.py
+++ b/ci_scripts/train/generate_config.py
@ -0,0 +1,49 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+import argparse
+import json
+import os
+
+from ci_scripts.common import com_func
+from internlm.core.context import Config
+
+
+def generate_new_config(config_py_file, test_config_json, case_name):
+    # generate path of the new config py
+    config_path = os.path.split(config_py_file)
+    new_config_py_file = os.path.join(config_path[0], case_name + ".py")
+
+    # merge dict
+    origin_config = Config.from_file(config_py_file)
+    with open(test_config_json) as f:
+        test_config = json.load(f)
+    if test_config:
+        if case_name not in test_config.keys():
+            raise KeyError(f"the {case_name} doesn't exist.Please check {test_config} again!")
+    new_config = com_func.merge_dicts(origin_config, test_config[case_name])
+    print(f"new config is:\n{new_config}")
+
+    # write new config to py file
+    file_content = com_func.format_dict_to_py_string(new_config)
+    with open(new_config_py_file, "w") as f:
+        f.write(file_content)
+    print(f"The new test train config file is {new_config_py_file}")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--origin_config",
+        type=str,
+        default="./ci_scripts/train/ci_7B_sft.py",
+        help="path to the origin train config file",
+    )
+    parser.add_argument(
+        "--test_config",
+        type=str,
+        default="./ci_scripts/train/test_config.json",
+        help="path to the test train config file",
+    )
+    parser.add_argument("--case_name", type=str, help="name of the case which will be runned ")
+    args = parser.parse_args()
+    generate_new_config(args.origin_config, args.test_config, args.case_name)
--- a/ci_scripts/train/load_ckpt.sh
+++ b/ci_scripts/train/load_ckpt.sh
@ -0,0 +1,38 @@
+#!/bin/bash
+set -x
+
+[[ -n ${GITHUB_WORKSPACE} ]] || { echo "should set GITHUB_WORKSPACE first before ci, exit."; exit 1; }
+readonly CKPTS_PATH="$GITHUB_WORKSPACE/llm_ckpts"
+readonly CKPTS40_PATH="$GITHUB_WORKSPACE/llm_ckpts/40"
+readonly CKPTS40_OUTPUT="${CKPTS40_PATH}/*.pt"
+expected_num=21
+exit_code=0
+
+source ./ci_scripts/common/basic_func.sh
+
+echo "start to test slurm training with loading checkpoint."
+
+python ./ci_scripts/train/generate_config.py --case_name $1
+file="./ci_scripts/train/$1.py"
+if [[ ! -f ${file} ]]; then
+        echo "expect: ${file} exists, actual: not exist."
+        exit_code=$(($exit_code + 1))
+    fi
+
+srun -p ${SLURM_PARTITION} --exclusive --job-name=$2 -n 8 --ntasks-per-node=8 --gpus-per-task=1 python train.py --config ${file}
+[[ $? -ne 0 ]] && { echo "test slurm training failed.";  exit_code=$(($exit_code + 1)); }
+
+
+num=$(num_files "${CKPTS40_OUTPUT}")
+if [[ ${num} -ne ${expected_num} ]]; then
+    echo "expect: ${expected_num} files, actual: ${num} files."
+    exit_code=$(($exit_code + 1))
+fi
+
+# clean the test files.
+if ! rm -rf ${CKPTS_PATH}/*; then
+    echo "cleaning cached file in ${CKPTS_PATH} failed."
+    exit_code=$(($exit_code + 1))
+fi
+
+exit $exit_code
--- a/ci_scripts/train/slurm_train.sh
+++ b/ci_scripts/train/slurm_train.sh
@ -25,12 +25,6 @@ srun -p ${SLURM_PARTITION} --exclusive --job-name=$1 -n 8 --ntasks-per-node=8 --
 num=$(num_files "${CKPTS20_OUTPUT}")
 if [[ ${num} -ne ${expected_num} ]]; then
    echo "expect: ${expected_num} files, actual: ${num} files."
-    exit_code=$(($exit_code + 1)) 
-fi
-
-# clean the test files.
-if ! rm -rf ${CKPTS_PATH}/*; then
-    echo "cleaning cached file in ${CKPTS_PATH} failed."
    exit_code=$(($exit_code + 1))
 fi

--- a/ci_scripts/train/test_config.json
+++ b/ci_scripts/train/test_config.json
@ -0,0 +1,45 @@
+{
+    "7B_basic_train": {
+        "SEQ_LEN": 1024,
+        "HIDDEN_SIZE": 2048,
+        "NUM_ATTENTION_HEAD": 16,
+        "NUM_LAYER": 16,
+        "TRAIN_FOLDER":"local:../lm_data/alpaca_data/train/en",
+        "ckpt": {
+            "checkpoint_every": 20
+        },
+        "data": {
+            "total_steps": 20
+        }
+    },
+    "7B_load_new_ckpt": {
+        "SEQ_LEN": 1024,
+        "HIDDEN_SIZE": 2048,
+        "NUM_ATTENTION_HEAD": 16,
+        "NUM_LAYER": 16,
+        "TRAIN_FOLDER":"local:../lm_data/alpaca_data/train/en",
+        "LOAD_CKPT_FOLDER": "local:llm_ckpts/20",
+        "ckpt": {
+            "load_ckpt_folder": "local:llm_ckpts/20",
+            "checkpoint_every": 20
+        },
+        "data": {
+            "total_steps": 40
+        }
+    },
+    "7B_load_preset_ckpt": {
+        "SEQ_LEN": 1024,
+        "HIDDEN_SIZE": 2048,
+        "NUM_ATTENTION_HEAD": 16,
+        "NUM_LAYER": 16,
+        "TRAIN_FOLDER":"local:../lm_data/alpaca_data/train/en",
+        "LOAD_CKPT_FOLDER": "local:../lm_data/alpaca_data/llm_ckpts/20",
+        "ckpt": {
+            "load_ckpt_folder": "local:../lm_data/alpaca_data/llm_ckpts/20",
+            "checkpoint_every": 20
+        },
+        "data": {
+            "total_steps": 40
+        }
+    }
+}
--- a/ci_scripts/train/torchrun.sh
+++ b/ci_scripts/train/torchrun.sh
@ -25,7 +25,7 @@ srun -p ${SLURM_PARTITION} --exclusive --job-name=$1 -N 1 torchrun --nnodes=1 --
 num=$(num_files "${CKPTS_OUTPUT}")
 if [[ ${num} -ne ${expected_num} ]]; then
    echo "expect: ${expected_num} files, actual: ${num} files."
-    exit_code=$(($exit_code + 1)) 
+    exit_code=$(($exit_code + 1))
 fi

 # clean the test files.