fix(conflicts): merge main to develop

pull/227/head^2
huangting4201 2023-08-24 14:26:10 +08:00
commit 9eec3d9465
17 changed files with 276 additions and 60 deletions


@@ -39,7 +39,7 @@ jobs:
     needs: check-requirements
     runs-on: [lmtest]
     steps:
     - name: mask env
       run: |
         echo "::add-mask::${{env.WORKSPACE_PREFIX}}"
     - uses: actions/checkout@v3
@@ -60,15 +60,29 @@ jobs:
     runs-on: [lmtest]
     timeout-minutes: 30
     steps:
     - name: mask env
       run: |
         echo "::add-mask::${{env.WORKSPACE_PREFIX}}"
     - uses: actions/checkout@v3
     - name: slurm-train
+      id: basic_train
       run: |
         source activate internlm-env-test
         sh ./ci_scripts/train/slurm_train.sh ${GITHUB_RUN_ID}-${GITHUB_JOB}
+    - name: load_preset_ckpt
+      if: ${{ failure() && steps.basic_train.conclusion == 'failure' }}
+      run: |
+        source activate internlm-env-test
+        export PYTHONPATH=$PWD:$PYTHONPATH
+        sh ./ci_scripts/train/load_ckpt.sh 7B_load_preset_ckpt ${GITHUB_RUN_ID}-${GITHUB_JOB}
+    - name: load_new_ckpt
+      run: |
+        source activate internlm-env-test
+        export PYTHONPATH=$PWD:$PYTHONPATH
+        sh ./ci_scripts/train/load_ckpt.sh 7B_load_new_ckpt ${GITHUB_RUN_ID}-${GITHUB_JOB}
         rm -rf $GITHUB_WORKSPACE/llm_ckpts
     - name: torchrun-train
@@ -91,18 +105,17 @@ jobs:
       run: |
         source activate internlm-env-test
         export PYTHONPATH=$PWD:$PYTHONPATH
         sh ./ci_scripts/model/convert_to_hf.sh
         cd ./hf_ckpt
         srun -p ${SLURM_PARTITION} --job-name=${GITHUB_RUN_ID}-${GITHUB_JOB} --gpus-per-task=2 python ../ci_scripts/model/loaded_as_transformer.py
         cd ..
         rm -rf $GITHUB_WORKSPACE/hf_ckpt
   load-chat-model-in-hf:
     if: ${{ always() }}
     needs: check-requirements
     runs-on: [lmtest]
     steps:
     - name: mask env
       run: |
         echo "::add-mask::${{env.WORKSPACE_PREFIX}}"
     - uses: actions/checkout@v3
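
Read as control flow, the new steps give the training job a checkpoint-recovery path: `load_preset_ckpt` fires only when the `basic_train` step fails, while `load_new_ckpt` runs unconditionally. A minimal Python sketch of that sequencing (illustrative only; the script paths come from the workflow above, while the `run` helper and the straight-line orchestration are assumptions, not how the Actions runner actually works):

```python
import os
import subprocess


def run(cmd: str) -> int:
    # Run a shell command the way a workflow step would; env vars expand via the shell.
    return subprocess.run(cmd, shell=True).returncode


job = f"{os.environ['GITHUB_RUN_ID']}-{os.environ['GITHUB_JOB']}"

# Step "slurm-train" (id: basic_train).
basic_train_ok = run(f"sh ./ci_scripts/train/slurm_train.sh {job}") == 0

# "load_preset_ckpt" runs only on failure, mirroring
# `if: failure() && steps.basic_train.conclusion == 'failure'`.
if not basic_train_ok:
    run(f"sh ./ci_scripts/train/load_ckpt.sh 7B_load_preset_ckpt {job}")

# "load_new_ckpt" has no condition and runs either way.
run(f"sh ./ci_scripts/train/load_ckpt.sh 7B_load_new_ckpt {job}")
```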


@@ -1,6 +1,6 @@
 name: lint-check

 on:
   push:
   pull_request:
     branches:


@@ -1,7 +1,7 @@
 name: Sonarqube

 on:
   workflow_dispatch:

 jobs:
   sonarqube:
     name: SonarQube Scan
@@ -13,4 +13,4 @@ jobs:
       - uses: sonarsource/sonarqube-scan-action@master
         env:
           SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
           SONAR_HOST_URL: ${{ secrets.SONAR_HOST_URL }}


@@ -40,6 +40,10 @@ InternLM has open-sourced a 7-billion-parameter base model and a chat model tailored for practical scenarios
 Additionally, a lightweight training framework is offered to support model pre-training without the need for extensive dependencies. With a single codebase, it supports pre-training on large-scale clusters with thousands of GPUs, and fine-tuning on a single GPU while achieving remarkable performance optimizations. InternLM achieves nearly 90% acceleration efficiency during training on 1024 GPUs.

+## News
+
+InternLM-7B-Chat v1.1 has been released with code interpreter and function calling capability. You can try it with [Lagent](https://github.com/InternLM/lagent).
+
 ## InternLM-7B

 ### Performance Evaluation
@@ -80,8 +84,8 @@ To load the InternLM 7B Chat model using Transformers, use the following code:
 ```python
 >>> from transformers import AutoTokenizer, AutoModelForCausalLM
->>> tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
->>> model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).cuda()
+>>> tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b-v1_1", trust_remote_code=True)
+>>> model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b-v1_1", trust_remote_code=True).cuda()
 >>> model = model.eval()
 >>> response, history = model.chat(tokenizer, "こんにちは", history=[])
 >>> print(response)


@@ -45,6 +45,10 @@ InternLM (书生·浦语) has open-sourced a 7-billion-parameter model tailored for practical scenarios
 A lightweight training framework is provided to support model pre-training without installing a large number of dependency packages. A single codebase supports pre-training on thousand-GPU clusters and human-preference alignment training on a single GPU, while achieving extreme performance optimization: nearly 90% acceleration efficiency for training on a thousand GPUs.

+## News
+
+We have open-sourced InternLM-Chat-7B v1.1. The model can invoke the code interpreter and tool plugins. You can try these new features in [Lagent](https://github.com/InternLM/lagent).
+
 ## InternLM-7B

 ### Performance Evaluation
@@ -74,6 +78,7 @@ InternLM (书生·浦语) has open-sourced a 7-billion-parameter model tailored for practical scenarios
 | Model | InternLM Format Weight Download Link | Transformers Format Weight Download Link |
 | -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------ |
 | **InternLM 7B** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-7b) | [🤗internlm/intern-7b](https://huggingface.co/internlm/internlm-7b) |
+| **InternLM Chat 7B v1.1** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b-v1.1) | [🤗internlm/intern-chat-7b-v1.1](https://huggingface.co/internlm/internlm-chat-7b-v1.1) |
 | **InternLM Chat 7B** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b) | [🤗internlm/intern-chat-7b](https://huggingface.co/internlm/internlm-chat-7b) |
 | **InternLM Chat 7B 8k** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b-8k) | [🤗internlm/intern-chat-7b-8k](https://huggingface.co/internlm/internlm-chat-7b-8k) |
@@ -85,8 +90,8 @@ To load the InternLM 7B Chat model using Transformers, use the following code:
 ```python
 >>> from transformers import AutoTokenizer, AutoModelForCausalLM
->>> tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
->>> model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).cuda()
+>>> tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b-v1_1", trust_remote_code=True)
+>>> model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b-v1_1", trust_remote_code=True).cuda()
 >>> model = model.eval()
 >>> response, history = model.chat(tokenizer, "你好", history=[])
 >>> print(response)
@@ -117,26 +122,44 @@ streamlit run web_demo.py

 We use [LMDeploy](https://github.com/InternLM/LMDeploy) to complete the one-click deployment of InternLM.

-1. First, install LMDeploy:
-
 ```bash
 python3 -m pip install lmdeploy
 ```

-2. Use the following command for quick deployment:
-
-```bash
-python3 -m lmdeploy.serve.turbomind.deploy internlm-chat-7b /path/to/internlm-7b/model
-```
-
-3. After exporting the model, you can start the service and chat with the AI on the client side with the following commands:
-
-```bash
-bash workspace/service_docker_up.sh
-python3 -m lmdeploy.serve.client {server_ip_addresss}:33337
-```
-
-[LMDeploy](https://github.com/InternLM/LMDeploy) supports the complete deployment workflow of InternLM. Please refer to the [deployment tutorial](https://github.com/InternLM/LMDeploy) for more details on deploying InternLM.
+Run the following commands to chat with the `internlm-chat-7b` model interactively in the terminal, or to chat with it through a WebUI.
+
+```bash
+# convert the weight layout
+python3 -m lmdeploy.serve.turbomind.deploy internlm-chat-7b
+
+# interactive chat in the terminal
+python3 -m lmdeploy.turbomind.chat ./workspace
+
+# launch the gradio service
+python3 -m lmdeploy.serve.gradio.app ./workspace
+```
+
+In the steps above, LMDeploy uses FP16 computation precision.
+
+Besides FP16, LMDeploy also supports 4-bit weight-model inference for `internlm-chat-7b`. It not only reduces the model's GPU memory to 6 GB, only about 40% of FP16, but more importantly, thanks to extreme kernel-level optimization, its inference performance can reach more than 2.4x that of FP16 on an A100-80G.
+
+Here is how to deploy the 4-bit weight model of `internlm-chat-7b`. For the inference speed benchmark, please refer to [here](https://github.com/InternLM/lmdeploy/blob/main/docs/zh_cn/w4a16.md#%E6%8E%A8%E7%90%86%E9%80%9F%E5%BA%A6).
+
+```bash
+# download the pre-quantized internlm-chat-7b model from huggingface
+git-lfs install
+git clone https://huggingface.co/lmdeploy/llama2-chat-7b-w4
+
+# Convert the model's layout and store it in the default path, ./workspace.
+python3 -m lmdeploy.serve.turbomind.deploy internlm-chat-7b ./llama2-chat-7b-w4 awq --group-size 128
+
+# inference with lmdeploy's turbomind engine
+python3 -m lmdeploy.turbomind.chat ./workspace
+
+# serving with gradio
+python3 -m lmdeploy.serve.gradio.app ./workspace
+```
+
+LMDeploy is a full toolbox of lightweight compression, deployment, and serving for LLM tasks. Please refer to the [deployment tutorial](https://github.com/InternLM/LMDeploy) for more details on deploying InternLM.
 ## Fine-tuning & Training


@@ -45,6 +45,10 @@ InternLM has open-sourced a 7 billion parameter base model and a chat model tailored for practical scenarios
 Additionally, a lightweight training framework is offered to support model pre-training without the need for extensive dependencies. With a single codebase, it supports pre-training on large-scale clusters with thousands of GPUs, and fine-tuning on a single GPU while achieving remarkable performance optimizations. InternLM achieves nearly 90% acceleration efficiency during training on 1024 GPUs.

+## News
+
+InternLM-7B-Chat v1.1 is released with code interpreter and function calling capability. You can try it with [Lagent](https://github.com/InternLM/lagent).
+
 ## InternLM-7B

 ### Performance Evaluation
@@ -74,6 +78,7 @@ InternLM 7B and InternLM 7B Chat, trained using InternLM, have been open-sourced
 | Model | InternLM Format Weight Download Link | Transformers Format Weight Download Link |
 | ----------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------- |
 | **InternLM 7B** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-7b) | [🤗internlm/intern-7b](https://huggingface.co/internlm/internlm-7b) |
+| **InternLM Chat 7B v1.1** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b-v1.1) | [🤗internlm/intern-chat-7b-v1.1](https://huggingface.co/internlm/internlm-chat-7b-v1.1) |
 | **InternLM Chat 7B** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b) | [🤗internlm/intern-chat-7b](https://huggingface.co/internlm/internlm-chat-7b) |
 | **InternLM Chat 7B 8k** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b-8k) | [🤗internlm/intern-chat-7b-8k](https://huggingface.co/internlm/internlm-chat-7b-8k) |
@@ -85,8 +90,8 @@ To load the InternLM 7B Chat model using Transformers, use the following code:
 ```python
 >>> from transformers import AutoTokenizer, AutoModelForCausalLM
->>> tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
->>> model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).cuda()
+>>> tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b-v1_1", trust_remote_code=True)
+>>> model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b-v1_1", trust_remote_code=True).cuda()
 >>> model = model.eval()
 >>> response, history = model.chat(tokenizer, "hello", history=[])
 >>> print(response)
@@ -118,28 +123,45 @@ The effect is as follows

 ### Deployment

-We use [LMDeploy](https://github.com/InternLM/LMDeploy) to complete the one-click deployment of InternLM.
+We use [LMDeploy](https://github.com/InternLM/LMDeploy) to complete the workflow of InternLM deployment.

-1. First, install LMDeploy:
-
 ```bash
 python3 -m pip install lmdeploy
 ```

-2. Use the following command for quick deployment:
-
-```bash
-python3 -m lmdeploy.serve.turbomind.deploy internlm-chat-7b /path/to/internlm-chat-7b/model
-```
-
-3. After exporting the model, you can start a server and have a conversation with the deployed model using the following command:
-
-```bash
-bash workspace/service_docker_up.sh
-python3 -m lmdeploy.serve.client {server_ip_addresss}:33337
-```
-
-[LMDeploy](https://github.com/InternLM/LMDeploy) provides a complete workflow for deploying InternLM. Please refer to the [deployment tutorial](https://github.com/InternLM/LMDeploy) for more details on deploying InternLM.
+You can use the following commands to run `internlm-chat-7b` FP16 inference, serve it, and interact with the AI assistant via a WebUI:
+
+```bash
+# convert weight layout
+python3 -m lmdeploy.serve.turbomind.deploy internlm-chat-7b
+
+# inference with lmdeploy's turbomind engine
+python3 -m lmdeploy.turbomind.chat ./workspace
+
+# serving with gradio
+python3 -m lmdeploy.serve.gradio.app ./workspace
+```
+
+You can also deploy the 4-bit quantized `internlm-chat-7b` model via LMDeploy. It trims the model's memory overhead down to 6 GB, just 40% of what FP16 inference would take. More importantly, with extremely optimized kernels, the inference performance reaches more than 2.4x that of FP16 inference on an A100-80G.
+
+Try the following to run 4-bit `internlm-chat-7b` on a GeForce RTX 30-series GPU. You can find the inference benchmark [here](https://github.com/InternLM/lmdeploy/blob/main/docs/en/w4a16.md#inference-performance).
+
+```bash
+# download the pre-quantized internlm-chat-7b model from huggingface
+git-lfs install
+git clone https://huggingface.co/lmdeploy/llama2-chat-7b-w4
+
+# Convert the model's layout and store it in the default path, ./workspace.
+python3 -m lmdeploy.serve.turbomind.deploy internlm-chat-7b ./llama2-chat-7b-w4 awq --group-size 128
+
+# inference with lmdeploy's turbomind engine
+python3 -m lmdeploy.turbomind.chat ./workspace
+
+# serving with gradio
+python3 -m lmdeploy.serve.gradio.app ./workspace
+```
+
+LMDeploy is an efficient toolkit for compressing, deploying, and serving LLMs. Please refer to the [deployment tutorial](https://github.com/InternLM/LMDeploy) for more details on deploying InternLM.
 ## Fine-tuning & Training


@@ -0,0 +1,29 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-


def merge_dicts(dict_a: dict, dict_b: dict):
    """Recursively merge dict_b into dict_a; on conflicting keys, values from dict_b win.

    Note: dict_b is mutated in place while nested levels are merged.
    """
    for key in dict_b.keys():
        # Only recurse when both sides hold a dict for this key; this also guards
        # against a KeyError when dict_a lacks the key entirely.
        if isinstance(dict_b[key], dict) and isinstance(dict_a.get(key), dict):
            dict_b[key] = {**dict_a[key], **dict_b[key]}
            merge_dicts(dict_a[key], dict_b[key])
    dict_c = {**dict_a, **dict_b}
    return dict_c


def format_dict_to_py_string(data: dict, indent=0, is_nested=False):
    """Render a (possibly nested) dict as Python config source, using dict(...) blocks."""
    result = ""
    for key, value in data.items():
        if isinstance(value, dict):
            result += f"{' ' * indent}{key} = dict(\n"
            result += format_dict_to_py_string(value, indent + 4, is_nested=True)
            result += f"{' ' * indent})"
        else:
            result += f"{' ' * indent}{key} = {repr(value)}"
        if is_nested:
            result += ","
        result += "\n"
    # Wrap the accumulated text so the generated file ends with a trailing newline.
    result = f"""\
{result}
"""
    return result
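
To make the merge semantics concrete, here is a small worked example (the input dicts are made up; the import path follows the repo layout used by generate_config.py below):

```python
from ci_scripts.common.com_func import format_dict_to_py_string, merge_dicts

base = {"SEQ_LEN": 2048, "ckpt": {"checkpoint_every": 50, "load_optimizer": True}}
override = {"SEQ_LEN": 1024, "ckpt": {"checkpoint_every": 20}}

merged = merge_dicts(base, override)
# {'SEQ_LEN': 1024, 'ckpt': {'checkpoint_every': 20, 'load_optimizer': True}}

print(format_dict_to_py_string(merged))
# SEQ_LEN = 1024
# ckpt = dict(
#     checkpoint_every = 20,
#     load_optimizer = True,
# )
```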


@@ -16,7 +16,7 @@ exit_code=0
 source ./ci_scripts/common/basic_func.sh

 echo "start to test alpaca_tokenizer.py."

 if [[ -d ${RESULTS} ]]; then
     if ! rm -rf ${RESULTS}/*; then


@@ -12,7 +12,7 @@ exit_code=0
 source ./ci_scripts/common/basic_func.sh

 echo "start to test tokenizer.py."

 num=$(num_files "${RESULTS}")
 if [[ ${num} -gt 0 ]]; then


@@ -40,7 +40,7 @@ num=$(num_files "${CKPTS_OUTPUT}")
 if [[ ${num} -ne ${expected_num} ]]; then
     echo "expect: ${expected_num} files, actual: ${num} files."
     exit_code=$(($exit_code + 1))
 fi

 # NOTICE: should not remove the cached files, because the cached files will be used in the next test case.


@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 # -*- encoding: utf-8 -*-
-from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import AutoModelForCausalLM, AutoTokenizer

 tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).cuda()


@@ -10,12 +10,11 @@ VOCAB_SIZE = 103168
 # Ckpt folder format:
 # fs: 'local:/mnt/nfs/XXX'
 # oss: 'boto3:s3://model_weights/XXX'
-MODEL_ONLY_FOLDER = "local:llm_ckpts/xxxx"
+# MODEL_ONLY_FOLDER = "local:llm_ckpts/xxxx"
 # SAVE_CKPT_FOLDER = "local:llm_ckpts"
 SAVE_CKPT_FOLDER = "local:llm_ckpts"
 # LOAD_CKPT_FOLDER = "local:llm_ckpts/49"
 ckpt = dict(
-    enable_save_ckpt=True,
     # Path to save training ckpt.
     save_ckpt_folder=SAVE_CKPT_FOLDER,
     # Path to continue training ckpt (load model weights and scheduler/context states).
@@ -27,7 +26,7 @@ ckpt = dict(
     load_optimizer=True,
 )

-TRAIN_FOLDER = "/mnt/petrelfs/qa-caif-cicd/data/lm_data/alpaca_data/train/en"
+TRAIN_FOLDER = "local:../lm_data/alpaca_data/train/en"

 data = dict(
     seq_len=SEQ_LEN,
     # micro_num means the number of micro_batch contained in one gradient update
@@ -120,8 +119,8 @@ zero1 parallel:
     2. if zero1 == 1, zero is not used, and all dp groups retain the full amount of model parameters.
     3. zero1 > 1 and zero1 <= dp world size, the world size of zero is a subset of dp world size.
     For smaller models, it is usually a better choice to split the parameters within nodes with a setting <= 8.
-pipeline parallel: pipeline parallel size.
-tensor parallel: tensor parallel size, usually the number of GPUs per node.
+pipeline parallel: pipeline parallel size, only 1 is accepted currently.
+tensor parallel: tensor parallel size, usually the number of GPUs per node, only 1 is accepted currently.
 """
 parallel = dict(
     zero1=8,
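
For intuition, the docstring's constraints can be expressed as a quick sanity check. This is a sketch, not repo code; treating "the world size of zero is a subset of dp world size" as a divisibility requirement is one plausible reading, and tensor/pipeline sizes are fixed at 1 as in this config:

```python
def check_parallel(world_size: int, zero1: int, tensor: int = 1, pipeline: int = 1) -> None:
    # Data-parallel group size is what remains after tensor/pipeline splits.
    dp_world_size = world_size // (tensor * pipeline)
    if zero1 == 1:
        return  # ZeRO disabled: every dp rank keeps the full parameters (rule 2)
    # zero1 > 1 must fit inside, and evenly partition, the dp group (rule 3)
    assert 1 < zero1 <= dp_world_size and dp_world_size % zero1 == 0


check_parallel(world_size=8, zero1=8)  # ok: parameters split within one 8-GPU node
```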


@@ -0,0 +1,49 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-

import argparse
import json
import os

from ci_scripts.common import com_func
from internlm.core.context import Config


def generate_new_config(config_py_file, test_config_json, case_name):
    # generate the path of the new config py
    config_path = os.path.split(config_py_file)
    new_config_py_file = os.path.join(config_path[0], case_name + ".py")

    # merge the case's overrides into the origin config
    origin_config = Config.from_file(config_py_file)
    with open(test_config_json) as f:
        test_config = json.load(f)
    if test_config:
        if case_name not in test_config.keys():
            raise KeyError(f"the case {case_name} doesn't exist. Please check {test_config_json} again!")
        new_config = com_func.merge_dicts(origin_config, test_config[case_name])
        print(f"new config is:\n{new_config}")

        # write the new config to a py file
        file_content = com_func.format_dict_to_py_string(new_config)
        with open(new_config_py_file, "w") as f:
            f.write(file_content)
        print(f"The new test train config file is {new_config_py_file}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--origin_config",
        type=str,
        default="./ci_scripts/train/ci_7B_sft.py",
        help="path to the origin train config file",
    )
    parser.add_argument(
        "--test_config",
        type=str,
        default="./ci_scripts/train/test_config.json",
        help="path to the test train config file",
    )
    parser.add_argument("--case_name", type=str, help="name of the case which will be run")
    args = parser.parse_args()
    generate_new_config(args.origin_config, args.test_config, args.case_name)
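
Putting the pieces together, load_ckpt.sh (the next file) drives this script with just a case name; an equivalent direct call might look like the following (the paths are simply the argparse defaults above, and the import path is an assumption based on the repo layout):

```python
from ci_scripts.train.generate_config import generate_new_config

# Equivalent to: python ./ci_scripts/train/generate_config.py --case_name 7B_load_new_ckpt
generate_new_config(
    config_py_file="./ci_scripts/train/ci_7B_sft.py",
    test_config_json="./ci_scripts/train/test_config.json",
    case_name="7B_load_new_ckpt",
)
# writes ./ci_scripts/train/7B_load_new_ckpt.py with the case's overrides merged in
```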


@@ -0,0 +1,38 @@
#!/bin/bash
set -x

[[ -n ${GITHUB_WORKSPACE} ]] || { echo "should set GITHUB_WORKSPACE first before ci, exit."; exit 1; }

readonly CKPTS_PATH="$GITHUB_WORKSPACE/llm_ckpts"
readonly CKPTS40_PATH="$GITHUB_WORKSPACE/llm_ckpts/40"
readonly CKPTS40_OUTPUT="${CKPTS40_PATH}/*.pt"
expected_num=21
exit_code=0

source ./ci_scripts/common/basic_func.sh

echo "start to test slurm training with loading checkpoint."

python ./ci_scripts/train/generate_config.py --case_name $1
file="./ci_scripts/train/$1.py"
if [[ ! -f ${file} ]]; then
    echo "expect: ${file} exists, actual: not exist."
    exit_code=$(($exit_code + 1))
fi

srun -p ${SLURM_PARTITION} --exclusive --job-name=$2 -n 8 --ntasks-per-node=8 --gpus-per-task=1 python train.py --config ${file}
[[ $? -ne 0 ]] && { echo "test slurm training failed."; exit_code=$(($exit_code + 1)); }

num=$(num_files "${CKPTS40_OUTPUT}")
if [[ ${num} -ne ${expected_num} ]]; then
    echo "expect: ${expected_num} files, actual: ${num} files."
    exit_code=$(($exit_code + 1))
fi

# clean the test files.
if ! rm -rf ${CKPTS_PATH}/*; then
    echo "cleaning cached file in ${CKPTS_PATH} failed."
    exit_code=$(($exit_code + 1))
fi

exit $exit_code
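
The `num_files` helper comes from ci_scripts/common/basic_func.sh, which this diff doesn't show. A Python equivalent of what the check above appears to rely on might look like this (an assumption about the helper's behavior, not its actual implementation):

```python
import glob


def num_files(pattern: str) -> int:
    # Count files matching a glob such as "$GITHUB_WORKSPACE/llm_ckpts/40/*.pt".
    return len(glob.glob(pattern))
```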


@@ -25,12 +25,6 @@ srun -p ${SLURM_PARTITION} --exclusive --job-name=$1 -n 8 --ntasks-per-node=8 --
 num=$(num_files "${CKPTS20_OUTPUT}")
 if [[ ${num} -ne ${expected_num} ]]; then
     echo "expect: ${expected_num} files, actual: ${num} files."
-    exit_code=$(($exit_code + 1))
-fi
-
-# clean the test files.
-if ! rm -rf ${CKPTS_PATH}/*; then
-    echo "cleaning cached file in ${CKPTS_PATH} failed."
     exit_code=$(($exit_code + 1))
 fi


@@ -0,0 +1,45 @@
{
"7B_basic_train": {
"SEQ_LEN": 1024,
"HIDDEN_SIZE": 2048,
"NUM_ATTENTION_HEAD": 16,
"NUM_LAYER": 16,
"TRAIN_FOLDER":"local:../lm_data/alpaca_data/train/en",
"ckpt": {
"checkpoint_every": 20
},
"data": {
"total_steps": 20
}
},
"7B_load_new_ckpt": {
"SEQ_LEN": 1024,
"HIDDEN_SIZE": 2048,
"NUM_ATTENTION_HEAD": 16,
"NUM_LAYER": 16,
"TRAIN_FOLDER":"local:../lm_data/alpaca_data/train/en",
"LOAD_CKPT_FOLDER": "local:llm_ckpts/20",
"ckpt": {
"load_ckpt_folder": "local:llm_ckpts/20",
"checkpoint_every": 20
},
"data": {
"total_steps": 40
}
},
"7B_load_preset_ckpt": {
"SEQ_LEN": 1024,
"HIDDEN_SIZE": 2048,
"NUM_ATTENTION_HEAD": 16,
"NUM_LAYER": 16,
"TRAIN_FOLDER":"local:../lm_data/alpaca_data/train/en",
"LOAD_CKPT_FOLDER": "local:../lm_data/alpaca_data/llm_ckpts/20",
"ckpt": {
"load_ckpt_folder": "local:../lm_data/alpaca_data/llm_ckpts/20",
"checkpoint_every": 20
},
"data": {
"total_steps": 40
}
}
}
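
Each top-level key in this JSON is a case name that load_ckpt.sh hands to generate_config.py. A quick, hypothetical snippet to inspect the cases:

```python
import json

with open("./ci_scripts/train/test_config.json") as f:
    cases = json.load(f)

print(sorted(cases))
# ['7B_basic_train', '7B_load_new_ckpt', '7B_load_preset_ckpt']
print(cases["7B_load_new_ckpt"]["ckpt"]["load_ckpt_folder"])
# local:llm_ckpts/20
```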


@@ -25,7 +25,7 @@ srun -p ${SLURM_PARTITION} --exclusive --job-name=$1 -N 1 torchrun --nnodes=1 --
 num=$(num_files "${CKPTS_OUTPUT}")
 if [[ ${num} -ne ${expected_num} ]]; then
     echo "expect: ${expected_num} files, actual: ${num} files."
     exit_code=$(($exit_code + 1))
 fi

 # clean the test files.