mirror of https://github.com/InternLM/InternLM
fix(conflicts): merge main to develop
commit 9eec3d9465
@@ -39,7 +39,7 @@ jobs:
    needs: check-requirements
    runs-on: [lmtest]
    steps:
    - name: mask env
      run: |
        echo "::add-mask::${{env.WORKSPACE_PREFIX}}"
    - uses: actions/checkout@v3
@@ -60,15 +60,29 @@ jobs:
    runs-on: [lmtest]
    timeout-minutes: 30
    steps:
    - name: mask env
      run: |
        echo "::add-mask::${{env.WORKSPACE_PREFIX}}"
    - uses: actions/checkout@v3

    - name: slurm-train
+     id: basic_train
      run: |
        source activate internlm-env-test
        sh ./ci_scripts/train/slurm_train.sh ${GITHUB_RUN_ID}-${GITHUB_JOB}
+
+   - name: load_preset_ckpt
+     if: ${{ failure() && steps.basic_train.conclusion == 'failure' }}
+     run: |
+       source activate internlm-env-test
+       export PYTHONPATH=$PWD:$PYTHONPATH
+       sh ./ci_scripts/train/load_ckpt.sh 7B_load_preset_ckpt ${GITHUB_RUN_ID}-${GITHUB_JOB}
+
+   - name: load_new_ckpt
+     run: |
+       source activate internlm-env-test
+       export PYTHONPATH=$PWD:$PYTHONPATH
+       sh ./ci_scripts/train/load_ckpt.sh 7B_load_new_ckpt ${GITHUB_RUN_ID}-${GITHUB_JOB}
        rm -rf $GITHUB_WORKSPACE/llm_ckpts

    - name: torchrun-train
@@ -91,18 +105,17 @@ jobs:
      run: |
        source activate internlm-env-test
        export PYTHONPATH=$PWD:$PYTHONPATH
        sh ./ci_scripts/model/convert_to_hf.sh
        cd ./hf_ckpt
        srun -p ${SLURM_PARTITION} --job-name=${GITHUB_RUN_ID}-${GITHUB_JOB} --gpus-per-task=2 python ../ci_scripts/model/loaded_as_transformer.py
        cd ..
        rm -rf $GITHUB_WORKSPACE/hf_ckpt

  load-chat-model-in-hf:
    if: ${{ always() }}
    needs: check-requirements
    runs-on: [lmtest]
    steps:
    - name: mask env
      run: |
        echo "::add-mask::${{env.WORKSPACE_PREFIX}}"
    - uses: actions/checkout@v3
@@ -1,6 +1,6 @@
name: lint-check

on:
  push:
  pull_request:
    branches:
@@ -1,7 +1,7 @@
name: Sonarqube
on:
  workflow_dispatch:

jobs:
  sonarqube:
    name: SonarQube Scan
@@ -13,4 +13,4 @@ jobs:
    - uses: sonarsource/sonarqube-scan-action@master
      env:
        SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
        SONAR_HOST_URL: ${{ secrets.SONAR_HOST_URL }}
@@ -40,6 +40,10 @@ InternLM has open-sourced a 7 billion parameter base model and a chat model tailored for practical scenarios

Additionally, a lightweight training framework is offered to support model pre-training without the need for extensive dependencies. With a single codebase, it supports pre-training on large-scale clusters with thousands of GPUs, and fine-tuning on a single GPU while achieving remarkable performance optimizations. InternLM achieves nearly 90% acceleration efficiency during training on 1024 GPUs.

+## News
+
+InternLM-7B-Chat v1.1 is released with code interpreter and function calling capability. You can try it with [Lagent](https://github.com/InternLM/lagent).
+
## InternLM-7B

### Performance Evaluation
@@ -80,8 +84,8 @@ To load the InternLM 7B Chat model using Transformers, use the following code:

```python
>>> from transformers import AutoTokenizer, AutoModelForCausalLM
->>> tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
+>>> tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b-v1_1", trust_remote_code=True)
->>> model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).cuda()
+>>> model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b-v1_1", trust_remote_code=True).cuda()
>>> model = model.eval()
>>> response, history = model.chat(tokenizer, "こんにちは", history=[])
>>> print(response)
```
@@ -45,6 +45,10 @@ InternLM (书生·浦语) has open-sourced a 7 billion parameter base model and chat models tailored for practical scenarios

A lightweight training framework is provided to support model pre-training without installing a large number of dependencies. A single codebase supports pre-training on clusters with thousands of GPUs and human-preference alignment training on a single GPU, while achieving extreme performance optimization: nearly 90% acceleration efficiency when training on a thousand GPUs.

+## News
+
+We have open-sourced InternLM-Chat-7B v1.1, which can call a code interpreter and tool plugins. You can try these new features in [Lagent](https://github.com/InternLM/lagent).
+
## InternLM-7B

### Performance Evaluation
@@ -74,6 +78,7 @@ InternLM (书生·浦语) has open-sourced a 7 billion parameter base model and chat models tailored for practical scenarios
| Model | InternLM Format Weight Download Link | Transformers Format Weight Download Link |
| -------------------- | ------------------------------------ | ---------------------------------------- |
| **InternLM 7B** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-7b) | [🤗internlm/intern-7b](https://huggingface.co/internlm/internlm-7b) |
+| **InternLM Chat 7B v1.1** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b-v1.1) | [🤗internlm/intern-chat-7b-v1.1](https://huggingface.co/internlm/internlm-chat-7b-v1.1) |
| **InternLM Chat 7B** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b) | [🤗internlm/intern-chat-7b](https://huggingface.co/internlm/internlm-chat-7b) |
| **InternLM Chat 7B 8k** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b-8k) | [🤗internlm/intern-chat-7b-8k](https://huggingface.co/internlm/internlm-chat-7b-8k) |

@@ -85,8 +90,8 @@ InternLM (书生·浦语) has open-sourced a 7 billion parameter base model and chat models tailored for practical scenarios

```python
>>> from transformers import AutoTokenizer, AutoModelForCausalLM
->>> tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
+>>> tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b-v1_1", trust_remote_code=True)
->>> model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).cuda()
+>>> model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b-v1_1", trust_remote_code=True).cuda()
>>> model = model.eval()
>>> response, history = model.chat(tokenizer, "你好", history=[])
>>> print(response)
```
@@ -117,26 +122,44 @@ streamlit run web_demo.py

We use [LMDeploy](https://github.com/InternLM/LMDeploy) to complete the one-click deployment of InternLM.

-1. First, install LMDeploy:
-
-```bash
-python3 -m pip install lmdeploy
-```
-
-2. The quick deployment command is as follows:
-
-```bash
-python3 -m lmdeploy.serve.turbomind.deploy internlm-chat-7b /path/to/internlm-7b/model
-```
-
-3. After exporting the model, you can start the service directly with the following command and chat with the AI in the client:
-
-```bash
-bash workspace/service_docker_up.sh
-python3 -m lmdeploy.serve.client {server_ip_addresss}:33337
-```
-
-[LMDeploy](https://github.com/InternLM/LMDeploy) supports the complete workflow of deploying InternLM. Please refer to the [deployment tutorial](https://github.com/InternLM/LMDeploy) for more details on deploying InternLM.
+Run the following commands to chat with the `internlm-chat-7b` model interactively in the terminal, or chat with it through a WebUI.
+
+```bash
+# convert the weight layout
+python3 -m lmdeploy.serve.turbomind.deploy internlm-chat-7b
+
+# interactive chat in the terminal
+python3 -m lmdeploy.turbomind.chat ./workspace
+
+# launch the gradio server
+python3 -m lmdeploy.serve.gradio.app ./workspace
+```
+
+In the steps above, LMDeploy uses FP16 computation precision.
+
+Besides FP16, LMDeploy also supports 4-bit weight inference for `internlm-chat-7b`. It not only cuts the model's GPU memory down to 6 GB, roughly 40% of FP16, but, with heavily optimized kernels, its inference performance on an A100-80G reaches more than 2.4x that of FP16.
+
+The following shows how to deploy the 4-bit `internlm-chat-7b` model. For an inference speed benchmark, see [here](https://github.com/InternLM/lmdeploy/blob/main/docs/zh_cn/w4a16.md#%E6%8E%A8%E7%90%86%E9%80%9F%E5%BA%A6).
+
+```bash
+# download prequnantized internlm-chat-7b model from huggingface
+git-lfs install
+git clone https://huggingface.co/lmdeploy/llama2-chat-7b-w4
+
+# Convert the model's layout and store it in the default path, ./workspace.
+python3 -m lmdeploy.serve.turbomind.deploy internlm-chat-7b ./llama2-chat-7b-w4 awq --group-size 128
+
+# inference lmdeploy's turbomind engine
+python3 -m lmdeploy.turbomind.chat ./workspace
+
+# serving with gradio
+python3 -m lmdeploy.serve.gradio.app ./workspace
+```
+
+LMDeploy is a complete toolkit for compressing, deploying, and serving LLMs. Please refer to the [deployment tutorial](https://github.com/InternLM/LMDeploy) for more details on deploying InternLM.

## Fine-tuning & Training
README.md (58 changes)
@@ -45,6 +45,10 @@ InternLM has open-sourced a 7 billion parameter base model and a chat model tail

Additionally, a lightweight training framework is offered to support model pre-training without the need for extensive dependencies. With a single codebase, it supports pre-training on large-scale clusters with thousands of GPUs, and fine-tuning on a single GPU while achieving remarkable performance optimizations. InternLM achieves nearly 90% acceleration efficiency during training on 1024 GPUs.

+## News
+
+InternLM-7B-Chat v1.1 is released with code interpreter and function calling capability. You can try it with [Lagent](https://github.com/InternLM/lagent).
+
## InternLM-7B

### Performance Evaluation
@@ -74,6 +78,7 @@ InternLM 7B and InternLM 7B Chat, trained using InternLM, have been open-sourced
| Model | InternLM Format Weight Download Link | Transformers Format Weight Download Link |
| ----------------------------- | ------------------------------------ | ----------------------------------------- |
| **InternLM 7B** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-7b) | [🤗internlm/intern-7b](https://huggingface.co/internlm/internlm-7b) |
+| **InternLM Chat 7B v1.1** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b-v1.1) | [🤗internlm/intern-chat-7b-v1.1](https://huggingface.co/internlm/internlm-chat-7b-v1.1) |
| **InternLM Chat 7B** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b) | [🤗internlm/intern-chat-7b](https://huggingface.co/internlm/internlm-chat-7b) |
| **InternLM Chat 7B 8k** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b-8k) | [🤗internlm/intern-chat-7b-8k](https://huggingface.co/internlm/internlm-chat-7b-8k) |

@@ -85,8 +90,8 @@ To load the InternLM 7B Chat model using Transformers, use the following code:

```python
>>> from transformers import AutoTokenizer, AutoModelForCausalLM
->>> tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
+>>> tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b-v1_1", trust_remote_code=True)
->>> model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).cuda()
+>>> model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b-v1_1", trust_remote_code=True).cuda()
>>> model = model.eval()
>>> response, history = model.chat(tokenizer, "hello", history=[])
>>> print(response)
```
@@ -118,28 +123,45 @@ The effect is as follows

### Deployment

-We use [LMDeploy](https://github.com/InternLM/LMDeploy) to complete the one-click deployment of InternLM.
+We use [LMDeploy](https://github.com/InternLM/LMDeploy) to complete the workflow of InternLM deployment.

-1. First, install LMDeploy:
-
-```bash
-python3 -m pip install lmdeploy
-```
-
-2. Use the following command for quick deployment:
-
-```bash
-python3 -m lmdeploy.serve.turbomind.deploy internlm-chat-7b /path/to/internlm-chat-7b/model
-```
-
-3. After exporting the model, you can start a server and have a conversation with the deployed model using the following command:
-
-```bash
-bash workspace/service_docker_up.sh
-python3 -m lmdeploy.serve.client {server_ip_addresss}:33337
-```
-
-[LMDeploy](https://github.com/InternLM/LMDeploy) provides a complete workflow for deploying InternLM. Please refer to the [deployment tutorial](https://github.com/InternLM/LMDeploy) for more details on deploying InternLM.
+You can utilize the following commands to conduct `internlm-chat-7b` FP16 inference, serve it and interact with AI assistant via WebUI:
+
+```bash
+# convert weight layout
+python3 -m lmdeploy.serve.turbomind.deploy internlm-chat-7b
+
+# inference lmdeploy's turbomind engine
+python3 -m lmdeploy.turbomind.chat ./workspace
+
+# serving with gradio
+python3 -m lmdeploy.serve.gradio.app ./workspace
+```
+
+You can also deploy 4-bit quantized `internlm-chat-7b` model via LMDeploy. It greatly trims down the model's memory overhead to 6G, just 40% of what FP16 inference would take. More importantly, with extreme optimized kernel, the inference performance achieves 2.4x faster than FP16 inference on A100-80G.
+
+Try the followings to enjoy 4-bit `internlm-chat-7b` on a Geforce RTX 30x GPU card. You can find the inference benchmark from [here](https://github.com/InternLM/lmdeploy/blob/main/docs/en/w4a16.md#inference-performance).
+
+```bash
+# download prequnantized internlm-chat-7b model from huggingface
+git-lfs install
+git clone https://huggingface.co/lmdeploy/llama2-chat-7b-w4
+
+# Convert the model's layout and store it in the default path, ./workspace.
+python3 -m lmdeploy.serve.turbomind.deploy internlm-chat-7b ./llama2-chat-7b-w4 awq --group-size 128
+
+# inference lmdeploy's turbomind engine
+python3 -m lmdeploy.turbomind.chat ./workspace
+
+# serving with gradio
+python3 -m lmdeploy.serve.gradio.app ./workspace
+```
+
+LMDeploy is an efficient toolkit for compressing, deploying, and serving LLM models. Please refer to the [deployment tutorial](https://github.com/InternLM/LMDeploy) for more details on deploying InternLM.

## Fine-tuning & Training
@@ -0,0 +1,29 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+
+
+def merge_dicts(dict_a: dict, dict_b: dict):
+    for key in dict_b.keys():
+        if isinstance(dict_b[key], dict):
+            dict_b[key] = {**dict_a[key], **dict_b[key]}
+            merge_dicts(dict_a[key], dict_b[key])
+    dict_c = {**dict_a, **dict_b}
+    return dict_c
+
+
+def format_dict_to_py_string(data: dict, indent=0, is_nested=False):
+    result = ""
+    for key, value in data.items():
+        if isinstance(value, dict):
+            result += f"{' ' * indent}{key} = dict(\n"
+            result += format_dict_to_py_string(value, indent + 4, is_nested=True)
+            result += f"{' ' * indent})"
+        else:
+            result += f"{' ' * indent}{key} = {repr(value)}"
+        if is_nested:
+            result += ","
+        result += "\n"
+    result = f"""\
+{result}
+"""
+    return result
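A minimal usage sketch for the two helpers above (illustrative, not part of the patch), assuming they are importable as `ci_scripts.common.com_func`, the path used by `generate_config.py` later in this diff:

```python
from ci_scripts.common import com_func

base = {"SEQ_LEN": 2048, "ckpt": {"checkpoint_every": 50, "save_ckpt_folder": "local:llm_ckpts"}}
override = {"SEQ_LEN": 1024, "ckpt": {"checkpoint_every": 20}}

# Nested keys from `override` win; keys only present in `base` survive:
# {'SEQ_LEN': 1024, 'ckpt': {'checkpoint_every': 20, 'save_ckpt_folder': 'local:llm_ckpts'}}
merged = com_func.merge_dicts(base, override)
print(merged)

# Render the merged dict back into `key = value` / `key = dict(...)` lines,
# i.e. the shape of a training config .py file.
print(com_func.format_dict_to_py_string(merged))
```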
@@ -16,7 +16,7 @@ exit_code=0

source ./ci_scripts/common/basic_func.sh

echo "start to test alpaca_tokenizer.py."

if [[ -d ${RESULTS} ]]; then
    if ! rm -rf ${RESULTS}/*; then
@@ -12,7 +12,7 @@ exit_code=0

source ./ci_scripts/common/basic_func.sh

echo "start to test tokenizer.py."

num=$(num_files "${RESULTS}")
if [[ ${num} -gt 0 ]]; then
@@ -40,7 +40,7 @@ num=$(num_files "${CKPTS_OUTPUT}")

if [[ ${num} -ne ${expected_num} ]]; then
    echo "expect: ${expected_num} files, actual: ${num} files."
    exit_code=$(($exit_code + 1))
fi

# NOTICE: should not remove the cached files, because the cached files will be used in the next test case.
@@ -1,6 +1,6 @@
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
-from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).cuda()
@@ -10,12 +10,11 @@ VOCAB_SIZE = 103168
# Ckpt folder format:
# fs: 'local:/mnt/nfs/XXX'
# oss: 'boto3:s3://model_weights/XXX'
-MODEL_ONLY_FOLDER = "local:llm_ckpts/xxxx"
+# MODEL_ONLY_FOLDER = "local:llm_ckpts/xxxx"
# SAVE_CKPT_FOLDER = "local:llm_ckpts"
SAVE_CKPT_FOLDER = "local:llm_ckpts"
# LOAD_CKPT_FOLDER = "local:llm_ckpts/49"
ckpt = dict(
-    enable_save_ckpt=True,
    # Path to save training ckpt.
    save_ckpt_folder=SAVE_CKPT_FOLDER,
    # Path to continue training ckpt (load model weights and scheduler/context states).
@@ -27,7 +26,7 @@ ckpt = dict(
    load_optimizer=True,
)

-TRAIN_FOLDER = "/mnt/petrelfs/qa-caif-cicd/data/lm_data/alpaca_data/train/en"
+TRAIN_FOLDER = "local:../lm_data/alpaca_data/train/en"
data = dict(
    seq_len=SEQ_LEN,
    # micro_num means the number of micro_batch contained in one gradient update
@@ -120,8 +119,8 @@ zero1 parallel:
    2. if zero1 == 1, zero is not used, and all dp groups retain the full amount of model parameters.
    3. zero1 > 1 and zero1 <= dp world size, the world size of zero is a subset of dp world size.
    For smaller models, it is usually a better choice to split the parameters within nodes with a setting <= 8.
-pipeline parallel: pipeline parallel size.
+pipeline parallel: pipeline parallel size, only 1 is accepted currently.
-tensor parallel: tensor parallel size, usually the number of GPUs per node.
+tensor parallel: tensor parallel size, usually the number of GPUs per node, only 1 is accepted currently.
"""
parallel = dict(
    zero1=8,
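A toy illustration (not from the patch) of how the `zero1` cases described in the docstring above map to the world size of the zero process group; the numbers are made up, and case 1 of the docstring is cut off above this hunk, so its handling here is only an assumption:

```python
def zero1_world_size(zero1: int, dp_world_size: int) -> int:
    """World size of the zero process group implied by the `zero1` setting."""
    if zero1 == 1:
        return 1                  # case 2: zero is not used, each dp rank keeps full parameters
    if 1 < zero1 <= dp_world_size:
        return zero1              # case 3: the zero group is a subset of the dp group
    # assumed behaviour for the docstring's case 1: span the whole dp group
    return dp_world_size


# this CI config sets zero1=8, i.e. parameters are split within an 8-rank (one-node) group
print(zero1_world_size(8, dp_world_size=32))  # -> 8
```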
@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+import argparse
+import json
+import os
+
+from ci_scripts.common import com_func
+from internlm.core.context import Config
+
+
+def generate_new_config(config_py_file, test_config_json, case_name):
+    # generate path of the new config py
+    config_path = os.path.split(config_py_file)
+    new_config_py_file = os.path.join(config_path[0], case_name + ".py")
+
+    # merge dict
+    origin_config = Config.from_file(config_py_file)
+    with open(test_config_json) as f:
+        test_config = json.load(f)
+    if test_config:
+        if case_name not in test_config.keys():
+            raise KeyError(f"the {case_name} doesn't exist.Please check {test_config} again!")
+        new_config = com_func.merge_dicts(origin_config, test_config[case_name])
+    print(f"new config is:\n{new_config}")
+
+    # write new config to py file
+    file_content = com_func.format_dict_to_py_string(new_config)
+    with open(new_config_py_file, "w") as f:
+        f.write(file_content)
+    print(f"The new test train config file is {new_config_py_file}")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--origin_config",
+        type=str,
+        default="./ci_scripts/train/ci_7B_sft.py",
+        help="path to the origin train config file",
+    )
+    parser.add_argument(
+        "--test_config",
+        type=str,
+        default="./ci_scripts/train/test_config.json",
+        help="path to the test train config file",
+    )
+    parser.add_argument("--case_name", type=str, help="name of the case which will be runned ")
+    args = parser.parse_args()
+    generate_new_config(args.origin_config, args.test_config, args.case_name)
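An illustrative invocation (not from the patch), using the argparse defaults above together with a case name from the `test_config.json` added further down; under those assumptions it writes `./ci_scripts/train/7B_load_new_ckpt.py` by merging that case's overrides into `ci_7B_sft.py`:

```bash
python ./ci_scripts/train/generate_config.py \
    --origin_config ./ci_scripts/train/ci_7B_sft.py \
    --test_config ./ci_scripts/train/test_config.json \
    --case_name 7B_load_new_ckpt
```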
@@ -0,0 +1,38 @@
+#!/bin/bash
+set -x
+
+[[ -n ${GITHUB_WORKSPACE} ]] || { echo "should set GITHUB_WORKSPACE first before ci, exit."; exit 1; }
+readonly CKPTS_PATH="$GITHUB_WORKSPACE/llm_ckpts"
+readonly CKPTS40_PATH="$GITHUB_WORKSPACE/llm_ckpts/40"
+readonly CKPTS40_OUTPUT="${CKPTS40_PATH}/*.pt"
+expected_num=21
+exit_code=0
+
+source ./ci_scripts/common/basic_func.sh
+
+echo "start to test slurm training with loading checkpoint."
+
+python ./ci_scripts/train/generate_config.py --case_name $1
+file="./ci_scripts/train/$1.py"
+if [[ ! -f ${file} ]]; then
+    echo "expect: ${file} exists, actual: not exist."
+    exit_code=$(($exit_code + 1))
+fi
+
+srun -p ${SLURM_PARTITION} --exclusive --job-name=$2 -n 8 --ntasks-per-node=8 --gpus-per-task=1 python train.py --config ${file}
+[[ $? -ne 0 ]] && { echo "test slurm training failed."; exit_code=$(($exit_code + 1)); }
+
+
+num=$(num_files "${CKPTS40_OUTPUT}")
+if [[ ${num} -ne ${expected_num} ]]; then
+    echo "expect: ${expected_num} files, actual: ${num} files."
+    exit_code=$(($exit_code + 1))
+fi
+
+# clean the test files.
+if ! rm -rf ${CKPTS_PATH}/*; then
+    echo "cleaning cached file in ${CKPTS_PATH} failed."
+    exit_code=$(($exit_code + 1))
+fi
+
+exit $exit_code
@@ -25,12 +25,6 @@ srun -p ${SLURM_PARTITION} --exclusive --job-name=$1 -n 8 --ntasks-per-node=8 --
num=$(num_files "${CKPTS20_OUTPUT}")
if [[ ${num} -ne ${expected_num} ]]; then
    echo "expect: ${expected_num} files, actual: ${num} files."
-    exit_code=$(($exit_code + 1))
-fi
-
-# clean the test files.
-if ! rm -rf ${CKPTS_PATH}/*; then
-    echo "cleaning cached file in ${CKPTS_PATH} failed."
    exit_code=$(($exit_code + 1))
fi
@@ -0,0 +1,45 @@
+{
+    "7B_basic_train": {
+        "SEQ_LEN": 1024,
+        "HIDDEN_SIZE": 2048,
+        "NUM_ATTENTION_HEAD": 16,
+        "NUM_LAYER": 16,
+        "TRAIN_FOLDER": "local:../lm_data/alpaca_data/train/en",
+        "ckpt": {
+            "checkpoint_every": 20
+        },
+        "data": {
+            "total_steps": 20
+        }
+    },
+    "7B_load_new_ckpt": {
+        "SEQ_LEN": 1024,
+        "HIDDEN_SIZE": 2048,
+        "NUM_ATTENTION_HEAD": 16,
+        "NUM_LAYER": 16,
+        "TRAIN_FOLDER": "local:../lm_data/alpaca_data/train/en",
+        "LOAD_CKPT_FOLDER": "local:llm_ckpts/20",
+        "ckpt": {
+            "load_ckpt_folder": "local:llm_ckpts/20",
+            "checkpoint_every": 20
+        },
+        "data": {
+            "total_steps": 40
+        }
+    },
+    "7B_load_preset_ckpt": {
+        "SEQ_LEN": 1024,
+        "HIDDEN_SIZE": 2048,
+        "NUM_ATTENTION_HEAD": 16,
+        "NUM_LAYER": 16,
+        "TRAIN_FOLDER": "local:../lm_data/alpaca_data/train/en",
+        "LOAD_CKPT_FOLDER": "local:../lm_data/alpaca_data/llm_ckpts/20",
+        "ckpt": {
+            "load_ckpt_folder": "local:../lm_data/alpaca_data/llm_ckpts/20",
+            "checkpoint_every": 20
+        },
+        "data": {
+            "total_steps": 40
+        }
+    }
+}
@@ -25,7 +25,7 @@ srun -p ${SLURM_PARTITION} --exclusive --job-name=$1 -N 1 torchrun --nnodes=1 --
num=$(num_files "${CKPTS_OUTPUT}")
if [[ ${num} -ne ${expected_num} ]]; then
    echo "expect: ${expected_num} files, actual: ${num} files."
    exit_code=$(($exit_code + 1))
fi

# clean the test files.