mirror of https://github.com/InternLM/InternLM
fix(conflicts): merge main to develop
commit
9eec3d9465
|
@ -39,7 +39,7 @@ jobs:
|
|||
needs: check-requirements
|
||||
runs-on: [lmtest]
|
||||
steps:
|
||||
- name: mask env
|
||||
- name: mask env
|
||||
run: |
|
||||
echo "::add-mask::${{env.WORKSPACE_PREFIX}}"
|
||||
- uses: actions/checkout@v3
|
||||
|
@ -60,15 +60,29 @@ jobs:
|
|||
runs-on: [lmtest]
|
||||
timeout-minutes: 30
|
||||
steps:
|
||||
- name: mask env
|
||||
- name: mask env
|
||||
run: |
|
||||
echo "::add-mask::${{env.WORKSPACE_PREFIX}}"
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: slurm-train
|
||||
id: basic_train
|
||||
run: |
|
||||
source activate internlm-env-test
|
||||
sh ./ci_scripts/train/slurm_train.sh ${GITHUB_RUN_ID}-${GITHUB_JOB}
|
||||
|
||||
- name: load_preset_ckpt
|
||||
if: ${{ failure() && steps.basic_train.conclusion == 'failure' }}
|
||||
run: |
|
||||
source activate internlm-env-test
|
||||
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||
sh ./ci_scripts/train/load_ckpt.sh 7B_load_preset_ckpt ${GITHUB_RUN_ID}-${GITHUB_JOB}
|
||||
|
||||
- name: load_new_ckpt
|
||||
run: |
|
||||
source activate internlm-env-test
|
||||
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||
sh ./ci_scripts/train/load_ckpt.sh 7B_load_new_ckpt ${GITHUB_RUN_ID}-${GITHUB_JOB}
|
||||
rm -rf $GITHUB_WORKSPACE/llm_ckpts
|
||||
|
||||
- name: torchrun-train
|
||||
|
@ -91,18 +105,17 @@ jobs:
|
|||
run: |
|
||||
source activate internlm-env-test
|
||||
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||
sh ./ci_scripts/model/convert_to_hf.sh
|
||||
sh ./ci_scripts/model/convert_to_hf.sh
|
||||
cd ./hf_ckpt
|
||||
srun -p ${SLURM_PARTITION} --job-name=${GITHUB_RUN_ID}-${GITHUB_JOB} --gpus-per-task=2 python ../ci_scripts/model/loaded_as_transformer.py
|
||||
cd ..
|
||||
rm -rf $GITHUB_WORKSPACE/hf_ckpt
|
||||
|
||||
load-chat-model-in-hf:
|
||||
if: ${{ always() }}
|
||||
needs: check-requirements
|
||||
runs-on: [lmtest]
|
||||
steps:
|
||||
- name: mask env
|
||||
- name: mask env
|
||||
run: |
|
||||
echo "::add-mask::${{env.WORKSPACE_PREFIX}}"
|
||||
- uses: actions/checkout@v3
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
name: lint-check
|
||||
|
||||
on:
|
||||
on:
|
||||
push:
|
||||
pull_request:
|
||||
branches:
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
name: Sonarqube
|
||||
on:
|
||||
workflow_dispatch:
|
||||
|
||||
|
||||
jobs:
|
||||
sonarqube:
|
||||
name: SonarQube Scan
|
||||
|
@ -13,4 +13,4 @@ jobs:
|
|||
- uses: sonarsource/sonarqube-scan-action@master
|
||||
env:
|
||||
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
|
||||
SONAR_HOST_URL: ${{ secrets.SONAR_HOST_URL }}
|
||||
SONAR_HOST_URL: ${{ secrets.SONAR_HOST_URL }}
|
||||
|
|
|
@ -40,6 +40,10 @@ InternLM は、70 億のパラメータを持つベースモデルと、実用
|
|||
|
||||
さらに、大規模な依存関係を必要とせずにモデルの事前学習をサポートする軽量な学習フレームワークが提供されます。単一のコードベースで、数千の GPU を持つ大規模クラスタでの事前学習と、単一の GPU での微調整をサポートし、顕著な性能最適化を達成します。InternLM は、1024GPU でのトレーニングにおいて 90% 近いアクセラレーション効率を達成しています。
|
||||
|
||||
## ニュース
|
||||
|
||||
InternLM-7B-Chat v1.1 は、コード インタプリタと関数呼び出し機能を備えてリリースされました。 [Lagent](https://github.com/InternLM/lagent) で試すことができます。
|
||||
|
||||
## InternLM-7B
|
||||
|
||||
### パフォーマンス評価
|
||||
|
@ -80,8 +84,8 @@ Transformers を使用して InternLM 7B チャットモデルをロードする
|
|||
|
||||
```python
|
||||
>>> from transformers import AutoTokenizer, AutoModelForCausalLM
|
||||
>>> tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
|
||||
>>> model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).cuda()
|
||||
>>> tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b-v1_1", trust_remote_code=True)
|
||||
>>> model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b-v1_1", trust_remote_code=True).cuda()
|
||||
>>> model = model.eval()
|
||||
>>> response, history = model.chat(tokenizer, "こんにちは", history=[])
|
||||
>>> print(response)
|
||||
|
|
|
@ -45,6 +45,10 @@ InternLM ,即书生·浦语大模型,包含面向实用场景的70亿参数
|
|||
|
||||
提供了支持模型预训练的轻量级训练框架,无需安装大量依赖包,一套代码支持千卡预训练和单卡人类偏好对齐训练,同时实现了极致的性能优化,实现千卡训练下近90%加速效率。
|
||||
|
||||
## 新闻
|
||||
|
||||
我们开源了 InternLM-Chat-7B v1.1。该模型能够调用代码解释器和工具插件。你可以在 [Lagent](https://github.com/InternLM/lagent) 中体验这些新功能。
|
||||
|
||||
## InternLM-7B
|
||||
|
||||
### 性能评测
|
||||
|
@ -74,6 +78,7 @@ InternLM ,即书生·浦语大模型,包含面向实用场景的70亿参数
|
|||
| 模型 | InternLM 格式权重下载地址 | Transformers 格式权重下载地址 |
|
||||
| -------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------ |
|
||||
| **InternLM 7B** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-7b) | [🤗internlm/intern-7b](https://huggingface.co/internlm/internlm-7b) |
|
||||
| **InternLM Chat 7B v1.1** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b-v1.1) | [🤗internlm/intern-chat-7b-v1.1](https://huggingface.co/internlm/internlm-chat-7b-v1.1) |
|
||||
| **InternLM Chat 7B** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b) | [🤗internlm/intern-chat-7b](https://huggingface.co/internlm/internlm-chat-7b)
|
||||
| **InternLM Chat 7B 8k** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b-8k) | [🤗internlm/intern-chat-7b-8k](https://huggingface.co/internlm/internlm-chat-7b-8k)
|
||||
|
||||
|
@ -85,8 +90,8 @@ InternLM ,即书生·浦语大模型,包含面向实用场景的70亿参数
|
|||
|
||||
```python
|
||||
>>> from transformers import AutoTokenizer, AutoModelForCausalLM
|
||||
>>> tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
|
||||
>>> model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).cuda()
|
||||
>>> tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b-v1_1", trust_remote_code=True)
|
||||
>>> model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b-v1_1", trust_remote_code=True).cuda()
|
||||
>>> model = model.eval()
|
||||
>>> response, history = model.chat(tokenizer, "你好", history=[])
|
||||
>>> print(response)
|
||||
|
@ -117,26 +122,44 @@ streamlit run web_demo.py
|
|||
|
||||
我们使用 [LMDeploy](https://github.com/InternLM/LMDeploy) 完成 InternLM 的一键部署。
|
||||
|
||||
1. 首先安装 LMDeploy:
|
||||
```bash
|
||||
python3 -m pip install lmdeploy
|
||||
```
|
||||
|
||||
```bash
|
||||
python3 -m pip install lmdeploy
|
||||
```
|
||||
执行以下命令,可以在终端与 `internlm-chat-7b` 模型进行交互式对话,或者通过 WebUI 与它聊天。
|
||||
|
||||
2. 快速的部署命令如下:
|
||||
```bash
|
||||
# 转换权重格式
|
||||
python3 -m lmdeploy.serve.turbomind.deploy internlm-chat-7b
|
||||
|
||||
```bash
|
||||
python3 -m lmdeploy.serve.turbomind.deploy internlm-chat-7b /path/to/internlm-7b/model
|
||||
```
|
||||
# 在终端进行交互式对话
|
||||
python3 -m lmdeploy.turbomind.chat ./workspace
|
||||
|
||||
3. 在导出模型后,你可以直接通过如下命令启动服务,并在客户端与AI对话
|
||||
# 启动 gradio 服务
|
||||
python3 -m lmdeploy.serve.gradio.app ./workspace
|
||||
```
|
||||
以上过程中,LMDeploy 使用的是 FP16 的计算精度。
|
||||
|
||||
```bash
|
||||
bash workspace/service_docker_up.sh
|
||||
python3 -m lmdeploy.serve.client {server_ip_addresss}:33337
|
||||
```
|
||||
除了 FP16 精度,LMDeploy 还支持 `internlm-chat-7b` 4bit 权重模型推理。它不仅把模型的显存减少到 6G,大约只有 FP16 的 40%,更重要的是,经过 kernel 层面的极致优化,其推理性能在 A100-80G 上可达到 FP16 的 2.4 倍以上。
|
||||
|
||||
以下是 `internlm-chat-7b` 4bit 权重模型的部署方法。推理速度的 benchmark 请参考[这里](https://github.com/InternLM/lmdeploy/blob/main/docs/zh_cn/w4a16.md#%E6%8E%A8%E7%90%86%E9%80%9F%E5%BA%A6)
|
||||
|
||||
```bash
|
||||
# download prequantized internlm-chat-7b model from huggingface
|
||||
git-lfs install
|
||||
git clone https://huggingface.co/lmdeploy/llama2-chat-7b-w4
|
||||
|
||||
# Convert the model's layout and store it in the default path, ./workspace.
|
||||
python3 -m lmdeploy.serve.turbomind.deploy internlm-chat-7b ./llama2-chat-7b-w4 awq --group-size 128
|
||||
|
||||
# inference lmdeploy's turbomind engine
|
||||
python3 -m lmdeploy.turbomind.chat ./workspace
|
||||
|
||||
# serving with gradio
|
||||
python3 -m lmdeploy.serve.gradio.app ./workspace
|
||||
```
|
||||
LMDeploy 是涵盖了 LLM 任务的全套轻量化、部署和服务的工具箱。请参考 [部署教程](https://github.com/InternLM/LMDeploy) 了解 InternLM 的更多部署细节。
|
||||
|
||||
[LMDeploy](https://github.com/InternLM/LMDeploy) 支持了 InternLM 部署的完整流程,请参考 [部署教程](https://github.com/InternLM/LMDeploy) 了解 InternLM 的更多部署细节。
|
||||
|
||||
## 微调&训练
|
||||
|
||||
|
|
58
README.md
58
README.md
|
@ -45,6 +45,10 @@ InternLM has open-sourced a 7 billion parameter base model and a chat model tail
|
|||
|
||||
Additionally, a lightweight training framework is offered to support model pre-training without the need for extensive dependencies. With a single codebase, it supports pre-training on large-scale clusters with thousands of GPUs, and fine-tuning on a single GPU while achieving remarkable performance optimizations. InternLM achieves nearly 90% acceleration efficiency during training on 1024 GPUs.
|
||||
|
||||
## News
|
||||
|
||||
InternLM-7B-Chat v1.1 is released with code interpreter and function calling capability. You can try it with [Lagent](https://github.com/InternLM/lagent).
|
||||
|
||||
## InternLM-7B
|
||||
|
||||
### Performance Evaluation
|
||||
|
@ -74,6 +78,7 @@ InternLM 7B and InternLM 7B Chat, trained using InternLM, have been open-sourced
|
|||
| Model | InternLM Format Weight Download Link | Transformers Format Weight Download Link |
|
||||
| ----------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- |
|
||||
| **InternLM 7B** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-7b) | [🤗internlm/intern-7b](https://huggingface.co/internlm/internlm-7b) |
|
||||
| **InternLM Chat 7B v1.1** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b-v1.1) | [🤗internlm/intern-chat-7b-v1.1](https://huggingface.co/internlm/internlm-chat-7b-v1.1) |
|
||||
| **InternLM Chat 7B** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b) | [🤗internlm/intern-chat-7b](https://huggingface.co/internlm/internlm-chat-7b) |
|
||||
| **InternLM Chat 7B 8k** | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b-8k) | [🤗internlm/intern-chat-7b-8k](https://huggingface.co/internlm/internlm-chat-7b-8k) |
|
||||
|
||||
|
@ -85,8 +90,8 @@ To load the InternLM 7B Chat model using Transformers, use the following code:
|
|||
|
||||
```python
|
||||
>>> from transformers import AutoTokenizer, AutoModelForCausalLM
|
||||
>>> tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
|
||||
>>> model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).cuda()
|
||||
>>> tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b-v1_1", trust_remote_code=True)
|
||||
>>> model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b-v1_1", trust_remote_code=True).cuda()
|
||||
>>> model = model.eval()
|
||||
>>> response, history = model.chat(tokenizer, "hello", history=[])
|
||||
>>> print(response)
|
||||
|
@ -118,28 +123,45 @@ The effect is as follows
|
|||
|
||||
### Deployment
|
||||
|
||||
We use [LMDeploy](https://github.com/InternLM/LMDeploy) to complete the one-click deployment of InternLM.
|
||||
We use [LMDeploy](https://github.com/InternLM/LMDeploy) to complete the workflow of InternLM deployment.
|
||||
|
||||
1. First, install LMDeploy:
|
||||
```bash
|
||||
python3 -m pip install lmdeploy
|
||||
```
|
||||
|
||||
```bash
|
||||
python3 -m pip install lmdeploy
|
||||
```
|
||||
You can utilize the following commands to conduct `internlm-chat-7b` FP16 inference, serve it and interact with AI assistant via WebUI:
|
||||
|
||||
2. Use the following command for quick deployment:
|
||||
```bash
|
||||
# convert weight layout
|
||||
python3 -m lmdeploy.serve.turbomind.deploy internlm-chat-7b
|
||||
|
||||
```bash
|
||||
python3 -m lmdeploy.serve.turbomind.deploy internlm-chat-7b /path/to/internlm-chat-7b/model
|
||||
```
|
||||
# inference lmdeploy's turbomind engine
|
||||
python3 -m lmdeploy.turbomind.chat ./workspace
|
||||
|
||||
3. After exporting the model, you can start a server and have a conversation with the deployed model using the following command:
|
||||
|
||||
```bash
|
||||
bash workspace/service_docker_up.sh
|
||||
python3 -m lmdeploy.serve.client {server_ip_addresss}:33337
|
||||
```
|
||||
# serving with gradio
|
||||
python3 -m lmdeploy.serve.gradio.app ./workspace
|
||||
```
|
||||
|
||||
[LMDeploy](https://github.com/InternLM/LMDeploy) provides a complete workflow for deploying InternLM. Please refer to the [deployment tutorial](https://github.com/InternLM/LMDeploy) for more details on deploying InternLM.
|
||||
You can also deploy the 4-bit quantized `internlm-chat-7b` model via LMDeploy. It greatly trims down the model's memory overhead to 6G, just 40% of what FP16 inference would take. More importantly, with extremely optimized kernels, inference is up to 2.4x faster than FP16 inference on A100-80G.
|
||||
|
||||
Try the following to enjoy 4-bit `internlm-chat-7b` on a GeForce RTX 30x GPU card. You can find the inference benchmark [here](https://github.com/InternLM/lmdeploy/blob/main/docs/en/w4a16.md#inference-performance).
|
||||
|
||||
```bash
|
||||
# download prequantized internlm-chat-7b model from huggingface
|
||||
git-lfs install
|
||||
git clone https://huggingface.co/lmdeploy/llama2-chat-7b-w4
|
||||
|
||||
# Convert the model's layout and store it in the default path, ./workspace.
|
||||
python3 -m lmdeploy.serve.turbomind.deploy internlm-chat-7b ./llama2-chat-7b-w4 awq --group-size 128
|
||||
|
||||
# inference lmdeploy's turbomind engine
|
||||
python3 -m lmdeploy.turbomind.chat ./workspace
|
||||
|
||||
# serving with gradio
|
||||
python3 -m lmdeploy.serve.gradio.app ./workspace
|
||||
```
|
||||
|
||||
LMDeploy is an efficient toolkit for compressing, deploying, and serving LLM models. Please refer to the [deployment tutorial](https://github.com/InternLM/LMDeploy) for more details on deploying InternLM.
|
||||
|
||||
## Fine-tuning & Training
|
||||
|
||||
|
|
|
@ -0,0 +1,29 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
|
||||
def merge_dicts(dict_a: dict, dict_b: dict):
    """Recursively merge ``dict_b`` on top of ``dict_a`` and return a new dict.

    Values from ``dict_b`` win on conflict. When both sides hold a dict under
    the same key, the two dicts are merged recursively instead of replaced.

    Args:
        dict_a: base mapping; it is not modified.
        dict_b: overriding mapping; it is not modified.

    Returns:
        A plain ``dict`` with the keys of ``dict_a`` first (in their original
        order) followed by the keys that only exist in ``dict_b``.
    """
    merged = dict(dict_a)
    for key, value in dict_b.items():
        base = dict_a.get(key)
        if isinstance(value, dict) and isinstance(base, dict):
            # Both sides are mappings: merge them level by level.
            merged[key] = merge_dicts(base, value)
        else:
            # Key is new in dict_b, or one side is not a mapping: dict_b's
            # value simply replaces whatever dict_a had.
            merged[key] = value
    return merged
|
||||
|
||||
|
||||
def format_dict_to_py_string(data: dict, indent=0, is_nested=False):
    """Render ``data`` as Python source made of ``key = value`` assignments.

    Nested dicts are rendered as ``key = dict(...)`` calls whose entries are
    indented four more spaces and each followed by a comma. Non-dict values
    are rendered with ``repr``.

    Args:
        data: mapping to render; keys are inserted verbatim as identifiers.
        indent: number of leading spaces for every line of this level.
        is_nested: True when rendering the inside of a ``dict(...)`` call, in
            which case each entry gets a trailing comma.

    Returns:
        The rendered text. The top-level call ends with one extra newline;
        nested levels do not, so no blank line is emitted before a closing
        parenthesis.
    """
    pad = " " * indent
    suffix = "," if is_nested else ""
    lines = []
    for key, value in data.items():
        if isinstance(value, dict):
            body = format_dict_to_py_string(value, indent + 4, is_nested=True)
            lines.append(f"{pad}{key} = dict(\n{body}{pad}){suffix}\n")
        else:
            lines.append(f"{pad}{key} = {repr(value)}{suffix}\n")
    result = "".join(lines)
    if not is_nested:
        # Only the outermost level appends the trailing blank line.
        result += "\n"
    return result
|
|
@ -16,7 +16,7 @@ exit_code=0
|
|||
|
||||
source ./ci_scripts/common/basic_func.sh
|
||||
|
||||
echo "start to test alpaca_tokenizer.py."
|
||||
echo "start to test alpaca_tokenizer.py."
|
||||
|
||||
if [[ -d ${RESULTS} ]]; then
|
||||
if ! rm -rf ${RESULTS}/*; then
|
||||
|
|
|
@ -12,7 +12,7 @@ exit_code=0
|
|||
|
||||
source ./ci_scripts/common/basic_func.sh
|
||||
|
||||
echo "start to test tokenizer.py."
|
||||
echo "start to test tokenizer.py."
|
||||
|
||||
num=$(num_files "${RESULTS}")
|
||||
if [[ ${num} -gt 0 ]]; then
|
||||
|
|
|
@ -40,7 +40,7 @@ num=$(num_files "${CKPTS_OUTPUT}")
|
|||
|
||||
if [[ ${num} -ne ${expected_num} ]]; then
|
||||
echo "expect: ${expected_num} files, actual: ${num} files."
|
||||
exit_code=$(($exit_code + 1))
|
||||
exit_code=$(($exit_code + 1))
|
||||
fi
|
||||
|
||||
# NOTICE: should not remove the cached files, because the cached files will be used in the next test case.
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- encoding: utf-8 -*-
|
||||
from transformers import AutoTokenizer, AutoModelForCausalLM
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
|
||||
model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).cuda()
|
||||
|
|
|
@ -10,12 +10,11 @@ VOCAB_SIZE = 103168
|
|||
# Ckpt folder format:
|
||||
# fs: 'local:/mnt/nfs/XXX'
|
||||
# oss: 'boto3:s3://model_weights/XXX'
|
||||
MODEL_ONLY_FOLDER = "local:llm_ckpts/xxxx"
|
||||
# MODEL_ONLY_FOLDER = "local:llm_ckpts/xxxx"
|
||||
# SAVE_CKPT_FOLDER = "local:llm_ckpts"
|
||||
SAVE_CKPT_FOLDER = "local:llm_ckpts"
|
||||
# LOAD_CKPT_FOLDER = "local:llm_ckpts/49"
|
||||
ckpt = dict(
|
||||
enable_save_ckpt=True,
|
||||
# Path to save training ckpt.
|
||||
save_ckpt_folder=SAVE_CKPT_FOLDER,
|
||||
# Path to continue training ckpt (load model weights and scheduler/context states).
|
||||
|
@ -27,7 +26,7 @@ ckpt = dict(
|
|||
load_optimizer=True,
|
||||
)
|
||||
|
||||
TRAIN_FOLDER = "/mnt/petrelfs/qa-caif-cicd/data/lm_data/alpaca_data/train/en"
|
||||
TRAIN_FOLDER = "local:../lm_data/alpaca_data/train/en"
|
||||
data = dict(
|
||||
seq_len=SEQ_LEN,
|
||||
# micro_num means the number of micro_batch contained in one gradient update
|
||||
|
@ -120,8 +119,8 @@ zero1 parallel:
|
|||
2. if zero1 == 1, zero is not used, and all dp groups retain the full amount of model parameters.
|
||||
3. zero1 > 1 and zero1 <= dp world size, the world size of zero is a subset of dp world size.
|
||||
For smaller models, it is usually a better choice to split the parameters within nodes with a setting <= 8.
|
||||
pipeline parallel: pipeline parallel size.
|
||||
tensor parallel: tensor parallel size, usually the number of GPUs per node.
|
||||
pipeline parallel: pipeline parallel size, only 1 is accepted currently.
|
||||
tensor parallel: tensor parallel size, usually the number of GPUs per node, only 1 is accepted currently.
|
||||
"""
|
||||
parallel = dict(
|
||||
zero1=8,
|
||||
|
|
|
@ -0,0 +1,49 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- encoding: utf-8 -*-
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
|
||||
from ci_scripts.common import com_func
|
||||
from internlm.core.context import Config
|
||||
|
||||
|
||||
def generate_new_config(config_py_file, test_config_json, case_name):
    """Generate a per-case train config from a base config plus JSON overrides.

    Loads the base config from ``config_py_file``, merges the overrides stored
    under ``case_name`` in ``test_config_json`` on top of it, and writes the
    result as ``<case_name>.py`` in the same directory as the base config.

    Args:
        config_py_file: path to the original train config (.py file).
        test_config_json: path to the JSON file mapping case names to overrides.
        case_name: key of the JSON file selecting which overrides to apply.

    Raises:
        KeyError: if ``case_name`` is not a key of the JSON file, including
            when the file contains no cases at all.
    """
    # The generated config is written next to the original one.
    config_dir = os.path.split(config_py_file)[0]
    new_config_py_file = os.path.join(config_dir, case_name + ".py")

    origin_config = Config.from_file(config_py_file)
    # Only the JSON parsing needs the open file handle.
    with open(test_config_json) as f:
        test_config = json.load(f)

    # One explicit error for both "empty file" and "unknown case"; the message
    # names the JSON file instead of dumping its whole parsed content.
    if not test_config or case_name not in test_config:
        raise KeyError(f"the {case_name} doesn't exist. Please check {test_config_json} again!")

    new_config = com_func.merge_dicts(origin_config, test_config[case_name])
    print(f"new config is:\n{new_config}")

    # write new config to py file
    file_content = com_func.format_dict_to_py_string(new_config)
    with open(new_config_py_file, "w") as f:
        f.write(file_content)
    print(f"The new test train config file is {new_config_py_file}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: parse the three paths/names and generate the
    # per-case config file.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--origin_config",
        type=str,
        default="./ci_scripts/train/ci_7B_sft.py",
        help="path to the origin train config file",
    )
    parser.add_argument(
        "--test_config",
        type=str,
        default="./ci_scripts/train/test_config.json",
        help="path to the test train config file",
    )
    # The case name has no sensible default and is used to build the output
    # file name, so let argparse reject a missing value with a usage error
    # instead of failing later on ``None + ".py"``.
    parser.add_argument(
        "--case_name",
        type=str,
        required=True,
        help="name of the case which will be runned ",
    )
    args = parser.parse_args()
    generate_new_config(args.origin_config, args.test_config, args.case_name)
|
|
@ -0,0 +1,38 @@
|
|||
#!/bin/bash
# CI check: generate the train config for a test case, run slurm training
# with it, and verify the number of checkpoint files written at step 40.
#
# Usage: load_ckpt.sh <case_name> <slurm_job_name>
#   <case_name>       key in ci_scripts/train/test_config.json
#   <slurm_job_name>  value passed to srun --job-name
#
# Exits with the number of failed checks (0 means success).
set -x

[[ -n ${GITHUB_WORKSPACE} ]] || { echo "should set GITHUB_WORKSPACE first before ci, exit."; exit 1; }
[[ $# -ge 2 ]] || { echo "usage: $0 <case_name> <slurm_job_name>"; exit 1; }

readonly CASE_NAME="$1"
readonly JOB_NAME="$2"
readonly CKPTS_PATH="$GITHUB_WORKSPACE/llm_ckpts"
readonly CKPTS40_PATH="$GITHUB_WORKSPACE/llm_ckpts/40"
readonly CKPTS40_OUTPUT="${CKPTS40_PATH}/*.pt"
expected_num=21
exit_code=0

source ./ci_scripts/common/basic_func.sh

echo "start to test slurm training with loading checkpoint."

# generate_config.py writes <case_name>.py next to the base config.
python ./ci_scripts/train/generate_config.py --case_name "${CASE_NAME}"
file="./ci_scripts/train/${CASE_NAME}.py"
if [[ ! -f ${file} ]]; then
    echo "expect: ${file} exists, actual: not exist."
    exit_code=$(($exit_code + 1))
fi

if ! srun -p "${SLURM_PARTITION}" --exclusive --job-name="${JOB_NAME}" -n 8 --ntasks-per-node=8 --gpus-per-task=1 python train.py --config "${file}"; then
    echo "test slurm training failed."
    exit_code=$(($exit_code + 1))
fi

# The glob pattern is passed quoted, exactly as before; num_files comes from
# basic_func.sh.
num=$(num_files "${CKPTS40_OUTPUT}")
if [[ ${num} -ne ${expected_num} ]]; then
    echo "expect: ${expected_num} files, actual: ${num} files."
    exit_code=$(($exit_code + 1))
fi

# clean the test files.
# ${CKPTS_PATH:?} aborts instead of expanding to "/*" if the variable is empty.
if ! rm -rf "${CKPTS_PATH:?}"/*; then
    echo "cleaning cached file in ${CKPTS_PATH} failed."
    exit_code=$(($exit_code + 1))
fi

exit $exit_code
|
|
@ -25,12 +25,6 @@ srun -p ${SLURM_PARTITION} --exclusive --job-name=$1 -n 8 --ntasks-per-node=8 --
|
|||
num=$(num_files "${CKPTS20_OUTPUT}")
|
||||
if [[ ${num} -ne ${expected_num} ]]; then
|
||||
echo "expect: ${expected_num} files, actual: ${num} files."
|
||||
exit_code=$(($exit_code + 1))
|
||||
fi
|
||||
|
||||
# clean the test files.
|
||||
if ! rm -rf ${CKPTS_PATH}/*; then
|
||||
echo "cleaning cached file in ${CKPTS_PATH} failed."
|
||||
exit_code=$(($exit_code + 1))
|
||||
fi
|
||||
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
{
|
||||
"7B_basic_train": {
|
||||
"SEQ_LEN": 1024,
|
||||
"HIDDEN_SIZE": 2048,
|
||||
"NUM_ATTENTION_HEAD": 16,
|
||||
"NUM_LAYER": 16,
|
||||
"TRAIN_FOLDER":"local:../lm_data/alpaca_data/train/en",
|
||||
"ckpt": {
|
||||
"checkpoint_every": 20
|
||||
},
|
||||
"data": {
|
||||
"total_steps": 20
|
||||
}
|
||||
},
|
||||
"7B_load_new_ckpt": {
|
||||
"SEQ_LEN": 1024,
|
||||
"HIDDEN_SIZE": 2048,
|
||||
"NUM_ATTENTION_HEAD": 16,
|
||||
"NUM_LAYER": 16,
|
||||
"TRAIN_FOLDER":"local:../lm_data/alpaca_data/train/en",
|
||||
"LOAD_CKPT_FOLDER": "local:llm_ckpts/20",
|
||||
"ckpt": {
|
||||
"load_ckpt_folder": "local:llm_ckpts/20",
|
||||
"checkpoint_every": 20
|
||||
},
|
||||
"data": {
|
||||
"total_steps": 40
|
||||
}
|
||||
},
|
||||
"7B_load_preset_ckpt": {
|
||||
"SEQ_LEN": 1024,
|
||||
"HIDDEN_SIZE": 2048,
|
||||
"NUM_ATTENTION_HEAD": 16,
|
||||
"NUM_LAYER": 16,
|
||||
"TRAIN_FOLDER":"local:../lm_data/alpaca_data/train/en",
|
||||
"LOAD_CKPT_FOLDER": "local:../lm_data/alpaca_data/llm_ckpts/20",
|
||||
"ckpt": {
|
||||
"load_ckpt_folder": "local:../lm_data/alpaca_data/llm_ckpts/20",
|
||||
"checkpoint_every": 20
|
||||
},
|
||||
"data": {
|
||||
"total_steps": 40
|
||||
}
|
||||
}
|
||||
}
|
|
@ -25,7 +25,7 @@ srun -p ${SLURM_PARTITION} --exclusive --job-name=$1 -N 1 torchrun --nnodes=1 --
|
|||
num=$(num_files "${CKPTS_OUTPUT}")
|
||||
if [[ ${num} -ne ${expected_num} ]]; then
|
||||
echo "expect: ${expected_num} files, actual: ${num} files."
|
||||
exit_code=$(($exit_code + 1))
|
||||
exit_code=$(($exit_code + 1))
|
||||
fi
|
||||
|
||||
# clean the test files.
|
||||
|
|
Loading…
Reference in New Issue