[CI]: fix and pass pre-commit hook (#666)

pull/667/head^2
Yuzhe Gu 2024-01-26 17:26:04 +08:00 committed by GitHub
parent 1cb9870cb3
commit 78bcb07f0e
28 changed files with 637 additions and 546 deletions
View File

@@ -24,15 +24,3 @@ jobs:
        run: |
          pip install isort==5.12.0
          isort --check --profile=black .
-     - name: lint-black
-       run: |
-         pip install black==22.8.0
-         BLACK_EXCLUDE_SETTINGS='\.venv/|\.local/|\.cache/|\.git/'
-         black --line-length=120 --check --exclude $BLACK_EXCLUDE_SETTINGS ./chat/web_demo.py
-     - name: lint-pylint
-       run: |
-         pip install pylint==v2.17.2
-         PYLINT_DISABLE_LIST="C0114,C0415,W0212,W0235,W0238,W0621,C0103,R1735,C2801,E0402,C0412,W0719,R1728,W1514,W0718,W0105,W0707,C0209,W0703,W1203"
-         pylint --rcfile .pylintrc --disable=$PYLINT_DISABLE_LIST ./chat/web_demo.py
View File

@ -1,53 +1,44 @@
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
repos: repos:
- repo: https://github.com/psf/black - repo: https://github.com/PyCQA/flake8
rev: '22.8.0' rev: 5.0.4
hooks: hooks:
- id: black - id: flake8
args: - repo: https://github.com/PyCQA/isort
- --line-length=120 rev: 5.11.5
- repo: https://github.com/pycqa/isort
rev: '5.12.0'
hooks: hooks:
- id: isort - id: isort
name: isort - repo: https://github.com/pre-commit/mirrors-yapf
files: "\\.(py)$" rev: v0.32.0
args:
- --profile=black
- repo: https://github.com/PyCQA/flake8
rev: '3.8.4'
hooks: hooks:
- id: flake8 - id: yapf
args: - repo: https://github.com/codespell-project/codespell
- --ignore=F403,F405,W504,W503,E203 rev: v2.2.1
- --max-line-length=120
- repo: https://github.com/pre-commit/pygrep-hooks
rev: v1.9.0
hooks: hooks:
- id: python-check-blanket-noqa - id: codespell
- repo: https://github.com/pre-commit/pre-commit-hooks - repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0 rev: v4.3.0
hooks: hooks:
- id: trailing-whitespace - id: trailing-whitespace
- id: end-of-file-fixer - id: check-yaml
- id: check-added-large-files - id: end-of-file-fixer
args: ['--maxkb=100',--enforce-all] - id: requirements-txt-fixer
- id: check-json - id: double-quote-string-fixer
- id: check-docstring-first - id: check-merge-conflict
- id: check-yaml - id: fix-encoding-pragma
- id: debug-statements args: ["--remove"]
- id: mixed-line-ending - id: mixed-line-ending
- repo: https://github.com/PyCQA/pylint/ args: ["--fix=lf"]
rev: v2.17.2 - repo: https://github.com/executablebooks/mdformat
rev: 0.7.9
hooks: hooks:
- id: pylint - id: mdformat
name: pylint args: ["--number", "--table-width", "200"]
entry: pylint additional_dependencies:
language: system - mdformat-openmmlab
types: [python] - mdformat_frontmatter
args: - linkify-it-py
[ - repo: https://github.com/myint/docformatter
'--rcfile=.pylintrc', rev: v1.3.1
'--disable=C0114,C0415,W0212,W0235,W0238,W0621,C0103,R1735,C2801,E0402,C0412,W0719,R1728,W1514,W0718,W0105,W0707,C0209,W0703,W1203' hooks:
] - id: docformatter
args: ["--in-place", "--wrap-descriptions", "79"]
View File

@@ -16,7 +16,9 @@
[![license](./assets/license.svg)](./LICENSE)
[![evaluation](./assets/compass_support.svg)](https://github.com/internLM/OpenCompass/)
<!-- [![Documentation Status](https://readthedocs.org/projects/internlm/badge/?version=latest)](https://internlm.readthedocs.io/zh_CN/latest/?badge=latest) -->
[📘Commercial Application](#license) |
[🤗HuggingFace](https://huggingface.co/internlm) |
[🆕Update News](#news) |
@@ -45,26 +47,26 @@ InternLM2 series are released with the following features:
## News

\[2024.01.23\] We release InternLM2-Math-7B and InternLM2-Math-20B with pretraining and SFT checkpoints. They surpass ChatGPT with small sizes. See [InternLM-Math](https://github.com/InternLM/internlm-math) for details and download.

\[2024.01.17\] We release InternLM2-7B and InternLM2-20B and their corresponding chat models with stronger capabilities in all dimensions. See [model zoo below](#model-zoo) for download or [model cards](./model_cards/) for more details.

\[2023.12.13\] InternLM-7B-Chat and InternLM-20B-Chat checkpoints are updated. With an improved finetuning strategy, the new chat models can generate higher quality responses with greater stylistic diversity.

\[2023.09.20\] InternLM-20B is released with base and chat versions.

## Model Zoo

| Model | Transformers(HF) | ModelScope(HF) | OpenXLab(HF) | OpenXLab(Origin) | Release Date |
| -------------------------- | ------------------------------------------ | ---------------------------------------- | -------------------------------------- | ------------------------------------------ | ------------ |
| **InternLM2-Base-7B** | [🤗internlm2-base-7b](https://huggingface.co/internlm/internlm2-base-7b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-base-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-7b-original) | 2024-01-17 |
| **InternLM2-7B** | [🤗internlm2-7b](https://huggingface.co/internlm/internlm2-7b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b-original) | 2024-01-17 |
| **InternLM2-Chat-7B-SFT** | [🤗internlm2-chat-7b-sft](https://huggingface.co/internlm/internlm2-chat-7b-sft) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-7b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b-sft/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-sft) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-sft-original) | 2024-01-17 |
| **InternLM2-Chat-7B** | [🤗internlm2-chat-7b](https://huggingface.co/internlm/internlm2-chat-7b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-original) | 2024-01-17 |
| **InternLM2-Base-20B** | [🤗internlm2-base-20b](https://huggingface.co/internlm/internlm2-base-20b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-base-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-20b-original) | 2024-01-17 |
| **InternLM2-20B** | [🤗internlm2-20b](https://huggingface.co/internlm/internlm2-20b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b-original) | 2024-01-17 |
| **InternLM2-Chat-20B-SFT** | [🤗internlm2-chat-20b-sft](https://huggingface.co/internlm/internlm2-chat-20b-sft) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-20b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b-sft/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-sft) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-sft-original) | 2024-01-17 |
| **InternLM2-Chat-20B** | [🤗internlm2-chat-20b](https://huggingface.co/internlm/internlm2-chat-20b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-original) | 2024-01-17 |

**Notes:**
@@ -85,22 +87,22 @@ The release of InternLM2 series contains two model sizes: 7B and 20B. 7B models
### Objective Evaluation

| Dataset | Baichuan2-7B-Chat | Mistral-7B-Instruct-v0.2 | Qwen-7B-Chat | InternLM2-Chat-7B | ChatGLM3-6B | Baichuan2-13B-Chat | Mixtral-8x7B-Instruct-v0.1 | Qwen-14B-Chat | InternLM2-Chat-20B |
| ---------------- | ----------------- | ------------------------ | ------------ | ----------------- | ----------- | ------------------ | -------------------------- | ------------- | ------------------ |
| MMLU | 50.1 | 59.2 | 57.1 | 63.7 | 58.0 | 56.6 | 70.3 | 66.7 | 66.5 |
| CMMLU | 53.4 | 42.0 | 57.9 | 63.0 | 57.8 | 54.8 | 50.6 | 68.1 | 65.1 |
| AGIEval | 35.3 | 34.5 | 39.7 | 47.2 | 44.2 | 40.0 | 41.7 | 46.5 | 50.3 |
| C-Eval | 53.9 | 42.4 | 59.8 | 60.8 | 59.1 | 56.3 | 54.0 | 71.5 | 63.0 |
| TrivialQA | 37.6 | 35.0 | 46.1 | 50.8 | 38.1 | 40.3 | 57.7 | 54.5 | 53.9 |
| NaturalQuestions | 12.8 | 8.1 | 18.6 | 24.1 | 14.0 | 12.7 | 22.5 | 22.9 | 25.9 |
| C3 | 78.5 | 66.9 | 84.4 | 91.5 | 79.3 | 84.4 | 82.1 | 91.5 | 93.5 |
| CMRC | 8.1 | 5.6 | 14.6 | 63.8 | 43.2 | 27.8 | 5.3 | 13.0 | 50.4 |
| WinoGrande | 49.9 | 50.8 | 54.2 | 65.8 | 61.7 | 50.9 | 60.9 | 55.7 | 74.8 |
| BBH | 35.9 | 46.5 | 45.5 | 61.2 | 56.0 | 42.5 | 57.3 | 55.8 | 68.3 |
| GSM-8K | 32.4 | 48.3 | 44.1 | 70.7 | 53.8 | 56.0 | 71.7 | 57.7 | 79.6 |
| Math | 5.7 | 8.6 | 12.0 | 23.0 | 20.4 | 4.3 | 22.5 | 27.6 | 31.9 |
| HumanEval | 17.7 | 35.4 | 36.0 | 59.8 | 52.4 | 19.5 | 37.8 | 40.9 | 67.1 |
| MBPP | 37.7 | 25.7 | 33.9 | 51.4 | 55.6 | 40.9 | 40.9 | 30.0 | 65.8 |

- Performance of MBPP is reported with MBPP(Sanitized)
@@ -108,16 +110,16 @@ The release of InternLM2 series contains two model sizes: 7B and 20B. 7B models
- We have evaluated our model on [AlpacaEval 2.0](https://tatsu-lab.github.io/alpaca_eval/) and InternLM2-Chat-20B surpass Claude 2, GPT-4(0613) and Gemini Pro.

| Model Name         | Win Rate | Length |
| ------------------ | -------- | ------ |
| GPT-4 Turbo        | 50.00%   | 2049   |
| GPT-4              | 23.58%   | 1365   |
| GPT-4 0314         | 22.07%   | 1371   |
| Mistral Medium     | 21.86%   | 1500   |
| XwinLM 70b V0.1    | 21.81%   | 1775   |
| InternLM2 Chat 20B | 21.75%   | 2373   |
| Mixtral 8x7B v0.1  | 18.26%   | 1465   |
| Claude 2           | 17.19%   | 1069   |
| Gemini Pro         | 16.85%   | 1315   |
| GPT-4 0613         | 15.76%   | 1140   |
| Claude 2.1         | 15.73%   | 1096   |
@@ -129,9 +131,11 @@ The release of InternLM2 series contains two model sizes: 7B and 20B. 7B models
We briefly show the usages with [Transformers](#import-from-transformers), [ModelScope](#import-from-modelscope), and [Web demos](#dialogue).
The chat models adopt [chatml format](./chat/chat_format.md) to support both chat and agent applications.

To ensure a better usage effect, please make sure that the installed transformers library version meets the following requirements before performing inference with [Transformers](#import-from-transformers) or [ModelScope](#import-from-modelscope):

```
transformers >= 4.34
```

### Import from Transformers

To load the InternLM2-7B-Chat model using Transformers, use the following code:
View File

@@ -16,6 +16,7 @@
[![license](./assets//license.svg)](https://github.com/open-mmlab/mmdetection/blob/main/LICENSE)
[![evaluation](./assets//compass_support.svg)](https://github.com/internLM/OpenCompass/)
<!-- [![Documentation Status](https://readthedocs.org/projects/internlm/badge/?version=latest)](https://internlm.readthedocs.io/zh_CN/latest/?badge=latest) -->
[📘商业授权](#开源许可证) |
@@ -43,26 +44,26 @@ InternLM2 系列模型在本仓库正式发布,具有如下特性:
## 更新

\[2024.01.23\] 我们发布了 InternLM2-Math-7B 和 InternLM2-Math-20B 以及相关的对话模型。InternLM-Math以较小的尺寸超过了ChatGPT的表现。可以点击[InternLM-Math](https://github.com/InternLM/internlm-math)进行下载,并了解详情。

\[2024.01.17\] 我们发布了 InternLM2-7B 和 InternLM2-20B 以及相关的对话模型,InternLM2 在数理、代码、对话、创作等各方面能力都获得了长足进步,综合性能达到开源模型的领先水平。可以点击[下面的模型库](#model-zoo)进行下载或者[查看模型文档](./model_cards/)来了解更多细节.

\[2023.12.13\] 我们更新了 InternLM-7B-Chat 和 InternLM-20B-Chat 模型权重。通过改进微调数据和训练策略,新版对话模型生成的回复质量更高、语言风格更加多元。

\[2023.09.20\] InternLM-20B 已发布,包括基础版和对话版。

## Model Zoo

| Model | Transformers(HF) | ModelScope(HF) | OpenXLab(HF) | OpenXLab(Origin) | Release Date |
| -------------------------- | ------------------------------------------ | ---------------------------------------- | -------------------------------------- | ------------------------------------------ | ------------ |
| **InternLM2-Base-7B** | [🤗internlm2-base-7b](https://huggingface.co/internlm/internlm2-base-7b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-base-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-7b-original) | 2024-01-17 |
| **InternLM2-7B** | [🤗internlm2-7b](https://huggingface.co/internlm/internlm2-7b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b-original) | 2024-01-17 |
| **InternLM2-Chat-7B-SFT** | [🤗internlm2-chat-7b-sft](https://huggingface.co/internlm/internlm2-chat-7b-sft) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-7b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b-sft/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-sft) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-sft-original) | 2024-01-17 |
| **InternLM2-Chat-7B** | [🤗internlm2-chat-7b](https://huggingface.co/internlm/internlm2-chat-7b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-original) | 2024-01-17 |
| **InternLM2-Base-20B** | [🤗internlm2-base-20b](https://huggingface.co/internlm/internlm2-base-20b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-base-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-20b-original) | 2024-01-17 |
| **InternLM2-20B** | [🤗internlm2-20b](https://huggingface.co/internlm/internlm2-20b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b-original) | 2024-01-17 |
| **InternLM2-Chat-20B-SFT** | [🤗internlm2-chat-20b-sft](https://huggingface.co/internlm/internlm2-chat-20b-sft) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-20b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b-sft/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-sft) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-sft-original) | 2024-01-17 |
| **InternLM2-Chat-20B** | [🤗internlm2-chat-20b](https://huggingface.co/internlm/internlm2-chat-20b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-original) | 2024-01-17 |

**模型说明:**
@@ -83,22 +84,22 @@ InternLM2 系列模型在本仓库正式发布,具有如下特性:
### 客观评测

| Dataset | Baichuan2-7B-Chat | Mistral-7B-Instruct-v0.2 | Qwen-7B-Chat | InternLM2-Chat-7B | ChatGLM3-6B | Baichuan2-13B-Chat | Mixtral-8x7B-Instruct-v0.1 | Qwen-14B-Chat | InternLM2-Chat-20B |
| ---------------- | ----------------- | ------------------------ | ------------ | ----------------- | ----------- | ------------------ | -------------------------- | ------------- | ------------------ |
| MMLU | 50.1 | 59.2 | 57.1 | 63.7 | 58.0 | 56.6 | 70.3 | 66.7 | 66.5 |
| CMMLU | 53.4 | 42.0 | 57.9 | 63.0 | 57.8 | 54.8 | 50.6 | 68.1 | 65.1 |
| AGIEval | 35.3 | 34.5 | 39.7 | 47.2 | 44.2 | 40.0 | 41.7 | 46.5 | 50.3 |
| C-Eval | 53.9 | 42.4 | 59.8 | 60.8 | 59.1 | 56.3 | 54.0 | 71.5 | 63.0 |
| TrivialQA | 37.6 | 35.0 | 46.1 | 50.8 | 38.1 | 40.3 | 57.7 | 54.5 | 53.9 |
| NaturalQuestions | 12.8 | 8.1 | 18.6 | 24.1 | 14.0 | 12.7 | 22.5 | 22.9 | 25.9 |
| C3 | 78.5 | 66.9 | 84.4 | 91.5 | 79.3 | 84.4 | 82.1 | 91.5 | 93.5 |
| CMRC | 8.1 | 5.6 | 14.6 | 63.8 | 43.2 | 27.8 | 5.3 | 13.0 | 50.4 |
| WinoGrande | 49.9 | 50.8 | 54.2 | 65.8 | 61.7 | 50.9 | 60.9 | 55.7 | 74.8 |
| BBH | 35.9 | 46.5 | 45.5 | 61.2 | 56.0 | 42.5 | 57.3 | 55.8 | 68.3 |
| GSM-8K | 32.4 | 48.3 | 44.1 | 70.7 | 53.8 | 56.0 | 71.7 | 57.7 | 79.6 |
| Math | 5.7 | 8.6 | 12.0 | 23.0 | 20.4 | 4.3 | 22.5 | 27.6 | 31.9 |
| HumanEval | 17.7 | 35.4 | 36.0 | 59.8 | 52.4 | 19.5 | 37.8 | 40.9 | 67.1 |
| MBPP | 37.7 | 25.7 | 33.9 | 51.4 | 55.6 | 40.9 | 40.9 | 30.0 | 65.8 |

- MBPP性能使用的是MBPP(Sanitized)版本数据集
@@ -106,16 +107,16 @@ InternLM2 系列模型在本仓库正式发布,具有如下特性:
- 我们评测了InternLM2-Chat在[AlpacaEval 2.0](https://tatsu-lab.github.io/alpaca_eval/) 上的性能,结果表明InternLM2-Chat在AlpacaEval上已经超过了 Claude 2, GPT-4(0613) 和 Gemini Pro.

| Model Name         | Win Rate | Length |
| ------------------ | -------- | ------ |
| GPT-4 Turbo        | 50.00%   | 2049   |
| GPT-4              | 23.58%   | 1365   |
| GPT-4 0314         | 22.07%   | 1371   |
| Mistral Medium     | 21.86%   | 1500   |
| XwinLM 70b V0.1    | 21.81%   | 1775   |
| InternLM2 Chat 20B | 21.75%   | 2373   |
| Mixtral 8x7B v0.1  | 18.26%   | 1465   |
| Claude 2           | 17.19%   | 1069   |
| Gemini Pro         | 16.85%   | 1315   |
| GPT-4 0613         | 15.76%   | 1140   |
| Claude 2.1         | 15.73%   | 1096   |
@@ -127,9 +128,11 @@ InternLM2 系列模型在本仓库正式发布,具有如下特性:
接下来我们展示使用 [Transformers](#import-from-transformers),[ModelScope](#import-from-modelscope) 和 [Web demo](#dialogue) 进行推理。
对话模型采用了 [chatml 格式](./chat/chat_format.md) 来支持通用对话和智能体应用。

为了保障更好的使用效果,在用 [Transformers](#import-from-transformers) 或 [ModelScope](#import-from-modelscope) 进行推理前,请确保安装的 transformers 库版本满足以下要求:

```
transformers >= 4.34
```

### 通过 Transformers 加载

通过以下的代码从 Transformers 加载 InternLM2-7B-Chat 模型 (可修改模型名称替换不同的模型)
View File

@@ -4,18 +4,18 @@ English | [简体中文](README_zh-CN.md)
## Introduction

InternLM-Chat-7B v1.1 has been released as the first open-source model with code interpreter capabilities, supporting external tools such as Python code interpreter and search engine.

InternLM2-Chat, open sourced on January 17, 2024, further enhances its capabilities in code interpreter and general tool utilization. With improved and more generalized instruction understanding, tool selection, and reflection abilities, InternLM2-Chat can more reliably support complex agents and multi-step tool calling for more intricate tasks. InternLM2-Chat exhibits decent computational and reasoning abilities even without external tools, surpassing ChatGPT in mathematical performance. When combined with a code interpreter, InternLM2-Chat-20B obtains comparable results to GPT-4 on GSM8K and MATH. Leveraging strong foundational capabilities in mathematics and tools, InternLM2-Chat provides practical data analysis capabilities.

The results of InternLM2-Chat-20B on math code interpreter is as below:

|                                          | GSM8K | MATH |
| :--------------------------------------: | :---: | :--: |
|            InternLM2-Chat-20B            | 79.6  | 32.5 |
| InternLM2-Chat-20B with Code Interpreter | 84.5  | 51.2 |
|            ChatGPT (GPT-3.5)             | 78.2  | 28.0 |
|                  GPT-4                   | 91.4  | 45.8 |

## Usages
View File

@@ -10,12 +10,12 @@ InternLM2-Chat 进一步提高了它在代码解释和通用工具调用方面
以下是 InternLM2-Chat-20B 在数学代码解释器上的结果。

|                                     | GSM8K | MATH |
| :---------------------------------: | :---: | :--: |
| InternLM2-Chat-20B 单纯依靠内在能力 | 79.6  | 32.5 |
|  InternLM2-Chat-20B 配合代码解释器  | 84.5  | 51.2 |
|          ChatGPT (GPT-3.5)          | 78.2  | 28.0 |
|                GPT-4                | 91.4  | 45.8 |

## 体验
View File

@@ -40,7 +40,7 @@ streamlit run examples/react_web_demo.py
## 用 InternLM-Chat 构建一个 ReAct 智能体

\*\*注意:\*\*如果你想要启动一个 HuggingFace 的模型,请先运行 pip install -e .\[all\]。

```python
# Import necessary modules and classes from the "lagent" library.
View File

@@ -21,20 +21,21 @@ python pal_inference.py \
```

Parameter explanation:

| Parameter | Description |
| :-----------------------: | :----------------------------------------------------------------------: |
| \<model> | Path to the model used for inference |
| \<out_dir> | Generated code will be saved in the specified output folder |
| --dataset <dataset> | Name of the dataset used for code generation (defaults to gsm8k) |
| --max_length <length> | Maximum input token length for the model (defaults to 2048) |
| --top_p <threshold> | Probability threshold for the sum of candidate tokens (defaults to 0.8) |
| --eoh <end token> | User input end identifier (defaults to "") |
| --eoa <end token> | Model input end identifier (defaults to "") |
| --eos <end token> | System input end identifier (defaults to "") |
| --temperature -t <temp> | Sampling temperature during generation (defaults to 1.0) |
| --time_out <time> | Maximum time (in seconds) for executing generated code (defaults to 100) |
| --verbose, -v | Print code error messages (optional) |
| --append, -a | Append output to historical results (optional) |

A simple usage example is as follows:
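The example command itself lies outside this hunk; a sketch assembled from the parameters listed above (the model path and output directory are placeholders):

```bash
# Hypothetical invocation: adjust the model path and output directory as needed.
python pal_inference.py internlm/internlm2-chat-7b ./gsm8k_output \
    --dataset gsm8k \
    --max_length 2048 \
    --top_p 0.8 \
    --verbose \
    --append
```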
View File

@@ -17,6 +17,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# isort: skip_file
import argparse
import copy
import json
@@ -31,68 +32,87 @@ import tqdm
from datasets import load_dataset
from torch import nn
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation.utils import (LogitsProcessorList,
                                           StoppingCriteriaList)
from transformers.utils import logging

logger = logging.get_logger(__name__)


def parse_args():
    parser = argparse.ArgumentParser(description='PAL Inference')
    parser.add_argument('model',
                        type=str,
                        help='Path to the pre-trained LLM used for inference.')
    parser.add_argument(
        'out_dir',
        type=str,
        help=
        'Name of the output folder where generated code snippets will be saved.'
    )
    parser.add_argument('--dataset',
                        default='gsm8k',
                        type=str,
                        help='Name of the dataset used for code generation.')
    parser.add_argument(
        '--max_length',
        default=2048,
        type=int,
        help='Maximum input token length for the natural language description.',
    )
    parser.add_argument(
        '--top_p',
        default=0.8,
        type=float,
        help='Probability threshold to choose sample tokens during generation.',
    )
    parser.add_argument(
        '--eoh',
        default='',
        type=str,
        help='End of human (user) token.',
    )
    parser.add_argument(
        '--eoa',
        default='',
        type=str,
        help='End of assistant (bot) token.',
    )
    parser.add_argument(
        '--eos',
        default='',
        type=str,
        help='End of system token.',
    )
    parser.add_argument(
        '--temperature',
        '-t',
        default=1.0,
        type=float,
        help='Temperature of token sampling during generation.')
    parser.add_argument(
        '--time_out',
        default=100,
        type=float,
        help='Maximum time allowed for executing generated code.')
    parser.add_argument(
        '--verbose',
        '-v',
        action='store_true',
        help=
        'Print code error information when executing generated code (optional).',
    )
    parser.add_argument(
        '--append',
        '-a',
        action='store_true',
        help='Append output to the history results (optional).')
    args = parser.parse_args()
    return args


class Timeout:
    """Timer to execute code.

    Adapted from https://github.com/reasoning-machines/pal
@@ -101,7 +121,7 @@ class Timeout:
        error_message (str)
    """

    def __init__(self, seconds=1, error_message='Timeout'):
        self.seconds = seconds
        self.error_message = error_message
@@ -133,15 +153,16 @@ def generate_interactive(
    generation_config: Optional[GenerationConfig] = None,
    logits_processor: Optional[LogitsProcessorList] = None,
    stopping_criteria: Optional[StoppingCriteriaList] = None,
    prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor],
                                                List[int]]] = None,
    additional_eos_token_id: Optional[int] = None,
    **kwargs,
):
    inputs = tokenizer([prompt], padding=True, return_tensors='pt')
    input_length = len(inputs['input_ids'][0])
    for k, v in inputs.items():
        inputs[k] = v.cuda()
    input_ids = inputs['input_ids']
    batch_size, input_ids_seq_length = input_ids.shape[0], input_ids.shape[-1]  # noqa: F841  # pylint: disable=W0612
    if generation_config is None:
        generation_config = model.generation_config
@@ -155,12 +176,13 @@ def generate_interactive(
        eos_token_id = [eos_token_id]
    if additional_eos_token_id is not None:
        eos_token_id.append(additional_eos_token_id)
    has_default_max_length = kwargs.get(
        'max_length') is None and generation_config.max_length is not None
    if has_default_max_length and generation_config.max_new_tokens is None:
        warnings.warn(
            f"Using `max_length`'s default ({generation_config.max_length}) to control the generation length. "
            'This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we'
            ' recommend using `max_new_tokens` to control the maximum length of the generation.',
            UserWarning,
        )
    elif generation_config.max_new_tokens is not None:
@@ -169,22 +191,23 @@ def generate_interactive(
        logger.warn(  # pylint: disable=W4902
            f"Both `max_new_tokens` (={generation_config.max_new_tokens}) and `max_length`(="
            f"{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence. "
            'Please refer to the documentation for more information. '
            '(https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)',
            UserWarning,
        )

    if input_ids_seq_length >= generation_config.max_length:
        input_ids_string = 'input_ids'
        logger.warning(
            f"Input length of {input_ids_string} is {input_ids_seq_length}, but `max_length` is set to"
            f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
            ' increasing `max_new_tokens`.')

    # 2. Set generation parameters if not already defined
    logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList(
    )
    stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList(
    )

    logits_processor = model._get_logits_processor(
        generation_config=generation_config,
@@ -195,14 +218,15 @@ def generate_interactive(
    )
    stopping_criteria = model._get_stopping_criteria(
        generation_config=generation_config,
        stopping_criteria=stopping_criteria)
    logits_warper = model._get_logits_warper(generation_config)

    unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1)
    scores = None
    while True:
        model_inputs = model.prepare_inputs_for_generation(
            input_ids, **model_kwargs)
        # forward pass to get next token
        outputs = model(
            **model_inputs,
@@ -226,8 +250,10 @@ def generate_interactive(
        # update generated ids, model inputs, and length for next step
        input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
        model_kwargs = model._update_model_kwargs_for_generation(
            outputs, model_kwargs, is_encoder_decoder=False)
        unfinished_sequences = unfinished_sequences.mul(
            (min(next_tokens != i for i in eos_token_id)).long())

        output_token_ids = input_ids[0].cpu().tolist()
        output_token_ids = output_token_ids[input_length:]
@@ -238,12 +264,13 @@ def generate_interactive(
            yield response
        # stop when each sentence is finished, or if we exceed the maximum length
        if unfinished_sequences.max() == 0 or stopping_criteria(
                input_ids, scores):
            break


class GenericRuntime:
    """Adapted from https://github.com/reasoning-machines/pal."""

    GLOBAL_DICT: dict = {}
    LOCAL_DICT = None
@@ -251,7 +278,8 @@ class GenericRuntime:
    def __init__(self):
        self._global_vars = copy.copy(self.GLOBAL_DICT)
        self._local_vars = copy.copy(
            self.LOCAL_DICT) if self.LOCAL_DICT else None

        for c in self.HEADERS:
            self.exec_code(c)
@ -268,7 +296,7 @@ class GenericRuntime:
@property @property
def answer(self): def answer(self):
return self._global_vars["answer"] return self._global_vars['answer']
class PALInterface: class PALInterface:
@ -292,7 +320,7 @@ class PALInterface:
tokenizer: AutoTokenizer, tokenizer: AutoTokenizer,
generation_config: GenerationConfig, generation_config: GenerationConfig,
additional_eos_token_id: int = 103028, additional_eos_token_id: int = 103028,
get_answer_expr: str = "solution()", get_answer_expr: str = 'solution()',
verbose: bool = False, verbose: bool = False,
): ):
self.runtime = GenericRuntime() self.runtime = GenericRuntime()
@ -308,11 +336,11 @@ class PALInterface:
# The api will generate response word by word # The api will generate response word by word
# we only need the last generation as the final results # we only need the last generation as the final results
for cur_gen in generate_interactive( for cur_gen in generate_interactive(
model=self.model, model=self.model,
tokenizer=self.tokenizer, tokenizer=self.tokenizer,
prompt=prompt, prompt=prompt,
additional_eos_token_id=self.additional_eos_token_id, additional_eos_token_id=self.additional_eos_token_id,
**asdict(self.generation_config), **asdict(self.generation_config),
): ):
continue continue
# Get final response # Get final response
@ -322,11 +350,11 @@ class PALInterface:
return code return code
def process_generation_to_code(self, gens: str): def process_generation_to_code(self, gens: str):
if "```python" in gens: if '```python' in gens:
gens = gens.split("```python")[1].split("```")[0] gens = gens.split('```python')[1].split('```')[0]
elif "```" in gens: elif '```' in gens:
gens = gens.split("```")[1].split("```")[0] gens = gens.split('```')[1].split('```')[0]
code = gens.split("\n") code = gens.split('\n')
return code return code
def run(self, prompt, time_out: float = 100): def run(self, prompt, time_out: float = 100):
@ -340,7 +368,7 @@ class PALInterface:
return exec_result return exec_result
def execute(self, code: List[str]): def execute(self, code: List[str]):
self.runtime.exec_code("\n".join(code)) self.runtime.exec_code('\n'.join(code))
return self.runtime.eval_code(self.answer_expr) return self.runtime.eval_code(self.answer_expr)
def clear_history(self): def clear_history(self):
@ -348,21 +376,24 @@ class PALInterface:
def load_model(args): def load_model(args):
model = AutoModelForCausalLM.from_pretrained(args.model, trust_remote_code=True).to(torch.bfloat16).cuda() model = AutoModelForCausalLM.from_pretrained(args.model,
tokenizer = AutoTokenizer.from_pretrained(args.model, trust_remote_code=True) trust_remote_code=True).to(
torch.bfloat16).cuda()
tokenizer = AutoTokenizer.from_pretrained(args.model,
trust_remote_code=True)
return model, tokenizer return model, tokenizer
def load_data(args): def load_data(args):
# Load data from huggingface dataset # Load data from huggingface dataset
if args.dataset == "gsm8k": if args.dataset == 'gsm8k':
gsm8k = load_dataset(path=args.dataset, name="main") gsm8k = load_dataset(path=args.dataset, name='main')
test_set = gsm8k["test"] test_set = gsm8k['test']
input_data = [] input_data = []
for data in test_set: for data in test_set:
question = data["question"] question = data['question']
target = float(data["answer"].split("#")[-1].replace(",", "")) target = float(data['answer'].split('#')[-1].replace(',', ''))
input_data.append({"question": question, "target": target}) input_data.append({'question': question, 'target': target})
else: else:
raise NotImplementedError raise NotImplementedError
return input_data return input_data
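For reference, a self-contained illustration of the answer parsing used in `load_data` above (the sample record below is invented, merely shaped like a GSM8K entry):

```python
# GSM8K answers end with '#### <number>'; taking the text after the last '#'
# and stripping thousands separators yields the numeric target.
answer = 'Natalia sold 48/2 = 24 clips in May.\n#### 1,072'
target = float(answer.split('#')[-1].replace(',', ''))
assert target == 1072.0
```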
@ -419,52 +450,62 @@ def main():
args = parse_args() args = parse_args()
print("load model begin.") print('load model begin.')
model, tokenizer = load_model(args) model, tokenizer = load_model(args)
print("load model end.") print('load model end.')
generation_config = GenerationConfig(max_length=args.max_length, top_p=args.top_p, temperature=args.temperature) generation_config = GenerationConfig(max_length=args.max_length,
top_p=args.top_p,
temperature=args.temperature)
verbose = args.verbose verbose = args.verbose
interface = PALInterface(model=model, tokenizer=tokenizer, generation_config=generation_config, verbose=verbose) interface = PALInterface(model=model,
tokenizer=tokenizer,
generation_config=generation_config,
verbose=verbose)
if not os.path.exists(args.out_dir): if not os.path.exists(args.out_dir):
os.makedirs(args.out_dir) os.makedirs(args.out_dir)
savepath = os.path.join(args.out_dir, args.dataset + ".json") savepath = os.path.join(args.out_dir, args.dataset + '.json')
# Load from history results # Load from history results
if args.append and os.path.exists(savepath): if args.append and os.path.exists(savepath):
lines = open(savepath).readlines() lines = open(savepath).readlines()
num_skip_exps = len(lines) num_skip_exps = len(lines)
scores = [x["score"] for x in map(json.loads, lines)] scores = [x['score'] for x in map(json.loads, lines)]
else: else:
num_skip_exps = 0 num_skip_exps = 0
scores = [] scores = []
examples = load_data(args) examples = load_data(args)
with open(savepath, "a" if args.append else "w") as f: with open(savepath, 'a' if args.append else 'w') as f:
pbar = tqdm.tqdm(examples[num_skip_exps:], initial=num_skip_exps, total=len(examples)) pbar = tqdm.tqdm(examples[num_skip_exps:],
initial=num_skip_exps,
total=len(examples))
for x in pbar: for x in pbar:
question = x["question"] question = x['question']
result = copy.copy(x) result = copy.copy(x)
try: try:
answer = interface.run( answer = interface.run(
prompt=PROMPT.format(question=question, eoh=args.eoh, eoa=args.eoa, eos=args.eos), prompt=PROMPT.format(question=question,
eoh=args.eoh,
eoa=args.eoa,
eos=args.eos),
time_out=args.time_out, time_out=args.time_out,
) )
answer = float(answer) answer = float(answer)
score = 1 if abs(answer - x["target"]) < 1e-3 else 0 score = 1 if abs(answer - x['target']) < 1e-3 else 0
except Exception as e: except Exception as e:
if verbose: if verbose:
print(e) print(e)
answer = "" answer = ''
score = 0 score = 0
scores.append(score) scores.append(score)
result["answer"] = answer result['answer'] = answer
result["score"] = score result['score'] = score
result["generation"] = interface.history result['generation'] = interface.history
f.write(json.dumps(result) + "\n") f.write(json.dumps(result) + '\n')
interface.clear_history() interface.clear_history()
f.flush() f.flush()
@ -473,5 +514,5 @@ def main():
torch.cuda.empty_cache() torch.cuda.empty_cache()
if __name__ == "__main__": if __name__ == '__main__':
main() main()

View File

@ -21,20 +21,21 @@ python pal_inference.py \
``` ```
Parameter description:

| Parameter | Description |
| :-----------------------: | :-----------------------------------------: |
| \<model> | Path of the model used for inference |
| \<out_dir> | The generated code is saved in the specified output folder |
| --dataset <dataset> | Name of the dataset used for code generation (default: gsm8k) |
| --max_length <length> | Maximum input token length of the model (default: 2048) |
| --top_p <threshold> | Cumulative probability threshold for candidate tokens (default: 0.8) |
| --eoh <end token> | End identifier for user input (default: "") |
| --eoa <end token> | End identifier for model output (default: "") |
| --eos <end token> | End identifier for system input (default: "") |
| --temperature, -t <temp> | Sampling temperature during generation (default: 1.0) |
| --time_out <time> | Maximum time allowed for executing the generated code (default: 100) |
| --verbose, -v | Print error messages from code execution (optional) |
| --append, -a | Append the output to historical results (optional) |

A simple usage example is shown below:

View File

@ -70,14 +70,14 @@ The weather in Shanghai is 22 celsius<|im_end|>
``` ```
1. First, there will be a new system prompt that describes the protocol of the tools in JSON format. The content starts with `<|im_start|>system name=<|plugin|>\n` and ends with `<|im_end|>`. `name=<|plugin|>` indicates that the system prompt comes from tools. InternLM2-Chat supports and follows multiple system prompts in the chat history, so we can also see a system prompt asking the model to be a helpful assistant.
2. The model calls the tools in a streaming format after receiving the user prompt, i.e., it will naturally say something (thoughts, a helpful response) and then call the tools. The model outputs `<|action_start|><|plugin|>`, where `<|action_start|>` indicates that the model needs to call extensions and `<|plugin|>` indicates that it wants to use common tools. The model then outputs the calling parameters in JSON format and ends them with `<|action_end|>`.
3. The environment returns the function-calling output, starting with `<|im_start|>environment name=<|plugin|>` to indicate that it comes from the function call in the environment. The content is in JSON format, e.g. `{"temperature": 22}`, and ends with `<|im_end|>`.
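To make the protocol concrete, here is a minimal sketch of how a client could extract the streamed tool call between `<|action_start|><|plugin|>` and `<|action_end|>` and wrap the tool's result back into an environment message. This is not an official utility; the helper names and the exact fields of the call JSON are illustrative.

```python
import json

ACTION_START = '<|action_start|><|plugin|>'
ACTION_END = '<|action_end|>'


def split_tool_call(model_turn: str):
    """Separate the natural-language part from the JSON tool call, if any."""
    if ACTION_START not in model_turn:
        return model_turn, None
    thoughts, rest = model_turn.split(ACTION_START, 1)
    payload = rest.split(ACTION_END, 1)[0]
    return thoughts.strip(), json.loads(payload)


def wrap_environment_reply(result: dict) -> str:
    """Format a tool result as the environment message described above."""
    return ('<|im_start|>environment name=<|plugin|>\n'
            f'{json.dumps(result, ensure_ascii=False)}<|im_end|>\n')


# Example, using the weather query from the transcript above.
turn = ('Sure, I will search for the weather of Shanghai.'
        '<|action_start|><|plugin|>\n'
        '{"name": "get_current_weather", "parameters": {"location": "Shanghai"}}'
        '<|action_end|>')
thoughts, call = split_tool_call(turn)
print(thoughts)
print(call['name'])
print(wrap_environment_reply({'temperature': 22}))
```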
### Code Interpreter ### Code Interpreter
InternLM2-Chat supports using a code interpreter to solve complex math problems and perform data analysis. The whole process is similar to function calling, as shown below.
``` ````
<|im_start|>system <|im_start|>system
You are InternLM2-Chat, a harmless AI assistant<|im_end|> You are InternLM2-Chat, a harmless AI assistant<|im_end|>
<|im_start|>system name=<|interpreter|> <|im_start|>system name=<|interpreter|>
@ -130,17 +130,17 @@ Can you explain the code?<|im_end|>
<|im_start|>assistant <|im_start|>assistant
The code uses Python's Plotly library to create a polar plot that shows the relationship between wind direction and rainfall. It starts by creating a Plotly Figure object. Then, I added two `Scatterpolar` traces. The first trace shows the relationship between wind direction at 9 am and average rainfall, and the second trace shows the same for 3 pm. In these traces, the `r` parameter represents the average rainfall, and `theta` represents the wind direction. `fill='toself'` means the shape is filled to its own boundary. Next, I updated the chart layout, hid the radial axis, added a legend, set the title and its font, chose a dark theme, and set the chart's background color, width, and height. Finally, the chart was displayed using `fig.show()`. The code uses Python's Plotly library to create a polar plot that shows the relationship between wind direction and rainfall. It starts by creating a Plotly Figure object. Then, I added two `Scatterpolar` traces. The first trace shows the relationship between wind direction at 9 am and average rainfall, and the second trace shows the same for 3 pm. In these traces, the `r` parameter represents the average rainfall, and `theta` represents the wind direction. `fill='toself'` means the shape is filled to its own boundary. Next, I updated the chart layout, hid the radial axis, added a legend, set the title and its font, chose a dark theme, and set the chart's background color, width, and height. Finally, the chart was displayed using `fig.show()`.
<|im_end|> <|im_end|>
``` ````
1. First, there will be a new system prompt that describes the usage of the code interpreter. The content starts with `<|im_start|>system name=<|interpreter|>\n` and ends with `<|im_end|>`. `name=<|interpreter|>` indicates that the system prompt comes from the code interpreter. InternLM2-Chat supports and follows multiple system prompts in the chat history, so we can also see a system prompt asking the model to be a helpful assistant.
2. The model calls the tools in a streaming format after receiving the user prompt, i.e., it will naturally say something (thoughts, a helpful response) and then call the code interpreter. The model outputs `<|action_start|><|interpreter|>`, where `<|action_start|>` indicates that the model needs to call extensions and `<|interpreter|>` indicates that it wants to use the code interpreter. The model then outputs the code in a markdown-style code block and ends it with `<|action_end|>`.
3. The environment executes the code and returns the output, starting with `<|im_start|>environment name=<|interpreter|>` to indicate that it comes from the code interpreter in the environment; the content ends with `<|im_end|>`.
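Analogously, a rough sketch of handling a code-interpreter turn: pull the Python source out of the markdown block inside the `<|action_start|><|interpreter|> ... <|action_end|>` span, run it, and send the captured output back as an environment message. The helper names below are invented for this example and are not part of the model or of any library.

````python
import contextlib
import io

ACTION_START = '<|action_start|><|interpreter|>'
ACTION_END = '<|action_end|>'


def extract_code(model_turn: str) -> str:
    """Return the Python source inside the markdown block of the action span."""
    span = model_turn.split(ACTION_START, 1)[1].split(ACTION_END, 1)[0]
    if '```python' in span:
        span = span.split('```python', 1)[1]
    return span.split('```', 1)[0]


def run_and_wrap(code: str) -> str:
    """Execute the code, capture stdout and wrap it as an environment message."""
    buffer = io.StringIO()
    with contextlib.redirect_stdout(buffer):
        exec(code, {})  # a real deployment would sandbox this call
    return ('<|im_start|>environment name=<|interpreter|>\n'
            f'{buffer.getvalue()}<|im_end|>\n')
````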
### Function Call + Code Interpreter ### Function Call + Code Interpreter
InternLM2-Chat can automatically choose between tools and the code interpreter. We only need to send the system prompts of both to the model. An example is shown below.
``` ````
<|im_start|>system <|im_start|>system
You are InternLM2-Chat, a harmless AI assistant<|im_end|> You are InternLM2-Chat, a harmless AI assistant<|im_end|>
<|im_start|>system name=<|interpreter|> <|im_start|>system name=<|interpreter|>
@ -221,4 +221,4 @@ Sure, I will search for the weather of Shanghai.<|action_start|><|plugin|>
{"temperature": 22}<|im_end|> {"temperature": 22}<|im_end|>
<|im_start|>assistant <|im_start|>assistant
The weather in Shanghai is 22 celsius<|im_end|> The weather in Shanghai is 22 celsius<|im_end|>
``` ````

View File

@ -77,7 +77,7 @@ InternLM2-Chat 的通用工具调用function call支持多种格式
InternLM2-Chat 支持使用代码解释器解决复杂数学问题和进行数据分析。整个调用流程和通用工具调用类似,如下所示 InternLM2-Chat 支持使用代码解释器解决复杂数学问题和进行数据分析。整个调用流程和通用工具调用类似,如下所示
``` ````
<|im_start|>system <|im_start|>system
你是书生浦语2一个无害的人工智能助手<|im_end|> 你是书生浦语2一个无害的人工智能助手<|im_end|>
<|im_start|>system name=<|interpreter|> <|im_start|>system name=<|interpreter|>
@ -128,19 +128,18 @@ fig.show()
<|im_start|>assistant <|im_start|>assistant
我使用的是Python的Plotly库来创建一个极坐标图以展示风向与降雨量之间的关系。代码中首先创建了一个Plotly的Figure对象。接着我添加了两个`Scatterpolar`追踪。第一个追踪展示的是上午9点的风向与降雨量的平均值的关系第二个追踪展示的是下午3点的风向与降雨量的平均值的关系。在这两个追踪中`r`参数代表降雨量的平均值,`theta`参数代表风向。`fill='toself'`表示图形以自身为边界进行填充。然后我更新了图表的布局,隐藏了径向轴,添加了图例,设置了标题和标题字体,选择了暗色主题,并设定了图表的背景色、宽度和高度。最后,使用`fig.show()`展示了图表。 我使用的是Python的Plotly库来创建一个极坐标图以展示风向与降雨量之间的关系。代码中首先创建了一个Plotly的Figure对象。接着我添加了两个`Scatterpolar`追踪。第一个追踪展示的是上午9点的风向与降雨量的平均值的关系第二个追踪展示的是下午3点的风向与降雨量的平均值的关系。在这两个追踪中`r`参数代表降雨量的平均值,`theta`参数代表风向。`fill='toself'`表示图形以自身为边界进行填充。然后我更新了图表的布局,隐藏了径向轴,添加了图例,设置了标题和标题字体,选择了暗色主题,并设定了图表的背景色、宽度和高度。最后,使用`fig.show()`展示了图表。
<|im_end|> <|im_end|>
``` ````
1. 首先在系统提示中提供代码解释器的格式和字段描述。内容以 `<|im_start|>system name=<|interpreter|>\n`开头,`<|im_end|>` 结尾,`name=<|interpreter|>` 体现了这是来自代码解释器的指令。InternLM2-Chat 支持 system 角色对模型的提示和约束多次出现。所以我们会看到前面还有关于对话的要求。 1. 首先在系统提示中提供代码解释器的格式和字段描述。内容以 `<|im_start|>system name=<|interpreter|>\n`开头,`<|im_end|>` 结尾,`name=<|interpreter|>` 体现了这是来自代码解释器的指令。InternLM2-Chat 支持 system 角色对模型的提示和约束多次出现。所以我们会看到前面还有关于对话的要求。
2. 用户可以上传一个文件,并对模型提出要求,文件的上传会以单独的形式向模型发出一条指令,以 `<|im_start|>user name=file` 开头,以 json 形式给出路径和文件大小` 2. 用户可以上传一个文件,并对模型提出要求,文件的上传会以单独的形式向模型发出一条指令,以 `<|im_start|>user name=file` 开头,以 json 形式给出路径和文件大小` [{"path": "data.csv", size='10K'}]`,以 `<|im_end|>`结尾。
[{"path": "data.csv", size='10K'}]`,以 `<|im_end|>`结尾。 3. 模型在接受到用户指令后,会以流式的形式调用工具,及自然地生成文字进行思考/回应用户,然后输出`<|action_start|><|interpreter|>`。`<|action_start|>`表示要调用外部插件,同时 `<|interpreter|>` 表示调用的是代码解释器。然后模型输出 markdown 中 python 代码块格式代码内容,再以 `<|action_end|>` 表示工具调用结束。
2. 模型在接受到用户指令后,会以流式的形式调用工具,及自然地生成文字进行思考/回应用户,然后输出`<|action_start|><|interpreter|>`。`<|action_start|>`表示要调用外部插件,同时 `<|interpreter|>` 表示调用的是代码解释器。然后模型输出 markdown 中 python 代码块格式代码内容,再以 `<|action_end|>` 表示工具调用结束。 4. 系统会执行代码块中的代码,然后返回调用结果,以 `<|im_start|>environment name=<|interpreter|>`开头,表示是来自环境关于代码解释器执行的输出,以`<|im_end|>`结尾。
3. 系统会执行代码块中的代码,然后返回调用结果,以 `<|im_start|>environment name=<|interpreter|>`开头,表示是来自环境关于代码解释器执行的输出,以`<|im_end|>`结尾。
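The list above also notes that a user file upload reaches the model as a separate instruction. Below is a small, hedged sketch of building such a message; the helper is illustrative, and while the docs show the size field in a loose JSON-like form, the sketch emits strict JSON.

```python
import json
import os


def make_file_message(path: str) -> str:
    """Build the `<|im_start|>user name=file` instruction described above."""
    size = f'{max(os.path.getsize(path), 1024) // 1024}K'
    meta = json.dumps([{'path': path, 'size': size}], ensure_ascii=False)
    return f'<|im_start|>user name=file\n{meta}<|im_end|>\n'
```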
### 同时使用工具和代码解释器 ### 同时使用工具和代码解释器
InternLM2-Chat 能够在一个对话过程中自主选择调用工具或代码解释器。在工具和代码解释器同时开启的情况下,只需要将各自的系统提示合并在一起给模型即可。一个调用工具和代码解释器的对话历史样例如下。 InternLM2-Chat 能够在一个对话过程中自主选择调用工具或代码解释器。在工具和代码解释器同时开启的情况下,只需要将各自的系统提示合并在一起给模型即可。一个调用工具和代码解释器的对话历史样例如下。
``` ````
<|im_start|>system <|im_start|>system
你是书生浦语2一个无害的人工智能助手<|im_end|> 你是书生浦语2一个无害的人工智能助手<|im_end|>
<|im_start|>system name=<|interpreter|> <|im_start|>system name=<|interpreter|>
@ -219,4 +218,4 @@ fig.show()
{"temperature": 22}<|im_end|> {"temperature": 22}<|im_end|>
<|im_start|>assistant <|im_start|>assistant
上海的天气是 22 摄氏度<|im_end|> 上海的天气是 22 摄氏度<|im_end|>
``` ````

View File

@ -6,7 +6,6 @@ English | [简体中文](lmdeploy_zh_cn.md)
This article primarily highlights the basic usage of LMDeploy. For a comprehensive understanding of the toolkit, we invite you to refer to [the tutorials](https://lmdeploy.readthedocs.io/en/latest/). This article primarily highlights the basic usage of LMDeploy. For a comprehensive understanding of the toolkit, we invite you to refer to [the tutorials](https://lmdeploy.readthedocs.io/en/latest/).
## Installation ## Installation
Install lmdeploy with pip (Python 3.8+).
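For example, after `pip install lmdeploy`, offline batched inference follows the pattern sketched below (the model name is only an example; see the LMDeploy tutorials for the full API):

```python
from lmdeploy import pipeline

# Build an inference pipeline around an InternLM2 chat model.
pipe = pipeline('internlm/internlm2-chat-7b')
# Offline batched inference: a list of prompts in, a list of responses out.
responses = pipe(['Hi, please introduce yourself.', 'Shanghai is'])
print(responses)
```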

View File

@ -6,7 +6,6 @@
本文主要介绍 LMDeploy 的基本用法,包括[安装](#安装)、[离线批处理](#离线批处理)和[推理服务](#推理服务)。更全面的介绍请参考 [LMDeploy 用户指南](https://lmdeploy.readthedocs.io/zh-cn/latest/)。 本文主要介绍 LMDeploy 的基本用法,包括[安装](#安装)、[离线批处理](#离线批处理)和[推理服务](#推理服务)。更全面的介绍请参考 [LMDeploy 用户指南](https://lmdeploy.readthedocs.io/zh-cn/latest/)。
## 安装 ## 安装
使用 pippython 3.8+)安装 LMDeploy 使用 pippython 3.8+)安装 LMDeploy
@ -27,6 +26,7 @@ print(response)
``` ```
LMDeploy 实现了 dynamic ntk支持长文本外推。使用如下代码可以把 InternLM2 的文本外推到 200K LMDeploy 实现了 dynamic ntk支持长文本外推。使用如下代码可以把 InternLM2 的文本外推到 200K
```python ```python
from lmdeploy import pipeline, TurbomindEngineConfig from lmdeploy import pipeline, TurbomindEngineConfig
engine_config = TurbomindEngineConfig(session_len=200000, engine_config = TurbomindEngineConfig(session_len=200000,
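# NOTE (editorial, hedged): the snippet above is cut off by the diff hunk. One
# plausible completion, assuming `TurbomindEngineConfig` accepts a
# `rope_scaling_factor` argument as in LMDeploy's long-context examples:
#     engine_config = TurbomindEngineConfig(session_len=200000,
#                                           rope_scaling_factor=2.0)
#     pipe = pipeline('internlm/internlm2-chat-7b', backend_config=engine_config)
#     print(pipe('...a very long prompt...'))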

View File

@ -1,37 +1,48 @@
# Multi-Chats by OpenAOE # Multi-Chats by OpenAOE
English | [简体中文](openaoe_zh_cn.md) English | [简体中文](openaoe_zh_cn.md)
## Introduction ## Introduction
[OpenAOE](https://github.com/InternLM/OpenAOE) is an LLM-Group-Chat framework, which can chat with multiple LLMs (commercial or open-source) at the same time. OpenAOE provides both a backend API and a web UI to meet different usage needs.
Currently supported LLMs: [InternLM2-Chat-7B](https://huggingface.co/internlm/internlm2-chat-7b), [InternLM-Chat-7B](https://huggingface.co/internlm/internlm-chat-7b), GPT-3.5, GPT-4, Google PaLM, MiniMax, Claude, Spark, etc.
## Quick Run ## Quick Run
> [!TIP]
> \[!TIP\]
> Requires Python >= 3.9
We provide three ways to run OpenAOE: `run by pip`, `run by docker`, and `run by source code`.
### Run by pip ### Run by pip
#### **Install** #### **Install**
```shell ```shell
pip install -U openaoe pip install -U openaoe
``` ```
#### **Start** #### **Start**
```shell ```shell
openaoe -f /path/to/your/config-template.yaml openaoe -f /path/to/your/config-template.yaml
``` ```
### Run by docker ### Run by docker
#### **Install** #### **Install**
There are two ways to get the OpenAOE docker image:
1. pull the OpenAOE docker image 1. pull the OpenAOE docker image
```shell ```shell
docker pull opensealion/openaoe:latest docker pull opensealion/openaoe:latest
``` ```
2. or build a docker image 2. or build a docker image
```shell ```shell
git clone https://github.com/internlm/OpenAOE git clone https://github.com/internlm/OpenAOE
cd OpenAOE cd OpenAOE
@ -39,32 +50,38 @@ docker build . -f docker/Dockerfile -t openaoe:latest
``` ```
#### **Start** #### **Start**
```shell ```shell
docker run -p 10099:10099 -v /path/to/your/config-template.yaml:/app/config.yaml --name OpenAOE opensealion/openaoe:latest docker run -p 10099:10099 -v /path/to/your/config-template.yaml:/app/config.yaml --name OpenAOE opensealion/openaoe:latest
``` ```
### Run by source code ### Run by source code
#### **Install** #### **Install**
1. clone this project 1. clone this project
```shell ```shell
git clone https://github.com/internlm/OpenAOE git clone https://github.com/internlm/OpenAOE
``` ```
2. \[_optional_\] build the frontend project if the frontend code has changed
```shell ```shell
cd OpenAOE/openaoe/frontend cd OpenAOE/openaoe/frontend
npm install npm install
npm run build npm run build
``` ```
#### **Start** #### **Start**
```shell ```shell
cd OpenAOE cd OpenAOE
pip install -r openaoe/backend/requirements.txt pip install -r openaoe/backend/requirements.txt
python -m openaoe.main -f /path/to/your/config-template.yaml python -m openaoe.main -f /path/to/your/config-template.yaml
``` ```
> [!TIP] > \[!TIP\]
> `/path/to/your/config-template.yaml` is the configuration file loaded by OpenAOE at startup,
> which contains the relevant configuration information for the LLMs,
> including: API URLs, AK/SKs, tokens, etc.
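Since the file is plain YAML, it can be inspected with a few lines of Python; the key names depend on the template shipped with OpenAOE, and nothing below is an official OpenAOE API:

```python
import yaml  # pip install pyyaml

with open('/path/to/your/config-template.yaml') as f:
    config = yaml.safe_load(f)
# Top-level sections typically hold per-model settings such as API URLs and keys.
print(list(config))
```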

View File

@ -2,37 +2,47 @@
[English](openaoe.md) | 简体中文 [English](openaoe.md) | 简体中文
## 介绍 ## 介绍
[OpenAOE](https://github.com/InternLM/OpenAOE) 是一个 LLM-Group-Chat 框架,可以同时与多个商业大模型或开源大模型进行聊天。 OpenAOE还提供后端API和WEB-UI以满足不同的使用需求。 [OpenAOE](https://github.com/InternLM/OpenAOE) 是一个 LLM-Group-Chat 框架,可以同时与多个商业大模型或开源大模型进行聊天。 OpenAOE还提供后端API和WEB-UI以满足不同的使用需求。
目前已经支持的大模型有: [InternLM2-Chat-7B](https://huggingface.co/internlm/internlm2-chat-7b), [IntenLM-Chat-7B](https://huggingface.co/internlm/internlm-chat-7b), GPT-3.5, GPT-4, Google PaLM, MiniMax, Claude, 讯飞星火等。 目前已经支持的大模型有: [InternLM2-Chat-7B](https://huggingface.co/internlm/internlm2-chat-7b), [IntenLM-Chat-7B](https://huggingface.co/internlm/internlm-chat-7b), GPT-3.5, GPT-4, Google PaLM, MiniMax, Claude, 讯飞星火等。
## 快速安装 ## 快速安装
我们将提供 3 种不同的方式安装:基于 pip、基于 docker 以及基于源代码,实现开箱即用。 我们将提供 3 种不同的方式安装:基于 pip、基于 docker 以及基于源代码,实现开箱即用。
### 基于 pip ### 基于 pip
> [!TIP]
> \[!TIP\]
> 需要 python >= 3.9 > 需要 python >= 3.9
#### **安装** #### **安装**
```shell ```shell
pip install -U openaoe pip install -U openaoe
``` ```
#### **运行** #### **运行**
```shell ```shell
openaoe -f /path/to/your/config-template.yaml openaoe -f /path/to/your/config-template.yaml
``` ```
### 基于 docker ### 基于 docker
#### **安装** #### **安装**
有两种方式获取 OpenAOE 的 docker 镜像: 有两种方式获取 OpenAOE 的 docker 镜像:
1. 官方拉取 1. 官方拉取
```shell ```shell
docker pull opensealion/openaoe:latest docker pull opensealion/openaoe:latest
``` ```
2. 本地构建 2. 本地构建
```shell ```shell
git clone https://github.com/internlm/OpenAOE git clone https://github.com/internlm/OpenAOE
cd OpenAOE cd OpenAOE
@ -40,31 +50,37 @@ docker build . -f docker/Dockerfile -t openaoe:latest
``` ```
#### **运行** #### **运行**
```shell ```shell
docker run -p 10099:10099 -v /path/to/your/config-template.yaml:/app/config.yaml --name OpenAOE opensealion/openaoe:latest docker run -p 10099:10099 -v /path/to/your/config-template.yaml:/app/config.yaml --name OpenAOE opensealion/openaoe:latest
``` ```
### 基于源代码 ### 基于源代码
#### **安装** #### **安装**
1. 克隆项目 1. 克隆项目
```shell ```shell
git clone https://github.com/internlm/OpenAOE git clone https://github.com/internlm/OpenAOE
``` ```
2. [_可选_] (如果前端代码发生变动)重新构建前端项目
2. \[_可选_\] (如果前端代码发生变动)重新构建前端项目
```shell ```shell
cd OpenAOE/openaoe/frontend cd OpenAOE/openaoe/frontend
npm install npm install
npm run build npm run build
``` ```
#### **运行** #### **运行**
```shell ```shell
cd OpenAOE cd OpenAOE
pip install -r openaoe/backend/requirements.txt pip install -r openaoe/backend/requirements.txt
python -m openaoe.main -f /path/to/your/config-template.yaml python -m openaoe.main -f /path/to/your/config-template.yaml
`````` ```
> [!TIP] > \[!TIP\]
> `/path/to/your/config-template.yaml` 是 OpenAOE 启动时读取的配置文件,里面包含了大模型的相关配置信息, > `/path/to/your/config-template.yaml` 是 OpenAOE 启动时读取的配置文件,里面包含了大模型的相关配置信息,
> 包括调用API地址、AKSK、Token等信息是 OpenAOE 启动的必备文件。模板文件可以在 `openaoe/backend/config/config-template.yaml` 中找到。 > 包括调用API地址、AKSK、Token等信息是 OpenAOE 启动的必备文件。模板文件可以在 `openaoe/backend/config/config-template.yaml` 中找到。

View File

@ -1,13 +1,20 @@
""" """This script refers to the dialogue example of streamlit, the interactive
This script refers to the dialogue example of streamlit, the interactive generation code of chatglm2 and transformers. generation code of chatglm2 and transformers.
We mainly modified part of the code logic to adapt to the generation of our model.
We mainly modified part of the code logic to adapt to the
generation of our model.
Please refer to these links below for more information: Please refer to these links below for more information:
1. streamlit chat example: https://docs.streamlit.io/knowledge-base/tutorials/build-conversational-apps 1. streamlit chat example:
2. chatglm2: https://github.com/THUDM/ChatGLM2-6B https://docs.streamlit.io/knowledge-base/tutorials/build-conversational-apps
3. transformers: https://github.com/huggingface/transformers 2. chatglm2:
Please run with the command `streamlit run path/to/web_demo.py --server.address=0.0.0.0 --server.port 7860`. https://github.com/THUDM/ChatGLM2-6B
3. transformers:
https://github.com/huggingface/transformers
Please run with the command `streamlit run path/to/web_demo.py
--server.address=0.0.0.0 --server.port 7860`.
Using `python path/to/web_demo.py` may cause unknown problems. Using `python path/to/web_demo.py` may cause unknown problems.
""" """
# isort: skip_file
import copy import copy
import warnings import warnings
from dataclasses import asdict, dataclass from dataclasses import asdict, dataclass
@ -16,7 +23,8 @@ from typing import Callable, List, Optional
import streamlit as st import streamlit as st
import torch import torch
from torch import nn from torch import nn
from transformers.generation.utils import LogitsProcessorList, StoppingCriteriaList from transformers.generation.utils import (LogitsProcessorList,
StoppingCriteriaList)
from transformers.utils import logging from transformers.utils import logging
from transformers import AutoTokenizer, AutoModelForCausalLM # isort: skip from transformers import AutoTokenizer, AutoModelForCausalLM # isort: skip
@ -42,16 +50,17 @@ def generate_interactive(
generation_config: Optional[GenerationConfig] = None, generation_config: Optional[GenerationConfig] = None,
logits_processor: Optional[LogitsProcessorList] = None, logits_processor: Optional[LogitsProcessorList] = None,
stopping_criteria: Optional[StoppingCriteriaList] = None, stopping_criteria: Optional[StoppingCriteriaList] = None,
prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], List[int]]] = None, prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor],
List[int]]] = None,
additional_eos_token_id: Optional[int] = None, additional_eos_token_id: Optional[int] = None,
**kwargs, **kwargs,
): ):
inputs = tokenizer([prompt], padding=True, return_tensors="pt") inputs = tokenizer([prompt], padding=True, return_tensors='pt')
input_length = len(inputs["input_ids"][0]) input_length = len(inputs['input_ids'][0])
for k, v in inputs.items(): for k, v in inputs.items():
inputs[k] = v.cuda() inputs[k] = v.cuda()
input_ids = inputs["input_ids"] input_ids = inputs['input_ids']
batch_size, input_ids_seq_length = input_ids.shape[0], input_ids.shape[-1] # noqa: F841 # pylint: disable=W0612 _, input_ids_seq_length = input_ids.shape[0], input_ids.shape[-1]
if generation_config is None: if generation_config is None:
generation_config = model.generation_config generation_config = model.generation_config
generation_config = copy.deepcopy(generation_config) generation_config = copy.deepcopy(generation_config)
@ -64,36 +73,45 @@ def generate_interactive(
eos_token_id = [eos_token_id] eos_token_id = [eos_token_id]
if additional_eos_token_id is not None: if additional_eos_token_id is not None:
eos_token_id.append(additional_eos_token_id) eos_token_id.append(additional_eos_token_id)
has_default_max_length = kwargs.get("max_length") is None and generation_config.max_length is not None has_default_max_length = kwargs.get(
'max_length') is None and generation_config.max_length is not None
if has_default_max_length and generation_config.max_new_tokens is None: if has_default_max_length and generation_config.max_new_tokens is None:
warnings.warn( warnings.warn(
f"Using `max_length`'s default ({generation_config.max_length}) to control the generation length. " f"Using 'max_length''s default ({repr(generation_config.max_length)}) \
"This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we" to control the generation length. "
" recommend using `max_new_tokens` to control the maximum length of the generation.", 'This behaviour is deprecated and will be removed from the \
config in v5 of Transformers -- we'
' recommend using `max_new_tokens` to control the maximum \
length of the generation.',
UserWarning, UserWarning,
) )
elif generation_config.max_new_tokens is not None: elif generation_config.max_new_tokens is not None:
generation_config.max_length = generation_config.max_new_tokens + input_ids_seq_length generation_config.max_length = generation_config.max_new_tokens + \
input_ids_seq_length
if not has_default_max_length: if not has_default_max_length:
logger.warn( # pylint: disable=W4902 logger.warn( # pylint: disable=W4902
f"Both `max_new_tokens` (={generation_config.max_new_tokens}) and `max_length`(=" f"Both 'max_new_tokens' (={generation_config.max_new_tokens}) "
f"{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence. " f"and 'max_length'(={generation_config.max_length}) seem to "
"Please refer to the documentation for more information. " "have been set. 'max_new_tokens' will take precedence. "
"(https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)", 'Please refer to the documentation for more information. '
'(https://huggingface.co/docs/transformers/main/'
'en/main_classes/text_generation)',
UserWarning, UserWarning,
) )
if input_ids_seq_length >= generation_config.max_length: if input_ids_seq_length >= generation_config.max_length:
input_ids_string = "input_ids" input_ids_string = 'input_ids'
logger.warning( logger.warning(
f"Input length of {input_ids_string} is {input_ids_seq_length}, but `max_length` is set to" f"Input length of {input_ids_string} is {input_ids_seq_length}, "
f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider" f"but 'max_length' is set to {generation_config.max_length}. "
" increasing `max_new_tokens`." 'This can lead to unexpected behavior. You should consider'
) " increasing 'max_new_tokens'.")
# 2. Set generation parameters if not already defined # 2. Set generation parameters if not already defined
logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList() logits_processor = logits_processor if logits_processor is not None \
stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList() else LogitsProcessorList()
stopping_criteria = stopping_criteria if stopping_criteria is not None \
else StoppingCriteriaList()
logits_processor = model._get_logits_processor( logits_processor = model._get_logits_processor(
generation_config=generation_config, generation_config=generation_config,
@ -104,14 +122,15 @@ def generate_interactive(
) )
stopping_criteria = model._get_stopping_criteria( stopping_criteria = model._get_stopping_criteria(
generation_config=generation_config, stopping_criteria=stopping_criteria generation_config=generation_config,
) stopping_criteria=stopping_criteria)
logits_warper = model._get_logits_warper(generation_config) logits_warper = model._get_logits_warper(generation_config)
unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1) unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1)
scores = None scores = None
while True: while True:
model_inputs = model.prepare_inputs_for_generation(input_ids, **model_kwargs) model_inputs = model.prepare_inputs_for_generation(
input_ids, **model_kwargs)
# forward pass to get next token # forward pass to get next token
outputs = model( outputs = model(
**model_inputs, **model_inputs,
@ -135,8 +154,10 @@ def generate_interactive(
# update generated ids, model inputs, and length for next step # update generated ids, model inputs, and length for next step
input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1) input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
model_kwargs = model._update_model_kwargs_for_generation(outputs, model_kwargs, is_encoder_decoder=False) model_kwargs = model._update_model_kwargs_for_generation(
unfinished_sequences = unfinished_sequences.mul((min(next_tokens != i for i in eos_token_id)).long()) outputs, model_kwargs, is_encoder_decoder=False)
unfinished_sequences = unfinished_sequences.mul(
(min(next_tokens != i for i in eos_token_id)).long())
output_token_ids = input_ids[0].cpu().tolist() output_token_ids = input_ids[0].cpu().tolist()
output_token_ids = output_token_ids[input_length:] output_token_ids = output_token_ids[input_length:]
@ -146,8 +167,10 @@ def generate_interactive(
response = tokenizer.decode(output_token_ids) response = tokenizer.decode(output_token_ids)
yield response yield response
# stop when each sentence is finished, or if we exceed the maximum length # stop when each sentence is finished
if unfinished_sequences.max() == 0 or stopping_criteria(input_ids, scores): # or if we exceed the maximum length
if unfinished_sequences.max() == 0 or stopping_criteria(
input_ids, scores):
break break
@ -157,44 +180,48 @@ def on_btn_click():
@st.cache_resource @st.cache_resource
def load_model(): def load_model():
model = ( model = (AutoModelForCausalLM.from_pretrained('internlm/internlm2-chat-7b',
AutoModelForCausalLM.from_pretrained("internlm/internlm2-chat-7b", trust_remote_code=True) trust_remote_code=True).to(
.to(torch.bfloat16) torch.bfloat16).cuda())
.cuda() tokenizer = AutoTokenizer.from_pretrained('internlm/internlm2-chat-7b',
) trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained("internlm/internlm2-chat-7b", trust_remote_code=True)
return model, tokenizer return model, tokenizer
def prepare_generation_config(): def prepare_generation_config():
with st.sidebar: with st.sidebar:
max_length = st.slider("Max Length", min_value=8, max_value=32768, value=32768) max_length = st.slider('Max Length',
top_p = st.slider("Top P", 0.0, 1.0, 0.8, step=0.01) min_value=8,
temperature = st.slider("Temperature", 0.0, 1.0, 0.7, step=0.01) max_value=32768,
st.button("Clear Chat History", on_click=on_btn_click) value=32768)
top_p = st.slider('Top P', 0.0, 1.0, 0.8, step=0.01)
temperature = st.slider('Temperature', 0.0, 1.0, 0.7, step=0.01)
st.button('Clear Chat History', on_click=on_btn_click)
generation_config = GenerationConfig(max_length=max_length, top_p=top_p, temperature=temperature) generation_config = GenerationConfig(max_length=max_length,
top_p=top_p,
temperature=temperature)
return generation_config return generation_config
user_prompt = "<|im_start|>user\n{user}<|im_end|>\n" user_prompt = '<|im_start|>user\n{user}<|im_end|>\n'
robot_prompt = "<|im_start|>assistant\n{robot}<|im_end|>\n" robot_prompt = '<|im_start|>assistant\n{robot}<|im_end|>\n'
cur_query_prompt = "<|im_start|>user\n{user}<|im_end|>\n<|im_start|>assistant\n" cur_query_prompt = '<|im_start|>user\n{user}<|im_end|>\n\
<|im_start|>assistant\n'
def combine_history(prompt): def combine_history(prompt):
messages = st.session_state.messages messages = st.session_state.messages
meta_instruction = ( meta_instruction = ('You are InternLM (书生·浦语), a helpful, honest, '
"You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai " 'and harmless AI assistant developed by Shanghai '
"AI Laboratory (上海人工智能实验室)." 'AI Laboratory (上海人工智能实验室).')
)
total_prompt = f"<s><|im_start|>system\n{meta_instruction}<|im_end|>\n" total_prompt = f"<s><|im_start|>system\n{meta_instruction}<|im_end|>\n"
for message in messages: for message in messages:
cur_content = message["content"] cur_content = message['content']
if message["role"] == "user": if message['role'] == 'user':
cur_prompt = user_prompt.format(user=cur_content) cur_prompt = user_prompt.format(user=cur_content)
elif message["role"] == "robot": elif message['role'] == 'robot':
cur_prompt = robot_prompt.format(robot=cur_content) cur_prompt = robot_prompt.format(robot=cur_content)
else: else:
raise RuntimeError raise RuntimeError
@ -205,57 +232,59 @@ def combine_history(prompt):
def main(): def main():
# torch.cuda.empty_cache() # torch.cuda.empty_cache()
print("load model begin.") print('load model begin.')
model, tokenizer = load_model() model, tokenizer = load_model()
print("load model end.") print('load model end.')
user_avator = "assets/user.png" user_avator = 'assets/user.png'
robot_avator = "assets/robot.png" robot_avator = 'assets/robot.png'
st.title("InternLM2-Chat-7B") st.title('InternLM2-Chat-7B')
generation_config = prepare_generation_config() generation_config = prepare_generation_config()
# Initialize chat history # Initialize chat history
if "messages" not in st.session_state: if 'messages' not in st.session_state:
st.session_state.messages = [] st.session_state.messages = []
# Display chat messages from history on app rerun # Display chat messages from history on app rerun
for message in st.session_state.messages: for message in st.session_state.messages:
with st.chat_message(message["role"], avatar=message.get("avatar")): with st.chat_message(message['role'], avatar=message.get('avatar')):
st.markdown(message["content"]) st.markdown(message['content'])
# Accept user input # Accept user input
if prompt := st.chat_input("What is up?"): if prompt := st.chat_input('What is up?'):
# Display user message in chat message container # Display user message in chat message container
with st.chat_message("user", avatar=user_avator): with st.chat_message('user', avatar=user_avator):
st.markdown(prompt) st.markdown(prompt)
real_prompt = combine_history(prompt) real_prompt = combine_history(prompt)
# Add user message to chat history # Add user message to chat history
st.session_state.messages.append({"role": "user", "content": prompt, "avatar": user_avator}) st.session_state.messages.append({
'role': 'user',
'content': prompt,
'avatar': user_avator
})
with st.chat_message("robot", avatar=robot_avator): with st.chat_message('robot', avatar=robot_avator):
message_placeholder = st.empty() message_placeholder = st.empty()
for cur_response in generate_interactive( for cur_response in generate_interactive(
model=model, model=model,
tokenizer=tokenizer, tokenizer=tokenizer,
prompt=real_prompt, prompt=real_prompt,
additional_eos_token_id=92542, additional_eos_token_id=92542,
**asdict(generation_config), **asdict(generation_config),
): ):
# Display robot response in chat message container # Display robot response in chat message container
message_placeholder.markdown(cur_response + "") message_placeholder.markdown(cur_response + '')
message_placeholder.markdown(cur_response) # pylint: disable=undefined-loop-variable message_placeholder.markdown(cur_response)
# Add robot response to chat history # Add robot response to chat history
st.session_state.messages.append( st.session_state.messages.append({
{ 'role': 'robot',
"role": "robot", 'content': cur_response, # pylint: disable=undefined-loop-variable
"content": cur_response, # pylint: disable=undefined-loop-variable 'avatar': robot_avator,
"avatar": robot_avator, })
}
)
torch.cuda.empty_cache() torch.cuda.empty_cache()
if __name__ == "__main__": if __name__ == '__main__':
main() main()

View File

@ -97,4 +97,4 @@ xtuner chat internlm/internlm2-chat-7b --visual-encoder openai/clip-vit-large-pa
## InternEvo ## InternEvo
[TODO] \[TODO\]

View File

@ -95,4 +95,4 @@ xtuner chat internlm/internlm2-chat-7b --visual-encoder openai/clip-vit-large-pa
## InternEvo ## InternEvo
[TODO] \[TODO\]

View File

@ -16,12 +16,12 @@ The base model of InternLM2 has the following technical features:
## Model Zoo ## Model Zoo
| Model | Transformers(HF) | ModelScope(HF) | OpenXLab(HF) | OpenXLab(Origin) | Release Date | | Model | Transformers(HF) | ModelScope(HF) | OpenXLab(HF) | OpenXLab(Origin) | Release Date |
|---------------------------|------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------| | -------------------------- | ------------------------------------------ | ---------------------------------------- | -------------------------------------- | ------------------------------------------ | ------------ |
| **InternLM2-Base-20B** | [🤗internlm2-base-20b](https://huggingface.co/internlm/internlm2-base-20b) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-base-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-20b-original) | 2024-01-17 | | **InternLM2-Base-20B** | [🤗internlm2-base-20b](https://huggingface.co/internlm/internlm2-base-20b) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-base-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-20b-original) | 2024-01-17 |
| **InternLM2-20B** | [🤗internlm2-20b](https://huggingface.co/internlm/internlm2-20b) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b-original) | 2024-01-17 | | **InternLM2-20B** | [🤗internlm2-20b](https://huggingface.co/internlm/internlm2-20b) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b-original) | 2024-01-17 |
| **InternLM2-Chat-20B-SFT** | [🤗internlm2-chat-20b-sft](https://huggingface.co/internlm/internlm2-chat-20b-sft) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-chat-20b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b-sft/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-sft) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-sft-original) | 2024-01-17 | | **InternLM2-Chat-20B-SFT** | [🤗internlm2-chat-20b-sft](https://huggingface.co/internlm/internlm2-chat-20b-sft) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-chat-20b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b-sft/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-sft) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-sft-original) | 2024-01-17 |
| **InternLM2-Chat-20B** | [🤗internlm2-chat-20b](https://huggingface.co/internlm/internlm2-chat-20b) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-chat-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-original) | 2024-01-17 | | **InternLM2-Chat-20B** | [🤗internlm2-chat-20b](https://huggingface.co/internlm/internlm2-chat-20b) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-chat-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-original) | 2024-01-17 |
- `HF` refers to the format used by HuggingFace in [transformers](https://github.com/huggingface/transformers), whereas `Origin` denotes the format adopted by the InternLM team in [InternEvo](https://github.com/InternLM/InternEvo). - `HF` refers to the format used by HuggingFace in [transformers](https://github.com/huggingface/transformers), whereas `Origin` denotes the format adopted by the InternLM team in [InternEvo](https://github.com/InternLM/InternEvo).
@ -29,16 +29,15 @@ The base model of InternLM2 has the following technical features:
We have evaluated InternLM2 on several important benchmarks using the open-source evaluation tool [OpenCompass](https://github.com/open-compass/opencompass). Some of the evaluation results are shown in the table below. You are welcome to visit the [OpenCompass Leaderboard](https://opencompass.org.cn/rank) for more evaluation results. We have evaluated InternLM2 on several important benchmarks using the open-source evaluation tool [OpenCompass](https://github.com/open-compass/opencompass). Some of the evaluation results are shown in the table below. You are welcome to visit the [OpenCompass Leaderboard](https://opencompass.org.cn/rank) for more evaluation results.
| Dataset\Models | InternLM2-7B | InternLM2-Chat-7B | InternLM2-20B | InternLM2-Chat-20B | ChatGPT | GPT-4 | | Dataset\\Models | InternLM2-7B | InternLM2-Chat-7B | InternLM2-20B | InternLM2-Chat-20B | ChatGPT | GPT-4 |
| --- | --- | --- | --- | --- | --- | --- | | --------------- | ------------ | ----------------- | ------------- | ------------------ | ------- | ----- |
| MMLU | 65.8 | 63.7 | 67.7 | 66.5 | 69.1 | 83.0 | | MMLU | 65.8 | 63.7 | 67.7 | 66.5 | 69.1 | 83.0 |
| AGIEval | 49.9 | 47.2 | 53.0 | 50.3 | 39.9 | 55.1 | | AGIEval | 49.9 | 47.2 | 53.0 | 50.3 | 39.9 | 55.1 |
| BBH | 65.0 | 61.2 | 72.1 | 68.3 | 70.1 | 86.7 | | BBH | 65.0 | 61.2 | 72.1 | 68.3 | 70.1 | 86.7 |
| GSM8K | 70.8 | 70.7 | 76.1 | 79.6 | 78.2 | 91.4 | | GSM8K | 70.8 | 70.7 | 76.1 | 79.6 | 78.2 | 91.4 |
| MATH | 20.2 | 23.0 | 25.5 | 31.9 | 28.0 | 45.8 | | MATH | 20.2 | 23.0 | 25.5 | 31.9 | 28.0 | 45.8 |
| HumanEval | 43.3 | 59.8 | 48.8 | 67.1 | 73.2 | 74.4 | | HumanEval | 43.3 | 59.8 | 48.8 | 67.1 | 73.2 | 74.4 |
| MBPP(Sanitized) | 51.8 | 51.4 | 63.0 | 65.8 | 78.9 | 79.0 | | MBPP(Sanitized) | 51.8 | 51.4 | 63.0 | 65.8 | 78.9 | 79.0 |
- The evaluation results were obtained from [OpenCompass](https://github.com/open-compass/opencompass) , and evaluation configuration can be found in the configuration files provided by [OpenCompass](https://github.com/open-compass/opencompass). - The evaluation results were obtained from [OpenCompass](https://github.com/open-compass/opencompass) , and evaluation configuration can be found in the configuration files provided by [OpenCompass](https://github.com/open-compass/opencompass).
- The evaluation data may have numerical differences due to the version iteration of [OpenCompass](https://github.com/open-compass/opencompass), so please refer to the latest evaluation results of [OpenCompass](https://github.com/open-compass/opencompass). - The evaluation data may have numerical differences due to the version iteration of [OpenCompass](https://github.com/open-compass/opencompass), so please refer to the latest evaluation results of [OpenCompass](https://github.com/open-compass/opencompass).

View File

@ -16,13 +16,12 @@ The base model of InternLM2 has the following technical features:
## Model Zoo ## Model Zoo
| Model | Transformers(HF) | ModelScope(HF) | OpenXLab(HF) | OpenXLab(Origin) | Release Date |
| Model | Transformers(HF) | ModelScope(HF) | OpenXLab(HF) | OpenXLab(Origin) | Release Date | | ------------------------- | ------------------------------------------ | ---------------------------------------- | -------------------------------------- | ------------------------------------------- | ------------ |
| ------------------------- | ---------------- | -------------- | ------------ | ------------------ | ------------ |
| **InternLM2-Base-7B** | [🤗internlm2-base-7b](https://huggingface.co/internlm/internlm2-base-7b) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-base-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-7b-original) | 2024-01-17 |
| **InternLM2-7B** | [🤗internlm2-7b](https://huggingface.co/internlm/internlm2-7b) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b-original) | 2024-01-17 |
| **InternLM2-Chat-7B-SFT** | [🤗internlm2-chat-7b-sft](https://huggingface.co/internlm/internlm2-chat-7b-sft) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-chat-7b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b-sft/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-sft) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-sft-original) | 2024-01-17 |
| **InternLM2-Chat-7B** | [🤗internlm2-chat-7b](https://huggingface.co/internlm/internlm2-chat-7b) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-chat-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-original) | 2024-01-17 |

- `HF` refers to the format used by HuggingFace in [transformers](https://github.com/huggingface/transformers), whereas `Origin` denotes the format adopted by the InternLM team in [InternEvo](https://github.com/InternLM/InternEvo).
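The HF-format checkpoints in the table above load directly with HuggingFace Transformers. A minimal sketch (the chat model is used only as an example; it requires a CUDA GPU and downloads the weights on first use):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Any HF-format entry from the table works here; `trust_remote_code=True` is
# needed because InternLM2 ships its modeling code alongside the checkpoint.
tokenizer = AutoTokenizer.from_pretrained('internlm/internlm2-chat-7b',
                                          trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained('internlm/internlm2-chat-7b',
                                             torch_dtype=torch.float16,
                                             trust_remote_code=True).cuda()
model = model.eval()
response, history = model.chat(tokenizer, 'hello', history=[])
print(response)
```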
@ -30,16 +29,15 @@ The base model of InternLM2 has the following technical features:
We have evaluated InternLM2 on several important benchmarks using the open-source evaluation tool [OpenCompass](https://github.com/open-compass/opencompass). Some of the evaluation results are shown in the table below. You are welcome to visit the [OpenCompass Leaderboard](https://opencompass.org.cn/rank) for more evaluation results.

| Dataset\Models  | InternLM2-7B | InternLM2-Chat-7B | InternLM2-20B | InternLM2-Chat-20B | ChatGPT | GPT-4 |
| --------------- | ------------ | ----------------- | ------------- | ------------------ | ------- | ----- |
| MMLU            | 65.8         | 63.7              | 67.7          | 66.5               | 69.1    | 83.0  |
| AGIEval         | 49.9         | 47.2              | 53.0          | 50.3               | 39.9    | 55.1  |
| BBH             | 65.0         | 61.2              | 72.1          | 68.3               | 70.1    | 86.7  |
| GSM8K           | 70.8         | 70.7              | 76.1          | 79.6               | 78.2    | 91.4  |
| MATH            | 20.2         | 23.0              | 25.5          | 31.9               | 28.0    | 45.8  |
| HumanEval       | 43.3         | 59.8              | 48.8          | 67.1               | 73.2    | 74.4  |
| MBPP(Sanitized) | 51.8         | 51.4              | 63.0          | 65.8               | 78.9    | 79.0  |
- The evaluation results were obtained with [OpenCompass](https://github.com/open-compass/opencompass), and the evaluation configuration can be found in the configuration files provided by [OpenCompass](https://github.com/open-compass/opencompass).
- Scores may vary slightly across versions of [OpenCompass](https://github.com/open-compass/opencompass), so please refer to its latest evaluation results.
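For reproducing such numbers, OpenCompass can be pointed at an HF checkpoint from its command line. The invocation below is only a sketch — the exact `run.py` dataset and model arguments are assumptions and should be checked against the current OpenCompass documentation:

```
python run.py --hf-path internlm/internlm2-7b --datasets mmlu_gen gsm8k_gen
```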

View File

@ -13,45 +13,45 @@ In terms of model structure, InternLM-20B opted for a deeper architecture, with
## Model Zoo

| Model | Transformers(HF) | ModelScope(HF) | OpenXLab(HF) | OpenXLab(Original) | Release Date |
| --------------------- | ---------------- | -------------- | ------------ | ------------------ | ------------ |
| **InternLM Chat 20B** | [🤗internlm/internlm-chat-20b](https://huggingface.co/internlm/internlm-20b-chat) | [<img src="../assets/modelscope_logo.png" width="20px" /> Shanghai_AI_Laboratory/internlm-chat-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-20b-chat/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-20b-original) | 2023-12-12 |
| **InternLM 20B** | [🤗internlm/internlm-20b](https://huggingface.co/internlm/internlm-20b) | [<img src="../assets/modelscope_logo.png" width="20px" /> Shanghai_AI_Laboratory/internlm-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-20b-original) | 2023-09-20 |

## Performance Evaluation

On the 5 capability dimensions proposed by OpenCompass, InternLM-20B has achieved excellent results (the bolded scores represent the best performances within the 13B-33B parameter range).

| Capability    | Llama-13B | Llama2-13B | Baichuan2-13B | InternLM-20B | Llama-33B | Llama-65B | Llama2-70B |
| ------------- | --------- | ---------- | ------------- | ------------ | --------- | --------- | ---------- |
| Language      | 42.5      | 47         | 47.5          | **55**       | 44.6      | 47.1      | 51.6       |
| Knowledge     | 58.2      | 58.3       | 48.9          | 60.1         | **64**    | 66        | 67.7       |
| Understanding | 45.5      | 50.9       | 58.1          | **67.3**     | 50.6      | 54.2      | 60.8       |
| Reasoning     | 42.7      | 43.6       | 44.2          | **54.9**     | 46.4      | 49.8      | 55         |
| Examination   | 37.3      | 45.2       | 51.8          | **62.5**     | 47.4      | 49.7      | 57.3       |
| Overall       | 43.8      | 47.3       | 49.4          | **59.2**     | 48.9      | 51.9      | 57.4       |

The table below compares the performance of mainstream open-source models on some influential and typical datasets.

|               | Benchmarks       | Llama-13B | Llama2-13B | Baichuan2-13B | InternLM-20B | Llama-33B | Llama-65B | Llama2-70B |
| ------------- | ---------------- | --------- | ---------- | ------------- | ------------ | --------- | --------- | ---------- |
| Examination   | MMLU             | 47.73     | 54.99      | 59.55         | **62.05**    | 58.73     | 63.71     | 69.75      |
|               | C-Eval (val)     | 31.83     | 41.4       | **59.01**     | 58.8         | 37.47     | 40.36     | 50.13      |
|               | AGI-Eval         | 22.03     | 30.93      | 37.37         | **44.58**    | 33.53     | 33.92     | 40.02      |
| Knowledge     | BoolQ            | 78.75     | 82.42      | 67            | **87.46**    | 84.43     | 86.61     | 87.74      |
|               | TriviaQA         | 52.47     | 59.36      | 46.61         | 57.26        | **66.24** | 69.79     | 70.71      |
|               | NaturalQuestions | 20.17     | 24.85      | 16.32         | 25.15        | **30.89** | 33.41     | 34.16      |
| Understanding | CMRC             | 9.26      | 31.59      | 29.85         | **68.78**    | 14.17     | 34.73     | 43.74      |
|               | CSL              | 55        | 58.75      | 63.12         | **65.62**    | 57.5      | 59.38     | 60         |
|               | RACE (middle)    | 53.41     | 63.02      | 68.94         | **86.35**    | 64.55     | 72.35     | 81.55      |
|               | RACE (high)      | 47.63     | 58.86      | 67.18         | **83.28**    | 62.61     | 68.01     | 79.93      |
|               | XSum             | 20.37     | 23.37      | 25.23         | **35.54**    | 20.55     | 19.91     | 25.38      |
| Reasoning     | WinoGrande       | 64.64     | 64.01      | 67.32         | **69.38**    | 66.85     | 69.38     | 69.77      |
|               | BBH              | 37.93     | 45.62      | 48.98         | **52.51**    | 49.98     | 58.38     | 64.91      |
|               | GSM8K            | 20.32     | 29.57      | **52.62**     | **52.62**    | 42.3      | 54.44     | 63.31      |
|               | PIQA             | 79.71     | 79.76      | 78.07         | 80.25        | **81.34** | 82.15     | 82.54      |
| Programming   | HumanEval        | 14.02     | 18.9       | 17.07         | **25.61**    | 17.68     | 18.9      | 26.22      |
|               | MBPP             | 20.6      | 26.8       | 30.8          | **35.6**     | 28.4      | 33.6      | 39.6       |
Overall, InternLM-20B comprehensively outperforms open-source models in the 13B parameter range, and on reasoning benchmarks it approaches or even surpasses the performance of Llama-65B.

View File

@ -10,27 +10,27 @@ InternLM-7B contains a 7 billion parameter base model and a chat model tailored
## Model Zoo

| Model | Transformers(HF) | ModelScope(HF) | OpenXLab(HF) | OpenXLab(Original) | Release Date |
| -------------------- | ---------------- | -------------- | ------------ | ------------------ | ------------ |
| **InternLM Chat 7B** | [🤗internlm/internlm-chat-7b](https://huggingface.co/internlm/internlm-chat-7b) | [<img src="../assets/modelscope_logo.png" width="20px" /> Shanghai_AI_Laboratory/internlm-chat-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b-original) | 2023-12-12 |
| **InternLM 7B** | [🤗internlm/internlm-7b](https://huggingface.co/internlm/internlm-7b) | [<img src="../assets/modelscope_logo.png" width="20px" /> Shanghai_AI_Laboratory/internlm-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-7b-original) | 2023-07-06 |

## Performance Evaluation

We conducted a comprehensive evaluation of InternLM using the open-source evaluation tool [OpenCompass](https://github.com/internLM/OpenCompass/). The evaluation covered five dimensions of capabilities: disciplinary competence, language competence, knowledge competence, inference competence, and comprehension competence. Here are some of the evaluation results, and you can visit the [OpenCompass leaderboard](https://opencompass.org.cn/rank) for more evaluation results.

| Datasets\Models | **InternLM-Chat-7B** | **InternLM-7B** | LLaMA-7B | Baichuan-7B | ChatGLM2-6B | Alpaca-7B | Vicuna-7B |
| --------------- | -------------------- | --------------- | -------- | ----------- | ----------- | --------- | --------- |
| C-Eval(Val)     | 52.0                 | 53.4            | 24.2     | 42.7        | 50.9        | 28.9      | 31.2      |
| MMLU            | 52.6                 | 51.0            | 35.2*    | 41.5        | 46.0        | 39.7      | 47.3      |
| AGIEval         | 46.4                 | 37.6            | 20.8     | 24.6        | 39.0        | 24.1      | 26.4      |
| CommonSenseQA   | 80.8                 | 59.5            | 65.0     | 58.8        | 60.0        | 68.7      | 66.7      |
| BUSTM           | 80.6                 | 50.6            | 48.5     | 51.3        | 55.0        | 48.8      | 62.5      |
| CLUEWSC         | 81.8                 | 59.1            | 50.3     | 52.8        | 59.8        | 50.3      | 52.2      |
| MATH            | 5.0                  | 7.1             | 2.8      | 3.0         | 6.6         | 2.2       | 2.8       |
| GSM8K           | 36.2                 | 31.2            | 10.1     | 9.7         | 29.2        | 6.0       | 15.3      |
| HumanEval       | 15.9                 | 10.4            | 14.0     | 9.2         | 9.2         | 9.2       | 11.0      |
| RACE(High)      | 80.3                 | 57.4            | 46.9*    | 28.1        | 66.3        | 40.7      | 54.0      |
- The evaluation results were obtained with [OpenCompass 20230706](https://github.com/internLM/OpenCompass/) (entries marked with * are taken from the original papers), and the evaluation configuration can be found in the configuration files provided by [OpenCompass](https://github.com/internLM/OpenCompass/).
- Scores may vary slightly across versions of [OpenCompass](https://github.com/internLM/OpenCompass/), so please refer to its latest evaluation results.

View File

@ -1,2 +1,2 @@
sentencepiece
transformers>=4.34

View File

@ -2,33 +2,32 @@ import pytest
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

prompts = ['你好', "what's your name"]


def assert_model(response):
    assert len(response) != 0
    assert 'UNUSED_TOKEN' not in response


class TestChat:
    """Test cases for chat model."""

    @pytest.mark.parametrize(
        'model_name',
        [
            'internlm/internlm2-chat-7b',
            'internlm/internlm2-chat-7b-sft',
        ],
    )
    def test_demo_default(self, model_name):
        tokenizer = AutoTokenizer.from_pretrained(model_name,
                                                  trust_remote_code=True)
        # Set `torch_dtype=torch.float16` to load model in float16, otherwise
        # it will be loaded as float32 and might cause OOM Error.
        model = AutoModelForCausalLM.from_pretrained(
            model_name, torch_dtype=torch.float16,
            trust_remote_code=True).cuda()
        model = model.eval()
        for prompt in prompts:
            response, history = model.chat(tokenizer, prompt, history=[])

@ -37,43 +36,45 @@ class TestChat:
        for prompt in prompts:
            length = 0
            for response, history in model.stream_chat(tokenizer,
                                                        prompt,
                                                        history=[]):
                print(response[length:], flush=True, end='')
                length = len(response)
            assert_model(response)


class TestBase:
    """Test cases for base model."""

    @pytest.mark.parametrize(
        'model_name',
        [
            'internlm/internlm2-7b',
            'internlm/internlm2-base-7b',
        ],
    )
    def test_demo_default(self, model_name):
        tokenizer = AutoTokenizer.from_pretrained(model_name,
                                                  trust_remote_code=True)
        # Set `torch_dtype=torch.float16` to load model in float16, otherwise
        # it will be loaded as float32 and might cause OOM Error.
        model = AutoModelForCausalLM.from_pretrained(
            model_name, torch_dtype=torch.float16,
            trust_remote_code=True).cuda()
        for prompt in prompts:
            inputs = tokenizer(prompt, return_tensors='pt')
            for k, v in inputs.items():
                inputs[k] = v.cuda()
            gen_kwargs = {
                'max_length': 128,
                'top_p': 10,
                'temperature': 1.0,
                'do_sample': True,
                'repetition_penalty': 1.0,
            }
            output = model.generate(**inputs, **gen_kwargs)
            output = tokenizer.decode(output[0].tolist(),
                                      skip_special_tokens=True)
            print(output)
            assert_model(output)
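Note that these are GPU smoke tests: each case downloads the checkpoint from the Hub and moves it to CUDA. A minimal way to run only the chat cases programmatically, assuming the module is saved as `tests/test_hf_model.py` (the path is an assumption):

```python
import pytest

# '-k TestChat' selects only the chat-model cases; the file path is an assumption.
exit_code = pytest.main(['-q', '-k', 'TestChat', 'tests/test_hf_model.py'])
print('pytest exit code:', exit_code)
```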

View File

@ -5,6 +5,7 @@
We offer `convert2llama.py`, a script designed to seamlessly transform InternLM2 checkpoints (HF format) into LLaMA (HF format). Here, HF refers to the format used by HuggingFace Transformers.

### Usage

```
python convert2llama.py --src /path/to/internlm2/ckpt --tgt /path/to/target/ckpt
```
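After conversion, the target directory should load with the stock LLaMA classes, with no `trust_remote_code` required. A minimal sanity check, assuming the illustrative `--tgt` path from the command above:

```python
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer

tgt = '/path/to/target/ckpt'  # illustrative path: the --tgt directory above

# Loading through the plain LLaMA classes confirms the converted config and
# weights no longer depend on InternLM2's custom modeling code.
tokenizer = LlamaTokenizer.from_pretrained(tgt)
model = LlamaForCausalLM.from_pretrained(tgt, torch_dtype=torch.float16)

inputs = tokenizer('Hello', return_tensors='pt')
with torch.no_grad():
    out = model.generate(**inputs, max_new_tokens=16)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```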

View File

@ -12,18 +12,18 @@ from transformers import AutoConfig, LlamaConfig, LlamaTokenizer
def save_conifg(config, tgt):
    config_dict = config.to_dict()
    unnecessary_keys = [
        '_name_or_path',
        'auto_map',
        'transformers_version',
        'model_type',
        'architectures',
        'tokenizer_class',
        'attn_implementation',
    ]
    for k in unnecessary_keys:
        config_dict.pop(k, None)
    config_dict['attention_bias'] = config_dict.pop('bias')
    config_dict['architectures'] = ['LlamaForCausalLM']
    llama_config = LlamaConfig(**config_dict)
    llama_config.save_pretrained(tgt)
@ -31,106 +31,109 @@ def save_conifg(config, tgt):
def convert(src, tgt):
    """Convert InternLM2 huggingface checkpoints to Llama-style."""
    print('Convert InternLM2 huggingface checkpoints to Llama...')
    config = AutoConfig.from_pretrained(src, trust_remote_code=True)
    assert not config.bias, 'Cannot convert InternLM Model with bias to LLaMA.'
    head_dim = config.hidden_size // config.num_attention_heads
    num_key_value_groups = config.num_attention_heads \
        // config.num_key_value_heads

    # load index json file
    index_file = os.path.join(src, 'pytorch_model.bin.index.json')
    if os.path.exists(index_file):
        with open(index_file) as fp:
            index_dict = json.load(fp)
            index_dict['weight_map'] = {}
    else:
        index_dict = None

    os.makedirs(tgt, exist_ok=True)
    for filename in tqdm(os.listdir(src)):
        if not filename.endswith('.bin'):
            continue
        states = torch.load(os.path.join(src, filename))
        llama_states = {}
        for k, v in states.copy().items():
            if 'wqkv' in k:
                v = rearrange(
                    v,
                    '(h gs d) dim -> h gs d dim',
                    gs=2 + num_key_value_groups,
                    d=head_dim,
                )
                wq, wk, wv = torch.split(v, [num_key_value_groups, 1, 1],
                                         dim=1)
                wq = rearrange(wq, 'h gs d dim -> (h gs d) dim')
                wk = rearrange(wk, 'h gs d dim -> (h gs d) dim')
                wv = rearrange(wv, 'h gs d dim -> (h gs d) dim')
                _prefix = k.split('attention')[0]
                wq_key = _prefix + 'self_attn.q_proj.weight'
                wk_key = _prefix + 'self_attn.k_proj.weight'
                wv_key = _prefix + 'self_attn.v_proj.weight'
                llama_states[wq_key] = wq.clone()
                llama_states[wk_key] = wk.clone()
                llama_states[wv_key] = wv.clone()
            elif 'attention.wo' in k:
                new_k = k.replace('attention.wo', 'self_attn.o_proj')
                llama_states[new_k] = v
            elif 'feed_forward.w1' in k:
                new_k = k.replace('feed_forward.w1', 'mlp.gate_proj')
                llama_states[new_k] = v
            elif 'feed_forward.w2' in k:
                new_k = k.replace('feed_forward.w2', 'mlp.down_proj')
                llama_states[new_k] = v
            elif 'feed_forward.w3' in k:
                new_k = k.replace('feed_forward.w3', 'mlp.up_proj')
                llama_states[new_k] = v
            elif 'attention_norm' in k:
                new_k = k.replace('attention_norm', 'input_layernorm')
                llama_states[new_k] = v
            elif 'ffn_norm' in k:
                new_k = k.replace('ffn_norm', 'post_attention_layernorm')
                llama_states[new_k] = v
            elif 'tok_embeddings' in k:
                llama_states['model.embed_tokens.weight'] = v
            elif 'output' in k:
                llama_states['lm_head.weight'] = v
            else:
                llama_states[k] = v

        if index_dict is not None:
            for k in llama_states:
                index_dict['weight_map'][k] = filename
        print(f"Saving to {os.path.join(tgt, filename)}...", flush=True)
        torch.save(llama_states, os.path.join(tgt, filename))
        del states

    print('Saving config and tokenizer...')
    # index.json
    if index_dict is not None:
        with open(os.path.join(tgt, 'pytorch_model.bin.index.json'),
                  'w') as fp:
            json.dump(index_dict, fp, indent=2)
    # tokenizer
    tokenizer = LlamaTokenizer.from_pretrained(src)
    tokenizer.init_kwargs.pop('auto_map', None)
    tokenizer.save_pretrained(tgt)
    # config
    save_conifg(config, tgt)
    print('Done!')


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--src', type=str, help='Input folder')
    parser.add_argument('--tgt', type=str, help='Output folder')
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = parse_args()
    convert(args.src, args.tgt)
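The only non-obvious step in `convert` is splitting the fused `wqkv` projection into separate q/k/v weights under grouped-query attention. The toy-shaped sketch below (dimensions are hypothetical, chosen only to illustrate the layout) mirrors the `rearrange`/`split` logic used above:

```python
import torch
from einops import rearrange

# Hypothetical toy sizes, not InternLM2's real configuration.
num_attention_heads, num_key_value_heads, head_dim, hidden = 8, 2, 8, 64
num_key_value_groups = num_attention_heads // num_key_value_heads  # 4

# Fused wqkv weight: for each kv head there are `num_key_value_groups` query
# slices plus one key slice and one value slice, each of size head_dim.
wqkv = torch.randn(num_key_value_heads * (num_key_value_groups + 2) * head_dim,
                   hidden)

v = rearrange(wqkv, '(h gs d) dim -> h gs d dim',
              gs=2 + num_key_value_groups, d=head_dim)
wq, wk, wv = torch.split(v, [num_key_value_groups, 1, 1], dim=1)
wq = rearrange(wq, 'h gs d dim -> (h gs d) dim')
wk = rearrange(wk, 'h gs d dim -> (h gs d) dim')
wv = rearrange(wv, 'h gs d dim -> (h gs d) dim')

print(wq.shape, wk.shape, wv.shape)
# torch.Size([64, 64]) torch.Size([16, 64]) torch.Size([16, 64])
# q covers all attention heads; k and v cover only the key-value heads.
```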