mirror of https://github.com/InternLM/InternLM
[CI]: fix and pass pre-commit hook (#666)
parent
1cb9870cb3
commit
78bcb07f0e
@@ -26,8 +26,8 @@ jobs:
pip install transformers
pip install sentencepiece
srun -p ${SLURM_PARTITION} --kill-on-bad-exit=1 --job-name=${GITHUB_RUN_ID}-${GITHUB_JOB} --gpus-per-task=2 pytest -s -v --color=yes ./tests/test_hf_model.py
conda deactivate

conda deactivate

clear_env:
if: ${{ !cancelled() }}
needs: [HF_model]
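The `srun` step above is only a Slurm wrapper around a pytest invocation. On a machine that already has the required GPUs and test dependencies, the same test file can be run directly; the helper below is illustrative and not part of this workflow change:

```python
# Run the HuggingFace-model test file targeted by the CI step above, without Slurm.
# Illustrative only; assumes the repository's test dependencies are installed locally.
import sys

import pytest

sys.exit(pytest.main(["-s", "-v", "--color=yes", "./tests/test_hf_model.py"]))
```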
@@ -24,15 +24,3 @@ jobs:
run: |
pip install isort==5.12.0
isort --check --profile=black .

- name: lint-black
run: |
pip install black==22.8.0
BLACK_EXCLUDE_SETTINGS='\.venv/|\.local/|\.cache/|\.git/'
black --line-length=120 --check --exclude $BLACK_EXCLUDE_SETTINGS ./chat/web_demo.py

- name: lint-pylint
run: |
pip install pylint==v2.17.2
PYLINT_DISABLE_LIST="C0114,C0415,W0212,W0235,W0238,W0621,C0103,R1735,C2801,E0402,C0412,W0719,R1728,W1514,W0718,W0105,W0707,C0209,W0703,W1203"
pylint --rcfile .pylintrc --disable=$PYLINT_DISABLE_LIST ./chat/web_demo.py
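The workflow hunk above shows the isort, black, and pylint invocations, and the pre-commit configuration below carries the equivalent checks. A small helper (not part of this commit) that mirrors those three commands locally, assuming the pinned tool versions above are installed:

```python
# Run the same three lint checks the workflow steps above invoke.
import subprocess

CHECKS = [
    ["isort", "--check", "--profile=black", "."],
    ["black", "--line-length=120", "--check", "./chat/web_demo.py"],
    ["pylint", "--rcfile", ".pylintrc", "./chat/web_demo.py"],
]

failed = False
for cmd in CHECKS:
    # check=False so every linter runs even if an earlier one reports errors.
    result = subprocess.run(cmd, check=False)
    failed = failed or result.returncode != 0
    print(" ".join(cmd), "->", "failed" if result.returncode else "ok")

raise SystemExit(1 if failed else 0)
```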
@@ -1,53 +1,44 @@
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/psf/black
rev: '22.8.0'
- repo: https://github.com/PyCQA/flake8
rev: 5.0.4
hooks:
- id: black
args:
- --line-length=120
- repo: https://github.com/pycqa/isort
rev: '5.12.0'
- id: flake8
- repo: https://github.com/PyCQA/isort
rev: 5.11.5
hooks:
- id: isort
name: isort
files: "\\.(py)$"
args:
- --profile=black
- repo: https://github.com/PyCQA/flake8
rev: '3.8.4'
- id: isort
- repo: https://github.com/pre-commit/mirrors-yapf
rev: v0.32.0
hooks:
- id: flake8
args:
- --ignore=F403,F405,W504,W503,E203
- --max-line-length=120
- repo: https://github.com/pre-commit/pygrep-hooks
rev: v1.9.0
- id: yapf
- repo: https://github.com/codespell-project/codespell
rev: v2.2.1
hooks:
- id: python-check-blanket-noqa
- repo: https://github.com/pre-commit/pre-commit-hooks
- id: codespell
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-added-large-files
args: ['--maxkb=100',--enforce-all]
- id: check-json
- id: check-docstring-first
- id: check-yaml
- id: debug-statements
- id: mixed-line-ending
- repo: https://github.com/PyCQA/pylint/
rev: v2.17.2
- id: trailing-whitespace
- id: check-yaml
- id: end-of-file-fixer
- id: requirements-txt-fixer
- id: double-quote-string-fixer
- id: check-merge-conflict
- id: fix-encoding-pragma
args: ["--remove"]
- id: mixed-line-ending
args: ["--fix=lf"]
- repo: https://github.com/executablebooks/mdformat
rev: 0.7.9
hooks:
- id: pylint
name: pylint
entry: pylint
language: system
types: [python]
args:
[
'--rcfile=.pylintrc',
'--disable=C0114,C0415,W0212,W0235,W0238,W0621,C0103,R1735,C2801,E0402,C0412,W0719,R1728,W1514,W0718,W0105,W0707,C0209,W0703,W1203'
]
- id: mdformat
args: ["--number", "--table-width", "200"]
additional_dependencies:
- mdformat-openmmlab
- mdformat_frontmatter
- linkify-it-py
- repo: https://github.com/myint/docformatter
rev: v1.3.1
hooks:
- id: docformatter
args: ["--in-place", "--wrap-descriptions", "79"]
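The `mdformat` hook added above (with `--number` and `--table-width 200`) is what produces most of the README churn in the hunks below: bare bracketed dates get backslash-escaped and tables are re-padded. A quick way to preview that effect, assuming the `mdformat` package and its documented `mdformat.text` API:

```python
# Preview what the mdformat pre-commit hook will do to a Markdown snippet.
import mdformat

sample = "[2024.01.23] We release InternLM2-Math-7B and InternLM2-Math-20B."
print(mdformat.text(sample))
# Bare "[...]" is escaped so it cannot be parsed as a link label, i.e. the output
# begins with "\[2024.01.23\]" - the same rewrite visible in the README hunks below.
```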
README.md (86 changed lines)
@@ -16,7 +16,9 @@

[![license](./assets/license.svg)](./LICENSE)
[![evaluation](./assets/compass_support.svg)](https://github.com/internLM/OpenCompass/)

<!-- [![Documentation Status](https://readthedocs.org/projects/internlm/badge/?version=latest)](https://internlm.readthedocs.io/zh_CN/latest/?badge=latest) -->

[📘Commercial Application](#license) |
[🤗HuggingFace](https://huggingface.co/internlm) |
[🆕Update News](#news) |
@@ -45,26 +47,26 @@ InternLM2 series are released with the following features:

## News

[2024.01.23] We release InternLM2-Math-7B and InternLM2-Math-20B with pretraining and SFT checkpoints. They surpass ChatGPT with small sizes. See [InternLM-Math](https://github.com/InternLM/internlm-math) for details and download.
\[2024.01.23\] We release InternLM2-Math-7B and InternLM2-Math-20B with pretraining and SFT checkpoints. They surpass ChatGPT with small sizes. See [InternLM-Math](https://github.com/InternLM/internlm-math) for details and download.

[2024.01.17] We release InternLM2-7B and InternLM2-20B and their corresponding chat models with stronger capabilities in all dimensions. See [model zoo below](#model-zoo) for download or [model cards](./model_cards/) for more details.
\[2024.01.17\] We release InternLM2-7B and InternLM2-20B and their corresponding chat models with stronger capabilities in all dimensions. See [model zoo below](#model-zoo) for download or [model cards](./model_cards/) for more details.

[2023.12.13] InternLM-7B-Chat and InternLM-20B-Chat checkpoints are updated. With an improved finetuning strategy, the new chat models can generate higher quality responses with greater stylistic diversity.
\[2023.12.13\] InternLM-7B-Chat and InternLM-20B-Chat checkpoints are updated. With an improved finetuning strategy, the new chat models can generate higher quality responses with greater stylistic diversity.

[2023.09.20] InternLM-20B is released with base and chat versions.
\[2023.09.20\] InternLM-20B is released with base and chat versions.

## Model Zoo

| Model | Transformers(HF) | ModelScope(HF) | OpenXLab(HF) | OpenXLab(Origin) | Release Date |
|
||||
|---------------------------|------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------|
|
||||
| **InternLM2-Base-7B** | [🤗internlm2-base-7b](https://huggingface.co/internlm/internlm2-base-7b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-base-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-7b-original) | 2024-01-17 |
|
||||
| **InternLM2-7B** | [🤗internlm2-7b](https://huggingface.co/internlm/internlm2-7b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b-original) | 2024-01-17 |
|
||||
| **InternLM2-Chat-7B-SFT** | [🤗internlm2-chat-7b-sft](https://huggingface.co/internlm/internlm2-chat-7b-sft) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-7b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b-sft/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-sft) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-sft-original) | 2024-01-17 |
|
||||
| **InternLM2-Chat-7B** | [🤗internlm2-chat-7b](https://huggingface.co/internlm/internlm2-chat-7b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-original) | 2024-01-17 |
|
||||
| **InternLM2-Base-20B** | [🤗internlm2-base-20b](https://huggingface.co/internlm/internlm2-base-20b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-base-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-20b-original) | 2024-01-17 |
|
||||
| **InternLM2-20B** | [🤗internlm2-20b](https://huggingface.co/internlm/internlm2-20b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b-original) | 2024-01-17 |
|
||||
| **InternLM2-Chat-20B-SFT** | [🤗internlm2-chat-20b-sft](https://huggingface.co/internlm/internlm2-chat-20b-sft) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-20b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b-sft/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-sft) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-sft-original) | 2024-01-17 |
|
||||
| **InternLM2-Chat-20B** | [🤗internlm2-chat-20b](https://huggingface.co/internlm/internlm2-chat-20b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-original) | 2024-01-17 |
|
||||
| Model | Transformers(HF) | ModelScope(HF) | OpenXLab(HF) | OpenXLab(Origin) | Release Date |
|
||||
| -------------------------- | ------------------------------------------ | ---------------------------------------- | -------------------------------------- | ------------------------------------------ | ------------ |
|
||||
| **InternLM2-Base-7B** | [🤗internlm2-base-7b](https://huggingface.co/internlm/internlm2-base-7b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-base-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-7b-original) | 2024-01-17 |
|
||||
| **InternLM2-7B** | [🤗internlm2-7b](https://huggingface.co/internlm/internlm2-7b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b-original) | 2024-01-17 |
|
||||
| **InternLM2-Chat-7B-SFT** | [🤗internlm2-chat-7b-sft](https://huggingface.co/internlm/internlm2-chat-7b-sft) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-7b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b-sft/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-sft) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-sft-original) | 2024-01-17 |
|
||||
| **InternLM2-Chat-7B** | [🤗internlm2-chat-7b](https://huggingface.co/internlm/internlm2-chat-7b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-original) | 2024-01-17 |
|
||||
| **InternLM2-Base-20B** | [🤗internlm2-base-20b](https://huggingface.co/internlm/internlm2-base-20b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-base-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-20b-original) | 2024-01-17 |
|
||||
| **InternLM2-20B** | [🤗internlm2-20b](https://huggingface.co/internlm/internlm2-20b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b-original) | 2024-01-17 |
|
||||
| **InternLM2-Chat-20B-SFT** | [🤗internlm2-chat-20b-sft](https://huggingface.co/internlm/internlm2-chat-20b-sft) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-20b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b-sft/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-sft) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-sft-original) | 2024-01-17 |
|
||||
| **InternLM2-Chat-20B** | [🤗internlm2-chat-20b](https://huggingface.co/internlm/internlm2-chat-20b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-original) | 2024-01-17 |
|
||||
|
||||
**Notes:**
|
||||
|
||||
|
@ -85,22 +87,22 @@ The release of InternLM2 series contains two model sizes: 7B and 20B. 7B models
|
|||
|
||||
### Objective Evaluation
|
||||
|
||||
| Dataset | Baichuan2-7B-Chat | Mistral-7B-Instruct-v0.2 | Qwen-7B-Chat | InternLM2-Chat-7B | ChatGLM3-6B | Baichuan2-13B-Chat | Mixtral-8x7B-Instruct-v0.1 | Qwen-14B-Chat | InternLM2-Chat-20B |
|
||||
|-----------------------|-------------------|--------------------------|--------------|-------------------|-------------|---------------------|--------------------------------|---------------|---------------------|
|
||||
| MMLU | 50.1 | 59.2 | 57.1 | 63.7 | 58.0 | 56.6 | 70.3 | 66.7 | 66.5 |
|
||||
| CMMLU | 53.4 | 42.0 | 57.9 | 63.0 | 57.8 | 54.8 | 50.6 | 68.1 | 65.1 |
|
||||
| AGIEval | 35.3 | 34.5 | 39.7 | 47.2 | 44.2 | 40.0 | 41.7 | 46.5 | 50.3 |
|
||||
| C-Eval | 53.9 | 42.4 | 59.8 | 60.8 | 59.1 | 56.3 | 54.0 | 71.5 | 63.0 |
|
||||
| TrivialQA | 37.6 | 35.0 | 46.1 | 50.8 | 38.1 | 40.3 | 57.7 | 54.5 | 53.9 |
|
||||
| NaturalQuestions | 12.8 | 8.1 | 18.6 | 24.1 | 14.0 | 12.7 | 22.5 | 22.9 | 25.9 |
|
||||
| C3 | 78.5 | 66.9 | 84.4 | 91.5 | 79.3 | 84.4 | 82.1 | 91.5 | 93.5 |
|
||||
| CMRC | 8.1 | 5.6 | 14.6 | 63.8 | 43.2 | 27.8 | 5.3 | 13.0 | 50.4 |
|
||||
| WinoGrande | 49.9 | 50.8 | 54.2 | 65.8 | 61.7 | 50.9 | 60.9 | 55.7 | 74.8 |
|
||||
| BBH | 35.9 | 46.5 | 45.5 | 61.2 | 56.0 | 42.5 | 57.3 | 55.8 | 68.3 |
|
||||
| GSM-8K | 32.4 | 48.3 | 44.1 | 70.7 | 53.8 | 56.0 | 71.7 | 57.7 | 79.6 |
|
||||
| Math | 5.7 | 8.6 | 12.0 | 23.0 | 20.4 | 4.3 | 22.5 | 27.6 | 31.9 |
|
||||
| HumanEval | 17.7 | 35.4 | 36.0 | 59.8 | 52.4 | 19.5 | 37.8 | 40.9 | 67.1 |
|
||||
| MBPP | 37.7 | 25.7 | 33.9 | 51.4 | 55.6 | 40.9 | 40.9 | 30.0 | 65.8 |
|
||||
| Dataset | Baichuan2-7B-Chat | Mistral-7B-Instruct-v0.2 | Qwen-7B-Chat | InternLM2-Chat-7B | ChatGLM3-6B | Baichuan2-13B-Chat | Mixtral-8x7B-Instruct-v0.1 | Qwen-14B-Chat | InternLM2-Chat-20B |
|
||||
| ---------------- | ----------------- | ------------------------ | ------------ | ----------------- | ----------- | ------------------ | -------------------------- | ------------- | ------------------ |
|
||||
| MMLU | 50.1 | 59.2 | 57.1 | 63.7 | 58.0 | 56.6 | 70.3 | 66.7 | 66.5 |
|
||||
| CMMLU | 53.4 | 42.0 | 57.9 | 63.0 | 57.8 | 54.8 | 50.6 | 68.1 | 65.1 |
|
||||
| AGIEval | 35.3 | 34.5 | 39.7 | 47.2 | 44.2 | 40.0 | 41.7 | 46.5 | 50.3 |
|
||||
| C-Eval | 53.9 | 42.4 | 59.8 | 60.8 | 59.1 | 56.3 | 54.0 | 71.5 | 63.0 |
|
||||
| TrivialQA | 37.6 | 35.0 | 46.1 | 50.8 | 38.1 | 40.3 | 57.7 | 54.5 | 53.9 |
|
||||
| NaturalQuestions | 12.8 | 8.1 | 18.6 | 24.1 | 14.0 | 12.7 | 22.5 | 22.9 | 25.9 |
|
||||
| C3 | 78.5 | 66.9 | 84.4 | 91.5 | 79.3 | 84.4 | 82.1 | 91.5 | 93.5 |
|
||||
| CMRC | 8.1 | 5.6 | 14.6 | 63.8 | 43.2 | 27.8 | 5.3 | 13.0 | 50.4 |
|
||||
| WinoGrande | 49.9 | 50.8 | 54.2 | 65.8 | 61.7 | 50.9 | 60.9 | 55.7 | 74.8 |
|
||||
| BBH | 35.9 | 46.5 | 45.5 | 61.2 | 56.0 | 42.5 | 57.3 | 55.8 | 68.3 |
|
||||
| GSM-8K | 32.4 | 48.3 | 44.1 | 70.7 | 53.8 | 56.0 | 71.7 | 57.7 | 79.6 |
|
||||
| Math | 5.7 | 8.6 | 12.0 | 23.0 | 20.4 | 4.3 | 22.5 | 27.6 | 31.9 |
|
||||
| HumanEval | 17.7 | 35.4 | 36.0 | 59.8 | 52.4 | 19.5 | 37.8 | 40.9 | 67.1 |
|
||||
| MBPP | 37.7 | 25.7 | 33.9 | 51.4 | 55.6 | 40.9 | 40.9 | 30.0 | 65.8 |
|
||||
|
||||
- Performance of MBPP is reported with MBPP(Sanitized)
|
||||
|
||||
|
@ -108,16 +110,16 @@ The release of InternLM2 series contains two model sizes: 7B and 20B. 7B models
|
|||
|
||||
- We have evaluated our model on [AlpacaEval 2.0](https://tatsu-lab.github.io/alpaca_eval/), and InternLM2-Chat-20B surpasses Claude 2, GPT-4(0613) and Gemini Pro.
|
||||
|
||||
| Model Name | Win Rate | Length |
|
||||
| ----------------------- | -------- | ------ |
|
||||
| GPT-4 Turbo | 50.00% | 2049 |
|
||||
| GPT-4 | 23.58% | 1365 |
|
||||
| GPT-4 0314 | 22.07% | 1371 |
|
||||
| Mistral Medium | 21.86% | 1500 |
|
||||
| XwinLM 70b V0.1 | 21.81% | 1775 |
|
||||
| InternLM2 Chat 20B | 21.75% | 2373 |
|
||||
| Model Name | Win Rate | Length |
|
||||
| ------------------ | -------- | ------ |
|
||||
| GPT-4 Turbo | 50.00% | 2049 |
|
||||
| GPT-4 | 23.58% | 1365 |
|
||||
| GPT-4 0314 | 22.07% | 1371 |
|
||||
| Mistral Medium | 21.86% | 1500 |
|
||||
| XwinLM 70b V0.1 | 21.81% | 1775 |
|
||||
| InternLM2 Chat 20B | 21.75% | 2373 |
|
||||
| Mixtral 8x7B v0.1 | 18.26% | 1465 |
|
||||
| Claude 2 | 17.19% | 1069 |
|
||||
| Claude 2 | 17.19% | 1069 |
|
||||
| Gemini Pro | 16.85% | 1315 |
|
||||
| GPT-4 0613 | 15.76% | 1140 |
|
||||
| Claude 2.1 | 15.73% | 1096 |
|
||||
|
@@ -129,9 +131,11 @@ The release of InternLM2 series contains two model sizes: 7B and 20B. 7B models
We briefly show the usages with [Transformers](#import-from-transformers), [ModelScope](#import-from-modelscope), and [Web demos](#dialogue).
The chat models adopt [chatml format](./chat/chat_format.md) to support both chat and agent applications.
To ensure a better usage effect, please make sure that the installed transformers library version meets the following requirements before performing inference with [Transformers](#import-from-transformers) or [ModelScope](#import-from-modelscope):

```
transformers >= 4.34
```

### Import from Transformers

To load the InternLM2-7B-Chat model using Transformers, use the following code:
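Not part of the README itself, but a quick way to check the `transformers >= 4.34` requirement stated above before loading the model (assumes `packaging`, which transformers already depends on, is available):

```python
# Verify the installed transformers version satisfies the stated requirement.
from importlib.metadata import version

from packaging.version import Version

installed = Version(version("transformers"))
assert installed >= Version("4.34"), f"transformers {installed} found, but >= 4.34 is required"
```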
@@ -143,7 +147,7 @@ tokenizer = AutoTokenizer.from_pretrained("internlm/internlm2-chat-7b", trust_re
# Set `torch_dtype=torch.float16` to load model in float16, otherwise it will be loaded as float32 and might cause OOM Error.
model = AutoModelForCausalLM.from_pretrained("internlm/internlm2-chat-7b", device_map="auto", trust_remote_code=True, torch_dtype=torch.float16)
# (Optional) If on low resource devices, you can load model in 4-bit or 8-bit to further save GPU memory via bitsandbytes.
# InternLM 7B in 4bit will cost nearly 8GB GPU memory.
# InternLM 7B in 4bit will cost nearly 8GB GPU memory.
# pip install -U bitsandbytes
# 8-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_8bit=True)
# 4-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_4bit=True)
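For readers skimming the diff, the fragment above belongs to the README's Transformers example. A minimal end-to-end sketch of that flow is shown below; the `model.chat(...)` call follows the interface exposed by InternLM's remote code, and the prompt and generation settings here are illustrative rather than part of this commit:

```python
# Minimal sketch of the README's Transformers usage (illustrative).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("internlm/internlm2-chat-7b", trust_remote_code=True)
# float16 roughly halves GPU memory use compared with the default float32 weights.
model = AutoModelForCausalLM.from_pretrained(
    "internlm/internlm2-chat-7b",
    device_map="auto",
    trust_remote_code=True,
    torch_dtype=torch.float16,
)
model = model.eval()

# `chat` comes from the model's remote code and threads the dialogue history through.
response, history = model.chat(tokenizer, "hello", history=[])
print(response)
```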
@ -167,7 +171,7 @@ tokenizer = AutoTokenizer.from_pretrained(model_dir, device_map="auto", trust_re
|
|||
# Set `torch_dtype=torch.float16` to load model in float16, otherwise it will be loaded as float32 and might cause OOM Error.
|
||||
model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, torch_dtype=torch.float16)
|
||||
# (Optional) If on low resource devices, you can load model in 4-bit or 8-bit to further save GPU memory via bitsandbytes.
|
||||
# InternLM 7B in 4bit will cost nearly 8GB GPU memory.
|
||||
# InternLM 7B in 4bit will cost nearly 8GB GPU memory.
|
||||
# pip install -U bitsandbytes
|
||||
# 8-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_8bit=True)
|
||||
# 4-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_4bit=True)
|
||||
|
|
|
@@ -16,6 +16,7 @@

[![license](./assets//license.svg)](https://github.com/open-mmlab/mmdetection/blob/main/LICENSE)
[![evaluation](./assets//compass_support.svg)](https://github.com/internLM/OpenCompass/)

<!-- [![Documentation Status](https://readthedocs.org/projects/internlm/badge/?version=latest)](https://internlm.readthedocs.io/zh_CN/latest/?badge=latest) -->

[📘Commercial License](#开源许可证) |
@@ -43,26 +44,26 @@ The InternLM2 series is officially released in this repository, with the following features:

## News

[2024.01.23] We released InternLM2-Math-7B and InternLM2-Math-20B together with their chat models. InternLM-Math outperforms ChatGPT at a smaller size. Visit [InternLM-Math](https://github.com/InternLM/internlm-math) to download the models and learn more.
\[2024.01.23\] We released InternLM2-Math-7B and InternLM2-Math-20B together with their chat models. InternLM-Math outperforms ChatGPT at a smaller size. Visit [InternLM-Math](https://github.com/InternLM/internlm-math) to download the models and learn more.

[2024.01.17] We released InternLM2-7B and InternLM2-20B together with their chat models. InternLM2 makes substantial progress in math, code, dialogue, creative writing, and other capabilities, reaching a leading overall level among open-source models. Download the models from the [model zoo below](#model-zoo) or see the [model cards](./model_cards/) for more details.
\[2024.01.17\] We released InternLM2-7B and InternLM2-20B together with their chat models. InternLM2 makes substantial progress in math, code, dialogue, creative writing, and other capabilities, reaching a leading overall level among open-source models. Download the models from the [model zoo below](#model-zoo) or see the [model cards](./model_cards/) for more details.

[2023.12.13] The InternLM-7B-Chat and InternLM-20B-Chat weights have been updated. With improved finetuning data and training strategy, the new chat models generate higher-quality responses with more diverse language styles.
\[2023.12.13\] The InternLM-7B-Chat and InternLM-20B-Chat weights have been updated. With improved finetuning data and training strategy, the new chat models generate higher-quality responses with more diverse language styles.

[2023.09.20] InternLM-20B has been released, including base and chat versions.
\[2023.09.20\] InternLM-20B has been released, including base and chat versions.

## Model Zoo

| Model | Transformers(HF) | ModelScope(HF) | OpenXLab(HF) | OpenXLab(Origin) | Release Date |
|
||||
|---------------------------|------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------|
|
||||
| **InternLM2-Base-7B** | [🤗internlm2-base-7b](https://huggingface.co/internlm/internlm2-base-7b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-base-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-7b-original) | 2024-01-17 |
|
||||
| **InternLM2-7B** | [🤗internlm2-7b](https://huggingface.co/internlm/internlm2-7b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b-original) | 2024-01-17 |
|
||||
| **InternLM2-Chat-7B-SFT** | [🤗internlm2-chat-7b-sft](https://huggingface.co/internlm/internlm2-chat-7b-sft) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-7b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b-sft/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-sft) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-sft-original) | 2024-01-17 |
|
||||
| **InternLM2-Chat-7B** | [🤗internlm2-chat-7b](https://huggingface.co/internlm/internlm2-chat-7b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-original) | 2024-01-17 |
|
||||
| **InternLM2-Base-20B** | [🤗internlm2-base-20b](https://huggingface.co/internlm/internlm2-base-20b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-base-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-20b-original) | 2024-01-17 |
|
||||
| **InternLM2-20B** | [🤗internlm2-20b](https://huggingface.co/internlm/internlm2-20b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b-original) | 2024-01-17 |
|
||||
| **InternLM2-Chat-20B-SFT** | [🤗internlm2-chat-20b-sft](https://huggingface.co/internlm/internlm2-chat-20b-sft) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-20b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b-sft/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-sft) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-sft-original) | 2024-01-17 |
|
||||
| **InternLM2-Chat-20B** | [🤗internlm2-chat-20b](https://huggingface.co/internlm/internlm2-chat-20b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-original) | 2024-01-17 |
|
||||
| Model | Transformers(HF) | ModelScope(HF) | OpenXLab(HF) | OpenXLab(Origin) | Release Date |
|
||||
| -------------------------- | ------------------------------------------ | ---------------------------------------- | -------------------------------------- | ------------------------------------------ | ------------ |
|
||||
| **InternLM2-Base-7B** | [🤗internlm2-base-7b](https://huggingface.co/internlm/internlm2-base-7b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-base-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-7b-original) | 2024-01-17 |
|
||||
| **InternLM2-7B** | [🤗internlm2-7b](https://huggingface.co/internlm/internlm2-7b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b-original) | 2024-01-17 |
|
||||
| **InternLM2-Chat-7B-SFT** | [🤗internlm2-chat-7b-sft](https://huggingface.co/internlm/internlm2-chat-7b-sft) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-7b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b-sft/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-sft) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-sft-original) | 2024-01-17 |
|
||||
| **InternLM2-Chat-7B** | [🤗internlm2-chat-7b](https://huggingface.co/internlm/internlm2-chat-7b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-original) | 2024-01-17 |
|
||||
| **InternLM2-Base-20B** | [🤗internlm2-base-20b](https://huggingface.co/internlm/internlm2-base-20b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-base-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-20b-original) | 2024-01-17 |
|
||||
| **InternLM2-20B** | [🤗internlm2-20b](https://huggingface.co/internlm/internlm2-20b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b-original) | 2024-01-17 |
|
||||
| **InternLM2-Chat-20B-SFT** | [🤗internlm2-chat-20b-sft](https://huggingface.co/internlm/internlm2-chat-20b-sft) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-20b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b-sft/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-sft) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-sft-original) | 2024-01-17 |
|
||||
| **InternLM2-Chat-20B** | [🤗internlm2-chat-20b](https://huggingface.co/internlm/internlm2-chat-20b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-original) | 2024-01-17 |
|
||||
|
||||
**Model Notes:**
|
||||
|
||||
|
@ -83,22 +84,22 @@ InternLM2 系列模型在本仓库正式发布,具有如下特性:
|
|||
|
||||
### Objective Evaluation
|
||||
|
||||
| Dataset | Baichuan2-7B-Chat | Mistral-7B-Instruct-v0.2 | Qwen-7B-Chat | InternLM2-Chat-7B | ChatGLM3-6B | Baichuan2-13B-Chat | Mixtral-8x7B-Instruct-v0.1 | Qwen-14B-Chat | InternLM2-Chat-20B |
|
||||
|-----------------------|-------------------|--------------------------|--------------|-------------------|-------------|---------------------|--------------------------------|---------------|---------------------|
|
||||
| MMLU | 50.1 | 59.2 | 57.1 | 63.7 | 58.0 | 56.6 | 70.3 | 66.7 | 66.5 |
|
||||
| CMMLU | 53.4 | 42.0 | 57.9 | 63.0 | 57.8 | 54.8 | 50.6 | 68.1 | 65.1 |
|
||||
| AGIEval | 35.3 | 34.5 | 39.7 | 47.2 | 44.2 | 40.0 | 41.7 | 46.5 | 50.3 |
|
||||
| C-Eval | 53.9 | 42.4 | 59.8 | 60.8 | 59.1 | 56.3 | 54.0 | 71.5 | 63.0 |
|
||||
| TrivialQA | 37.6 | 35.0 | 46.1 | 50.8 | 38.1 | 40.3 | 57.7 | 54.5 | 53.9 |
|
||||
| NaturalQuestions | 12.8 | 8.1 | 18.6 | 24.1 | 14.0 | 12.7 | 22.5 | 22.9 | 25.9 |
|
||||
| C3 | 78.5 | 66.9 | 84.4 | 91.5 | 79.3 | 84.4 | 82.1 | 91.5 | 93.5 |
|
||||
| CMRC | 8.1 | 5.6 | 14.6 | 63.8 | 43.2 | 27.8 | 5.3 | 13.0 | 50.4 |
|
||||
| WinoGrande | 49.9 | 50.8 | 54.2 | 65.8 | 61.7 | 50.9 | 60.9 | 55.7 | 74.8 |
|
||||
| BBH | 35.9 | 46.5 | 45.5 | 61.2 | 56.0 | 42.5 | 57.3 | 55.8 | 68.3 |
|
||||
| GSM-8K | 32.4 | 48.3 | 44.1 | 70.7 | 53.8 | 56.0 | 71.7 | 57.7 | 79.6 |
|
||||
| Math | 5.7 | 8.6 | 12.0 | 23.0 | 20.4 | 4.3 | 22.5 | 27.6 | 31.9 |
|
||||
| HumanEval | 17.7 | 35.4 | 36.0 | 59.8 | 52.4 | 19.5 | 37.8 | 40.9 | 67.1 |
|
||||
| MBPP | 37.7 | 25.7 | 33.9 | 51.4 | 55.6 | 40.9 | 40.9 | 30.0 | 65.8 |
|
||||
| Dataset | Baichuan2-7B-Chat | Mistral-7B-Instruct-v0.2 | Qwen-7B-Chat | InternLM2-Chat-7B | ChatGLM3-6B | Baichuan2-13B-Chat | Mixtral-8x7B-Instruct-v0.1 | Qwen-14B-Chat | InternLM2-Chat-20B |
|
||||
| ---------------- | ----------------- | ------------------------ | ------------ | ----------------- | ----------- | ------------------ | -------------------------- | ------------- | ------------------ |
|
||||
| MMLU | 50.1 | 59.2 | 57.1 | 63.7 | 58.0 | 56.6 | 70.3 | 66.7 | 66.5 |
|
||||
| CMMLU | 53.4 | 42.0 | 57.9 | 63.0 | 57.8 | 54.8 | 50.6 | 68.1 | 65.1 |
|
||||
| AGIEval | 35.3 | 34.5 | 39.7 | 47.2 | 44.2 | 40.0 | 41.7 | 46.5 | 50.3 |
|
||||
| C-Eval | 53.9 | 42.4 | 59.8 | 60.8 | 59.1 | 56.3 | 54.0 | 71.5 | 63.0 |
|
||||
| TrivialQA | 37.6 | 35.0 | 46.1 | 50.8 | 38.1 | 40.3 | 57.7 | 54.5 | 53.9 |
|
||||
| NaturalQuestions | 12.8 | 8.1 | 18.6 | 24.1 | 14.0 | 12.7 | 22.5 | 22.9 | 25.9 |
|
||||
| C3 | 78.5 | 66.9 | 84.4 | 91.5 | 79.3 | 84.4 | 82.1 | 91.5 | 93.5 |
|
||||
| CMRC | 8.1 | 5.6 | 14.6 | 63.8 | 43.2 | 27.8 | 5.3 | 13.0 | 50.4 |
|
||||
| WinoGrande | 49.9 | 50.8 | 54.2 | 65.8 | 61.7 | 50.9 | 60.9 | 55.7 | 74.8 |
|
||||
| BBH | 35.9 | 46.5 | 45.5 | 61.2 | 56.0 | 42.5 | 57.3 | 55.8 | 68.3 |
|
||||
| GSM-8K | 32.4 | 48.3 | 44.1 | 70.7 | 53.8 | 56.0 | 71.7 | 57.7 | 79.6 |
|
||||
| Math | 5.7 | 8.6 | 12.0 | 23.0 | 20.4 | 4.3 | 22.5 | 27.6 | 31.9 |
|
||||
| HumanEval | 17.7 | 35.4 | 36.0 | 59.8 | 52.4 | 19.5 | 37.8 | 40.9 | 67.1 |
|
||||
| MBPP | 37.7 | 25.7 | 33.9 | 51.4 | 55.6 | 40.9 | 40.9 | 30.0 | 65.8 |
|
||||
|
||||
- MBPP performance is reported on the MBPP(Sanitized) dataset
|
||||
|
||||
|
@ -106,16 +107,16 @@ InternLM2 系列模型在本仓库正式发布,具有如下特性:
|
|||
|
||||
- We evaluated InternLM2-Chat on [AlpacaEval 2.0](https://tatsu-lab.github.io/alpaca_eval/); the results show that InternLM2-Chat has already surpassed Claude 2, GPT-4(0613) and Gemini Pro on AlpacaEval.
|
||||
|
||||
| Model Name | Win Rate | Length |
|
||||
| ----------------------- | -------- | ------ |
|
||||
| GPT-4 Turbo | 50.00% | 2049 |
|
||||
| GPT-4 | 23.58% | 1365 |
|
||||
| GPT-4 0314 | 22.07% | 1371 |
|
||||
| Mistral Medium | 21.86% | 1500 |
|
||||
| XwinLM 70b V0.1 | 21.81% | 1775 |
|
||||
| InternLM2 Chat 20B | 21.75% | 2373 |
|
||||
| Model Name | Win Rate | Length |
|
||||
| ------------------ | -------- | ------ |
|
||||
| GPT-4 Turbo | 50.00% | 2049 |
|
||||
| GPT-4 | 23.58% | 1365 |
|
||||
| GPT-4 0314 | 22.07% | 1371 |
|
||||
| Mistral Medium | 21.86% | 1500 |
|
||||
| XwinLM 70b V0.1 | 21.81% | 1775 |
|
||||
| InternLM2 Chat 20B | 21.75% | 2373 |
|
||||
| Mixtral 8x7B v0.1 | 18.26% | 1465 |
|
||||
| Claude 2 | 17.19% | 1069 |
|
||||
| Claude 2 | 17.19% | 1069 |
|
||||
| Gemini Pro | 16.85% | 1315 |
|
||||
| GPT-4 0613 | 15.76% | 1140 |
|
||||
| Claude 2.1 | 15.73% | 1096 |
|
||||
|
@@ -127,9 +128,11 @@ The InternLM2 series is officially released in this repository, with the following features:
Next we show how to run inference with [Transformers](#import-from-transformers), [ModelScope](#import-from-modelscope), and the [Web demo](#dialogue).
The chat models adopt the [chatml format](./chat/chat_format.md) to support both general dialogue and agent applications.
For the best experience, before running inference with [Transformers](#import-from-transformers) or [ModelScope](#import-from-modelscope), please make sure the installed transformers library meets the following requirement:

```
transformers >= 4.34
```

### Load via Transformers

Use the following code to load the InternLM2-7B-Chat model via Transformers (change the model name to load a different model):
@@ -141,7 +144,7 @@ tokenizer = AutoTokenizer.from_pretrained("internlm/internlm2-chat-7b", trust_re
# Set `torch_dtype=torch.float16` to load the model in float16; otherwise it may run out of GPU memory depending on your hardware.
model = AutoModelForCausalLM.from_pretrained("internlm/internlm2-chat-7b", device_map="auto",trust_remote_code=True, torch_dtype=torch.float16)
# (Optional) On low-resource devices, you can load a 4-bit or 8-bit quantized model via bitsandbytes to further save GPU memory.
# A 4-bit quantized InternLM 7B consumes roughly 8GB of GPU memory.
# A 4-bit quantized InternLM 7B consumes roughly 8GB of GPU memory.
# pip install -U bitsandbytes
# 8-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_8bit=True)
# 4-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_4bit=True)
@@ -164,7 +167,7 @@ model_dir = snapshot_download('Shanghai_AI_Laboratory/internlm2-chat-7b')
tokenizer = AutoTokenizer.from_pretrained(model_dir, device_map="auto", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, torch_dtype=torch.float16)
# (Optional) On low-resource devices, you can load a 4-bit or 8-bit quantized model via bitsandbytes to further save GPU memory.
# A 4-bit quantized InternLM 7B consumes roughly 8GB of GPU memory.
# A 4-bit quantized InternLM 7B consumes roughly 8GB of GPU memory.
# pip install -U bitsandbytes
# 8-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_8bit=True)
# 4-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_4bit=True)
@@ -4,18 +4,18 @@ English | [简体中文](README_zh-CN.md)

## Introduction

InternLM-Chat-7B v1.1 has been released as the first open-source model with code interpreter capabilities, supportting external tools such as Python code interpreter and search engine.
InternLM-Chat-7B v1.1 has been released as the first open-source model with code interpreter capabilities, supporting external tools such as Python code interpreter and search engine.

InternLM2-Chat, open sourced on January 17, 2024, further enhances its capabilities in code interpreter and general tool utilization. With improved and more generalized instruction understanding, tool selection, and reflection abilities, InternLM2-Chat can more reliably support complex agents and multi-step tool calling for more intricate tasks. InternLM2-Chat exhibits decent computational and reasoning abilities even without external tools, surpassing ChatGPT in mathematical performance. When combined with a code interpreter, InternLM2-Chat-20B obtains comparable results to GPT-4 on GSM8K and MATH. Leveraging strong foundational capabilities in mathematics and tools, InternLM2-Chat provides practical data analysis capabilities.

The results of InternLM2-Chat-20B with the math code interpreter are as below:

| | GSM8K | MATH |
| :---: | :---: | :--: |
| InternLM2-Chat-20B | 79.6 | 32.5 |
| InternLM2-Chat-20B with Code Interpreter | 84.5 | 51.2 |
| ChatGPT (GPT-3.5) | 78.2 | 28.0 |
| GPT-4 | 91.4 | 45.8 |
| | GSM8K | MATH |
| :--------------------------------------: | :---: | :--: |
| InternLM2-Chat-20B | 79.6 | 32.5 |
| InternLM2-Chat-20B with Code Interpreter | 84.5 | 51.2 |
| ChatGPT (GPT-3.5) | 78.2 | 28.0 |
| GPT-4 | 91.4 | 45.8 |

## Usages
@@ -10,12 +10,12 @@ InternLM2-Chat further improves its code-interpreter and general tool-calling capabilities

Below are the results of InternLM2-Chat-20B with the math code interpreter.

| | GSM8K | MATH |
| :---: | :---: | :--: |
| InternLM2-Chat-20B (internal capabilities only) | 79.6 | 32.5 |
| InternLM2-Chat-20B with code interpreter | 84.5 | 51.2 |
| ChatGPT (GPT-3.5) | 78.2 | 28.0 |
| GPT-4 | 91.4 | 45.8 |
| | GSM8K | MATH |
| :---------------------------------: | :---: | :--: |
| InternLM2-Chat-20B (internal capabilities only) | 79.6 | 32.5 |
| InternLM2-Chat-20B with code interpreter | 84.5 | 51.2 |
| ChatGPT (GPT-3.5) | 78.2 | 28.0 |
| GPT-4 | 91.4 | 45.8 |

## Try It Out
@@ -40,7 +40,7 @@ streamlit run examples/react_web_demo.py

## Build a ReAct Agent with InternLM-Chat

**Note:** If you want to launch a HuggingFace model, run pip install -e .[all] first.
\*\*Note:\*\* If you want to launch a HuggingFace model, run pip install -e .\[all\] first.

```python
# Import necessary modules and classes from the "lagent" library.
@ -21,20 +21,21 @@ python pal_inference.py \
|
|||
```
|
||||
|
||||
Parameter explanation:
|
||||
| Parameter | Description |
|
||||
| :--------: | :--------------------: |
|
||||
| \<model\> | Path to the model used for inference |
|
||||
| \<out_dir\> | Generated code will be saved in the specified output folder |
|
||||
| --dataset <dataset> | Name of the dataset used for code generation (defaults to gsm8k) |
|
||||
| --max_length <length> | Maximum input token length for the model (defaults to 2048) |
|
||||
| --top_p <threshold> | Probability threshold for the sum of candidate tokens (defaults to 0.8) |
|
||||
| --eoh <end token> | User input end identifier (defaults to "") |
|
||||
| --eoa <end token> | Model input end identifier (defaults to "") |
|
||||
| --eos <end token> | System input end identifier (defaults to "") |
|
||||
| --temperature, -t <temp> | Sampling temperature during generation (defaults to 1.0) |
|
||||
| --time_out <time> | Maximum time (in seconds) for executing generated code (defaults to 100) |
|
||||
| --verbose, -v | Print code error messages (optional) |
|
||||
| --append, -a | Append output to historical results (optional) |
|
||||
|
||||
| Parameter | Description |
|
||||
| :-----------------------: | :----------------------------------------------------------------------: |
|
||||
| \<model> | Path to the model used for inference |
|
||||
| \<out_dir> | Generated code will be saved in the specified output folder |
|
||||
| --dataset <dataset> | Name of the dataset used for code generation (defaults to gsm8k) |
|
||||
| --max_length <length> | Maximum input token length for the model (defaults to 2048) |
|
||||
| --top_p <threshold> | Probability threshold for the sum of candidate tokens (defaults to 0.8) |
|
||||
| --eoh <end token> | User input end identifier (defaults to "") |
|
||||
| --eoa <end token> | Model input end identifier (defaults to "") |
|
||||
| --eos <end token> | System input end identifier (defaults to "") |
|
||||
| --temperature, -t <temp> | Sampling temperature during generation (defaults to 1.0) |
|
||||
| --time_out <time> | Maximum time (in seconds) for executing generated code (defaults to 100) |
|
||||
| --verbose, -v | Print code error messages (optional) |
|
||||
| --append, -a | Append output to historical results (optional) |
|
||||
|
||||
A simple usage example is as follows:
|
||||
|
||||
|
|
|
@@ -17,6 +17,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# isort: skip_file
import argparse
import copy
import json
@ -31,68 +32,87 @@ import tqdm
|
|||
from datasets import load_dataset
|
||||
from torch import nn
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
from transformers.generation.utils import LogitsProcessorList, StoppingCriteriaList
|
||||
from transformers.generation.utils import (LogitsProcessorList,
|
||||
StoppingCriteriaList)
|
||||
from transformers.utils import logging
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(description="PAL Inference")
|
||||
parser.add_argument("model", type=str, help="Path to the pre-trained LLM used for inference.")
|
||||
parser = argparse.ArgumentParser(description='PAL Inference')
|
||||
parser.add_argument('model',
|
||||
type=str,
|
||||
help='Path to the pre-trained LLM used for inference.')
|
||||
parser.add_argument(
|
||||
"out_dir", type=str, help="Name of the output folder where generated code snippets will be saved."
|
||||
'out_dir',
|
||||
type=str,
|
||||
help=
|
||||
'Name of the output folder where generated code snippets will be saved.'
|
||||
)
|
||||
parser.add_argument("--dataset", default="gsm8k", type=str, help="Name of the dataset used for code generation.")
|
||||
parser.add_argument('--dataset',
|
||||
default='gsm8k',
|
||||
type=str,
|
||||
help='Name of the dataset used for code generation.')
|
||||
parser.add_argument(
|
||||
"--max_length",
|
||||
'--max_length',
|
||||
default=2048,
|
||||
type=int,
|
||||
help="Maximum input token length for the natural language description.",
|
||||
help='Maximum input token length for the natural language description.',
|
||||
)
|
||||
parser.add_argument(
|
||||
"--top_p",
|
||||
'--top_p',
|
||||
default=0.8,
|
||||
type=float,
|
||||
help="Probability threshold to choose sample tokens during generation.",
|
||||
help='Probability threshold to choose sample tokens during generation.',
|
||||
)
|
||||
parser.add_argument(
|
||||
"--eoh",
|
||||
default="",
|
||||
'--eoh',
|
||||
default='',
|
||||
type=str,
|
||||
help="End of human (user) token.",
|
||||
help='End of human (user) token.',
|
||||
)
|
||||
parser.add_argument(
|
||||
"--eoa",
|
||||
default="",
|
||||
'--eoa',
|
||||
default='',
|
||||
type=str,
|
||||
help="End of assistant (bot) token.",
|
||||
help='End of assistant (bot) token.',
|
||||
)
|
||||
parser.add_argument(
|
||||
"--eos",
|
||||
default="",
|
||||
'--eos',
|
||||
default='',
|
||||
type=str,
|
||||
help="End of system token.",
|
||||
help='End of system token.',
|
||||
)
|
||||
parser.add_argument(
|
||||
"--temperature", "-t", default=1.0, type=float, help="Temperature of token sampling during generation."
|
||||
'--temperature',
|
||||
'-t',
|
||||
default=1.0,
|
||||
type=float,
|
||||
help='Temperature of token sampling during generation.')
|
||||
parser.add_argument(
|
||||
'--time_out',
|
||||
default=100,
|
||||
type=float,
|
||||
help='Maximum time allowed for executing generated code.')
|
||||
parser.add_argument(
|
||||
'--verbose',
|
||||
'-v',
|
||||
action='store_true',
|
||||
help=
|
||||
'Print code error information when executing generated code (optional).',
|
||||
)
|
||||
parser.add_argument(
|
||||
"--time_out", default=100, type=float, help="Maximum time allowed for executing generated code."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--verbose",
|
||||
"-v",
|
||||
action="store_true",
|
||||
help="Print code error information when executing generated code (optional).",
|
||||
)
|
||||
parser.add_argument("--append", "-a", action="store_true", help="Append output to the history results (optional).")
|
||||
'--append',
|
||||
'-a',
|
||||
action='store_true',
|
||||
help='Append output to the history results (optional).')
|
||||
args = parser.parse_args()
|
||||
return args
|
||||
|
||||
|
||||
class Timeout:
|
||||
"""Timer to execute code
|
||||
"""Timer to execute code.
|
||||
|
||||
Adapted from https://github.com/reasoning-machines/pal
|
||||
|
||||
|
@ -101,7 +121,7 @@ class Timeout:
|
|||
error_message (str)
|
||||
"""
|
||||
|
||||
def __init__(self, seconds=1, error_message="Timeout"):
|
||||
def __init__(self, seconds=1, error_message='Timeout'):
|
||||
self.seconds = seconds
|
||||
self.error_message = error_message
|
||||
|
||||
|
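The hunks above only touch the docstring and constructor of `Timeout`; in the upstream PAL helper this class is a context manager built on `signal.SIGALRM` that aborts long-running generated code. A minimal sketch of that pattern follows; the handler and `__enter__`/`__exit__` bodies are illustrative, not taken from this commit:

```python
# Sketch of the signal-based timeout pattern the Timeout class follows.
# signal.alarm is Unix-only and must be used from the main thread.
import signal


class Timeout:
    """Raise TimeoutError if the wrapped block runs longer than `seconds`."""

    def __init__(self, seconds=1, error_message='Timeout'):
        self.seconds = seconds
        self.error_message = error_message

    def _handle_timeout(self, signum, frame):
        raise TimeoutError(self.error_message)

    def __enter__(self):
        # Schedule SIGALRM to fire after `seconds`, interrupting the block below.
        signal.signal(signal.SIGALRM, self._handle_timeout)
        signal.alarm(self.seconds)

    def __exit__(self, exc_type, exc_value, traceback):
        # Cancel any pending alarm on exit, whether or not an error occurred.
        signal.alarm(0)


# Usage: give model-generated code at most 5 seconds to run.
with Timeout(seconds=5):
    sum(range(10_000))
```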
@ -133,15 +153,16 @@ def generate_interactive(
|
|||
generation_config: Optional[GenerationConfig] = None,
|
||||
logits_processor: Optional[LogitsProcessorList] = None,
|
||||
stopping_criteria: Optional[StoppingCriteriaList] = None,
|
||||
prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], List[int]]] = None,
|
||||
prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor],
|
||||
List[int]]] = None,
|
||||
additional_eos_token_id: Optional[int] = None,
|
||||
**kwargs,
|
||||
):
|
||||
inputs = tokenizer([prompt], padding=True, return_tensors="pt")
|
||||
input_length = len(inputs["input_ids"][0])
|
||||
inputs = tokenizer([prompt], padding=True, return_tensors='pt')
|
||||
input_length = len(inputs['input_ids'][0])
|
||||
for k, v in inputs.items():
|
||||
inputs[k] = v.cuda()
|
||||
input_ids = inputs["input_ids"]
|
||||
input_ids = inputs['input_ids']
|
||||
batch_size, input_ids_seq_length = input_ids.shape[0], input_ids.shape[-1] # noqa: F841 # pylint: disable=W0612
|
||||
if generation_config is None:
|
||||
generation_config = model.generation_config
|
||||
|
@ -155,12 +176,13 @@ def generate_interactive(
|
|||
eos_token_id = [eos_token_id]
|
||||
if additional_eos_token_id is not None:
|
||||
eos_token_id.append(additional_eos_token_id)
|
||||
has_default_max_length = kwargs.get("max_length") is None and generation_config.max_length is not None
|
||||
has_default_max_length = kwargs.get(
|
||||
'max_length') is None and generation_config.max_length is not None
|
||||
if has_default_max_length and generation_config.max_new_tokens is None:
|
||||
warnings.warn(
|
||||
f"Using `max_length`'s default ({generation_config.max_length}) to control the generation length. "
|
||||
"This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we"
|
||||
" recommend using `max_new_tokens` to control the maximum length of the generation.",
|
||||
'This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we'
|
||||
' recommend using `max_new_tokens` to control the maximum length of the generation.',
|
||||
UserWarning,
|
||||
)
|
||||
elif generation_config.max_new_tokens is not None:
|
||||
|
@ -169,22 +191,23 @@ def generate_interactive(
|
|||
logger.warn( # pylint: disable=W4902
|
||||
f"Both `max_new_tokens` (={generation_config.max_new_tokens}) and `max_length`(="
|
||||
f"{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence. "
|
||||
"Please refer to the documentation for more information. "
|
||||
"(https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)",
|
||||
'Please refer to the documentation for more information. '
|
||||
'(https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)',
|
||||
UserWarning,
|
||||
)
|
||||
|
||||
if input_ids_seq_length >= generation_config.max_length:
|
||||
input_ids_string = "input_ids"
|
||||
input_ids_string = 'input_ids'
|
||||
logger.warning(
|
||||
f"Input length of {input_ids_string} is {input_ids_seq_length}, but `max_length` is set to"
|
||||
f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
|
||||
" increasing `max_new_tokens`."
|
||||
)
|
||||
' increasing `max_new_tokens`.')
|
||||
|
||||
# 2. Set generation parameters if not already defined
|
||||
logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList()
|
||||
stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList()
|
||||
logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList(
|
||||
)
|
||||
stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList(
|
||||
)
|
||||
|
||||
logits_processor = model._get_logits_processor(
|
||||
generation_config=generation_config,
|
||||
|
@ -195,14 +218,15 @@ def generate_interactive(
|
|||
)
|
||||
|
||||
stopping_criteria = model._get_stopping_criteria(
|
||||
generation_config=generation_config, stopping_criteria=stopping_criteria
|
||||
)
|
||||
generation_config=generation_config,
|
||||
stopping_criteria=stopping_criteria)
|
||||
logits_warper = model._get_logits_warper(generation_config)
|
||||
|
||||
unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1)
|
||||
scores = None
|
||||
while True:
|
||||
model_inputs = model.prepare_inputs_for_generation(input_ids, **model_kwargs)
|
||||
model_inputs = model.prepare_inputs_for_generation(
|
||||
input_ids, **model_kwargs)
|
||||
# forward pass to get next token
|
||||
outputs = model(
|
||||
**model_inputs,
|
||||
|
@ -226,8 +250,10 @@ def generate_interactive(
|
|||
|
||||
# update generated ids, model inputs, and length for next step
|
||||
input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
|
||||
model_kwargs = model._update_model_kwargs_for_generation(outputs, model_kwargs, is_encoder_decoder=False)
|
||||
unfinished_sequences = unfinished_sequences.mul((min(next_tokens != i for i in eos_token_id)).long())
|
||||
model_kwargs = model._update_model_kwargs_for_generation(
|
||||
outputs, model_kwargs, is_encoder_decoder=False)
|
||||
unfinished_sequences = unfinished_sequences.mul(
|
||||
(min(next_tokens != i for i in eos_token_id)).long())
|
||||
|
||||
output_token_ids = input_ids[0].cpu().tolist()
|
||||
output_token_ids = output_token_ids[input_length:]
|
||||
|
@ -238,12 +264,13 @@ def generate_interactive(
|
|||
|
||||
yield response
|
||||
# stop when each sentence is finished, or if we exceed the maximum length
|
||||
if unfinished_sequences.max() == 0 or stopping_criteria(input_ids, scores):
|
||||
if unfinished_sequences.max() == 0 or stopping_criteria(
|
||||
input_ids, scores):
|
||||
break
|
||||
|
||||
|
||||
class GenericRuntime:
|
||||
"""Adapted from https://github.com/reasoning-machines/pal"""
|
||||
"""Adapted from https://github.com/reasoning-machines/pal."""
|
||||
|
||||
GLOBAL_DICT: dict = {}
|
||||
LOCAL_DICT = None
|
||||
|
@ -251,7 +278,8 @@ class GenericRuntime:
|
|||
|
||||
def __init__(self):
|
||||
self._global_vars = copy.copy(self.GLOBAL_DICT)
|
||||
self._local_vars = copy.copy(self.LOCAL_DICT) if self.LOCAL_DICT else None
|
||||
self._local_vars = copy.copy(
|
||||
self.LOCAL_DICT) if self.LOCAL_DICT else None
|
||||
|
||||
for c in self.HEADERS:
|
||||
self.exec_code(c)
|
||||
|
@ -268,7 +296,7 @@ class GenericRuntime:
|
|||
|
||||
@property
|
||||
def answer(self):
|
||||
return self._global_vars["answer"]
|
||||
return self._global_vars['answer']
|
||||
|
||||
|
||||
class PALInterface:
|
||||
|
@ -292,7 +320,7 @@ class PALInterface:
|
|||
tokenizer: AutoTokenizer,
|
||||
generation_config: GenerationConfig,
|
||||
additional_eos_token_id: int = 103028,
|
||||
get_answer_expr: str = "solution()",
|
||||
get_answer_expr: str = 'solution()',
|
||||
verbose: bool = False,
|
||||
):
|
||||
self.runtime = GenericRuntime()
|
||||
|
@ -308,11 +336,11 @@ class PALInterface:
|
|||
# The API will generate the response word by word;
|
||||
# we only need the last generation as the final result
|
||||
for cur_gen in generate_interactive(
|
||||
model=self.model,
|
||||
tokenizer=self.tokenizer,
|
||||
prompt=prompt,
|
||||
additional_eos_token_id=self.additional_eos_token_id,
|
||||
**asdict(self.generation_config),
|
||||
model=self.model,
|
||||
tokenizer=self.tokenizer,
|
||||
prompt=prompt,
|
||||
additional_eos_token_id=self.additional_eos_token_id,
|
||||
**asdict(self.generation_config),
|
||||
):
|
||||
continue
|
||||
# Get final response
|
||||
|
@ -322,11 +350,11 @@ class PALInterface:
|
|||
return code
|
||||
|
||||
def process_generation_to_code(self, gens: str):
|
||||
if "```python" in gens:
|
||||
gens = gens.split("```python")[1].split("```")[0]
|
||||
elif "```" in gens:
|
||||
gens = gens.split("```")[1].split("```")[0]
|
||||
code = gens.split("\n")
|
||||
if '```python' in gens:
|
||||
gens = gens.split('```python')[1].split('```')[0]
|
||||
elif '```' in gens:
|
||||
gens = gens.split('```')[1].split('```')[0]
|
||||
code = gens.split('\n')
|
||||
return code
|
||||
|
||||
def run(self, prompt, time_out: float = 100):
|
||||
|
@ -340,7 +368,7 @@ class PALInterface:
|
|||
return exec_result
|
||||
|
||||
def execute(self, code: List[str]):
|
||||
self.runtime.exec_code("\n".join(code))
|
||||
self.runtime.exec_code('\n'.join(code))
|
||||
return self.runtime.eval_code(self.answer_expr)
|
||||
|
||||
def clear_history(self):
|
||||
|
@ -348,21 +376,24 @@ class PALInterface:
|
|||
|
||||
|
||||
def load_model(args):
|
||||
model = AutoModelForCausalLM.from_pretrained(args.model, trust_remote_code=True).to(torch.bfloat16).cuda()
|
||||
tokenizer = AutoTokenizer.from_pretrained(args.model, trust_remote_code=True)
|
||||
model = AutoModelForCausalLM.from_pretrained(args.model,
|
||||
trust_remote_code=True).to(
|
||||
torch.bfloat16).cuda()
|
||||
tokenizer = AutoTokenizer.from_pretrained(args.model,
|
||||
trust_remote_code=True)
|
||||
return model, tokenizer
|
||||
|
||||
|
||||
def load_data(args):
|
||||
# Load data from huggingface dataset
|
||||
if args.dataset == "gsm8k":
|
||||
gsm8k = load_dataset(path=args.dataset, name="main")
|
||||
test_set = gsm8k["test"]
|
||||
if args.dataset == 'gsm8k':
|
||||
gsm8k = load_dataset(path=args.dataset, name='main')
|
||||
test_set = gsm8k['test']
|
||||
input_data = []
|
||||
for data in test_set:
|
||||
question = data["question"]
|
||||
target = float(data["answer"].split("#")[-1].replace(",", ""))
|
||||
input_data.append({"question": question, "target": target})
|
||||
question = data['question']
|
||||
target = float(data['answer'].split('#')[-1].replace(',', ''))
|
||||
input_data.append({'question': question, 'target': target})
|
||||
else:
|
||||
raise NotImplementedError
|
||||
return input_data
|
||||
|
@ -419,52 +450,62 @@ def main():
|
|||
|
||||
args = parse_args()
|
||||
|
||||
print("load model begin.")
|
||||
print('load model begin.')
|
||||
model, tokenizer = load_model(args)
|
||||
print("load model end.")
|
||||
print('load model end.')
|
||||
|
||||
generation_config = GenerationConfig(max_length=args.max_length, top_p=args.top_p, temperature=args.temperature)
|
||||
generation_config = GenerationConfig(max_length=args.max_length,
|
||||
top_p=args.top_p,
|
||||
temperature=args.temperature)
|
||||
|
||||
verbose = args.verbose
|
||||
interface = PALInterface(model=model, tokenizer=tokenizer, generation_config=generation_config, verbose=verbose)
|
||||
interface = PALInterface(model=model,
|
||||
tokenizer=tokenizer,
|
||||
generation_config=generation_config,
|
||||
verbose=verbose)
|
||||
|
||||
if not os.path.exists(args.out_dir):
|
||||
os.makedirs(args.out_dir)
|
||||
savepath = os.path.join(args.out_dir, args.dataset + ".json")
|
||||
savepath = os.path.join(args.out_dir, args.dataset + '.json')
|
||||
|
||||
# Load from history results
|
||||
if args.append and os.path.exists(savepath):
|
||||
lines = open(savepath).readlines()
|
||||
num_skip_exps = len(lines)
|
||||
scores = [x["score"] for x in map(json.loads, lines)]
|
||||
scores = [x['score'] for x in map(json.loads, lines)]
|
||||
else:
|
||||
num_skip_exps = 0
|
||||
scores = []
|
||||
|
||||
examples = load_data(args)
|
||||
with open(savepath, "a" if args.append else "w") as f:
|
||||
pbar = tqdm.tqdm(examples[num_skip_exps:], initial=num_skip_exps, total=len(examples))
|
||||
with open(savepath, 'a' if args.append else 'w') as f:
|
||||
pbar = tqdm.tqdm(examples[num_skip_exps:],
|
||||
initial=num_skip_exps,
|
||||
total=len(examples))
|
||||
for x in pbar:
|
||||
question = x["question"]
|
||||
question = x['question']
|
||||
result = copy.copy(x)
|
||||
|
||||
try:
|
||||
answer = interface.run(
|
||||
prompt=PROMPT.format(question=question, eoh=args.eoh, eoa=args.eoa, eos=args.eos),
|
||||
prompt=PROMPT.format(question=question,
|
||||
eoh=args.eoh,
|
||||
eoa=args.eoa,
|
||||
eos=args.eos),
|
||||
time_out=args.time_out,
|
||||
)
|
||||
answer = float(answer)
|
||||
score = 1 if abs(answer - x["target"]) < 1e-3 else 0
|
||||
score = 1 if abs(answer - x['target']) < 1e-3 else 0
|
||||
except Exception as e:
|
||||
if verbose:
|
||||
print(e)
|
||||
answer = ""
|
||||
answer = ''
|
||||
score = 0
|
||||
scores.append(score)
|
||||
result["answer"] = answer
|
||||
result["score"] = score
|
||||
result["generation"] = interface.history
|
||||
f.write(json.dumps(result) + "\n")
|
||||
result['answer'] = answer
|
||||
result['score'] = score
|
||||
result['generation'] = interface.history
|
||||
f.write(json.dumps(result) + '\n')
|
||||
|
||||
interface.clear_history()
|
||||
f.flush()
|
||||
|
@ -473,5 +514,5 @@ def main():
|
|||
torch.cuda.empty_cache()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
|
@ -21,20 +21,21 @@ python pal_inference.py \
|
|||
```
|
||||
|
||||
参数说明:
|
||||
| 参数 | 说明 |
|
||||
| :--------: | :--------------------: |
|
||||
| \<model\> | 用于推理的模型的路径 |
|
||||
| \<out_dir\> | 生成代码将保存在指定的输出文件夹中 |
|
||||
| --dataset <dataset> | 用于代码生成的数据集名称(默认:gsm8k) |
|
||||
| --max_length <length> | 模型最大输入 token 长度(默认:2048) |
|
||||
| --top_p <threshold> | 候选 token 相加的概率阈值(默认:0.8) |
|
||||
| --eoh <end token> | 用户输入结束标识符 (默认: "") |
|
||||
| --eoa <end token> | 模型输入结束标识符 (默认: "") |
|
||||
| --eos <end token> | 系统输入结束标识符. (默认: "") |
|
||||
| --temperature, -t <temp> | 生成过程中的采样温度(默认:1.0) |
|
||||
| --time_out <time> | 执行生成的代码的最大时间(秒)(默认:100) |
|
||||
| --verbose, -v | 打印代码错误信息(可选) |
|
||||
| --append, -a | 将输出追加到历史结果中(可选) |
|
||||
|
||||
| 参数 | 说明 |
|
||||
| :-----------------------: | :-----------------------------------------: |
|
||||
| \<model> | 用于推理的模型的路径 |
|
||||
| \<out_dir> | 生成代码将保存在指定的输出文件夹中 |
|
||||
| --dataset <dataset> | 用于代码生成的数据集名称(默认:gsm8k) |
|
||||
| --max_length <length> | 模型最大输入 token 长度(默认:2048) |
|
||||
| --top_p <threshold> | 候选 token 相加的概率阈值(默认:0.8) |
|
||||
| --eoh <end token> | 用户输入结束标识符 (默认: "") |
|
||||
| --eoa <end token> | 模型输入结束标识符 (默认: "") |
|
||||
| --eos <end token> | 系统输入结束标识符. (默认: "") |
|
||||
| --temperature, -t <temp> | 生成过程中的采样温度(默认:1.0) |
|
||||
| --time_out <time> | 执行生成的代码的最大时间(秒)(默认:100) |
|
||||
| --verbose, -v | 打印代码错误信息(可选) |
|
||||
| --append, -a | 将输出追加到历史结果中(可选) |
|
||||
|
||||
简单的使用示例如下:
|
||||
|
||||
|
|
|
@ -70,14 +70,14 @@ The weather in Shanghai is 22 celsius<|im_end|>
|
|||
```
|
||||
|
||||
1. First, there will be a new system prompt that describes the protocol of tools in JSON format. The content starts with `<|im_start|>system name=<|plugin|>\n` and ends with `<|im_end|>`. `name=<|plugin|>` indicates that the system prompt comes from tools. InternLM2-Chat supports and follows multiple system prompts in the chat history, so we can also see a system prompt that asks the model to be a helpful assistant.
|
||||
2. Model will call the tools in a streaming format after receving the user prompt, i.e., it will naturally speak something (thoughts, helpful response) then call the tools. The model will output `<|action_start|><|plugin|>`, where `<|action_start|>` indicates the model needs to call extensions and `<|plugin|>` indicates it want to use common tools. Then the model will output the calling parameters in json format and end it with `<|action_end|>`.
|
||||
2. The model will call the tools in a streaming format after receiving the user prompt, i.e., it will naturally say something (thoughts, a helpful response) and then call the tools. The model will output `<|action_start|><|plugin|>`, where `<|action_start|>` indicates that the model needs to call extensions and `<|plugin|>` indicates that it wants to use common tools. The model will then output the calling parameters in JSON format and end them with `<|action_end|>`.
|
||||
3. The environment returns the function-calling outputs, starting with `<|im_start|>environment name=<|plugin|>`, which indicates they come from the function calling in the environment. The content is in JSON format, e.g. `{"temperature": 22}`, and ends with `<|im_end|>`. A minimal parsing sketch is shown below.
|
||||
|
||||
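For readers wiring this protocol into their own serving code, the following minimal Python sketch shows one way to split a decoded assistant message into its natural-language part and the JSON tool call. The marker strings come from the format above; the `extract_tool_call` helper and the sample message are illustrative assumptions, not code from the InternLM repository.

```python
import json

ACTION_START = "<|action_start|><|plugin|>"
ACTION_END = "<|action_end|>"


def extract_tool_call(message: str):
    """Split a decoded assistant message into (thought, tool_call).

    Returns the plain text and the parsed JSON call, or (text, None)
    when the message contains no tool call.
    """
    if ACTION_START not in message:
        return message.strip(), None
    thought, rest = message.split(ACTION_START, 1)
    payload = rest.split(ACTION_END, 1)[0]
    return thought.strip(), json.loads(payload)


# Hypothetical assistant output following the format described above.
sample = (
    "Sure, I will search for the weather of Shanghai.<|action_start|><|plugin|>\n"
    '{"name": "get_current_weather", "parameters": {"location": "Shanghai"}}'
    "<|action_end|>"
)
thought, call = extract_tool_call(sample)
print(thought)       # Sure, I will search for the weather of Shanghai.
print(call["name"])  # get_current_weather
```

The extracted dictionary can then be dispatched to the matching tool, and the tool's result sent back in an `environment` message as described in step 3.
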
### Code Interpreter
|
||||
|
||||
InternLM2-Chat supports using a code interpreter to solve complex math problems and perform data analysis. The whole process is similar to function calling, as shown below.
|
||||
|
||||
```
|
||||
````
|
||||
<|im_start|>system
|
||||
You are InternLM2-Chat, a harmless AI assistant<|im_end|>
|
||||
<|im_start|>system name=<|interpreter|>
|
||||
|
@ -130,17 +130,17 @@ Can you explain the code?<|im_end|>
|
|||
<|im_start|>assistant
|
||||
The code uses Python's Plotly library to create a polar plot that shows the relationship between wind direction and rainfall. It starts by creating a Plotly Figure object. Then, I added two `Scatterpolar` traces. The first trace shows the relationship between wind direction at 9 am and average rainfall, and the second trace shows the same for 3 pm. In these traces, the `r` parameter represents the average rainfall, and `theta` represents the wind direction. `fill='toself'` means the shape is filled to its own boundary. Next, I updated the chart layout, hid the radial axis, added a legend, set the title and its font, chose a dark theme, and set the chart's background color, width, and height. Finally, the chart was displayed using `fig.show()`.
|
||||
<|im_end|>
|
||||
```
|
||||
````
|
||||
|
||||
1. First, there will be a new system prompt that describes the usage of the code interpreter. The content starts with `<|im_start|>system name=<|interpreter|>\n` and ends with `<|im_end|>`. `name=<|interpreter|>` indicates that the system prompt comes from the code interpreter. InternLM2-Chat supports and follows multiple system prompts in the chat history, so we can also see a system prompt that asks the model to be a helpful assistant.
|
||||
2. Model will call the tools in a streaming format after receving the user prompt, i.e., it will naturally speak something (thoughts, helpful response) then call the code interpreter. The model will output `<|action_start|><|interpreter|>`, where `<|action_start|>` indicates the model needs to call extensions and `<|interpreter|>` indicates it want to use code interpreter. Then the model will output the code in a markdown-style code block and end it with `<|action_end|>`.
|
||||
2. The model will call the tools in a streaming format after receiving the user prompt, i.e., it will naturally say something (thoughts, a helpful response) and then call the code interpreter. The model will output `<|action_start|><|interpreter|>`, where `<|action_start|>` indicates that the model needs to call extensions and `<|interpreter|>` indicates that it wants to use the code interpreter. The model will then output the code in a markdown-style code block and end it with `<|action_end|>`.
|
||||
3. The environment will execute the code and return the outputs, starting with `<|im_start|>environment name=<|interpreter|>`, which indicates they come from the code interpreter in the environment; the content ends with `<|im_end|>`. A sketch of this round trip follows the list.
|
||||
|
||||
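As a rough illustration of steps 2 and 3, the snippet below extracts the Python block emitted between `<|action_start|><|interpreter|>` and `<|action_end|>` and wraps an execution result back into an environment message. The helper names and the way the result is serialized are assumptions for illustration; they are not taken from the InternLM serving code.

```python
CODE_START = "<|action_start|><|interpreter|>"
CODE_END = "<|action_end|>"
MD_FENCE = "`" * 3  # a run of three backticks, built indirectly here


def extract_code(message: str) -> str:
    """Pull the markdown-style python block out of an interpreter call."""
    body = message.split(CODE_START, 1)[1].split(CODE_END, 1)[0]
    if MD_FENCE + "python" in body:
        body = body.split(MD_FENCE + "python", 1)[1].split(MD_FENCE, 1)[0]
    return body.strip()


def environment_message(result: str) -> str:
    """Wrap an execution result the way step 3 above describes."""
    return f"<|im_start|>environment name=<|interpreter|>\n{result}<|im_end|>\n"


# Hypothetical assistant output using the code-interpreter markers.
reply = (
    "I will compute it for you.<|action_start|><|interpreter|>\n"
    + MD_FENCE + "python\nprint(1 + 1)\n" + MD_FENCE
    + "<|action_end|>"
)
print(extract_code(reply))        # print(1 + 1)
print(environment_message("2"))   # the message fed back to the model
```

In a real deployment the extracted code would be executed in a sandboxed, Jupyter-like kernel rather than with a bare `exec`, and the captured stdout or figure would be passed to `environment_message`.
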
### Function Call + Code Interpreter
|
||||
|
||||
InternLM2-Chat can choose between tools and the code interpreter automatically. We only need to send the system prompts of both to the model. An example is shown below; a minimal sketch of composing the two system prompts programmatically follows the example.
|
||||
|
||||
```
|
||||
````
|
||||
<|im_start|>system
|
||||
You are InternLM2-Chat, a harmless AI assistant<|im_end|>
|
||||
<|im_start|>system name=<|interpreter|>
|
||||
|
@ -221,4 +221,4 @@ Sure, I will search for the weather of Shanghai.<|action_start|><|plugin|>
|
|||
{"temperature": 22}<|im_end|>
|
||||
<|im_start|>assistant
|
||||
The weather in Shanghai is 22 celsius<|im_end|>
|
||||
```
|
||||
````
|
||||
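Building on the dialogue above, the short Python sketch below shows one way to assemble the prefix that enables both the plugin schema and the code interpreter at the same time. The helper name and the placeholder contents are assumptions for illustration and do not come from the InternLM repository.

```python
def build_system_block(meta_instruction: str,
                       interpreter_prompt: str,
                       plugin_schema: str) -> str:
    """Concatenate the plain system prompt with both tool system prompts.

    Each tool gets its own system message, mirroring the dialogue above.
    """
    return (
        f"<|im_start|>system\n{meta_instruction}<|im_end|>\n"
        f"<|im_start|>system name=<|interpreter|>\n{interpreter_prompt}<|im_end|>\n"
        f"<|im_start|>system name=<|plugin|>\n{plugin_schema}<|im_end|>\n"
    )


# Placeholder contents purely for illustration.
prefix = build_system_block(
    meta_instruction="You are InternLM2-Chat, a harmless AI assistant",
    interpreter_prompt="You now have access to a Jupyter notebook environment.",
    plugin_schema='[{"name": "get_current_weather", "parameters": {"location": "..."}}]',
)
print(prefix)
```

The model then decides per turn whether to emit `<|action_start|><|plugin|>` or `<|action_start|><|interpreter|>`, so no extra routing logic is needed on the client side.
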
|
|
|
@ -77,7 +77,7 @@ InternLM2-Chat 的通用工具调用(function call)支持多种格式,尤
|
|||
|
||||
InternLM2-Chat 支持使用代码解释器解决复杂数学问题和进行数据分析。整个调用流程和通用工具调用类似,如下所示
|
||||
|
||||
```
|
||||
````
|
||||
<|im_start|>system
|
||||
你是书生浦语2,一个无害的人工智能助手<|im_end|>
|
||||
<|im_start|>system name=<|interpreter|>
|
||||
|
@ -128,19 +128,18 @@ fig.show()
|
|||
<|im_start|>assistant
|
||||
我使用的是Python的Plotly库来创建一个极坐标图,以展示风向与降雨量之间的关系。代码中首先创建了一个Plotly的Figure对象。接着,我添加了两个`Scatterpolar`追踪。第一个追踪展示的是上午9点的风向与降雨量的平均值的关系,第二个追踪展示的是下午3点的风向与降雨量的平均值的关系。在这两个追踪中,`r`参数代表降雨量的平均值,`theta`参数代表风向。`fill='toself'`表示图形以自身为边界进行填充。然后我更新了图表的布局,隐藏了径向轴,添加了图例,设置了标题和标题字体,选择了暗色主题,并设定了图表的背景色、宽度和高度。最后,使用`fig.show()`展示了图表。
|
||||
<|im_end|>
|
||||
```
|
||||
````
|
||||
|
||||
1. 首先在系统提示中提供代码解释器的格式和字段描述。内容以 `<|im_start|>system name=<|interpreter|>\n`开头,`<|im_end|>` 结尾,`name=<|interpreter|>` 体现了这是来自代码解释器的指令。InternLM2-Chat 支持 system 角色对模型的提示和约束多次出现。所以我们会看到前面还有关于对话的要求。
|
||||
2. 用户可以上传一个文件,并对模型提出要求,文件的上传会以单独的形式向模型发出一条指令,以 `<|im_start|>user name=file` 开头,以 json 形式给出路径和文件大小`
|
||||
[{"path": "data.csv", size='10K'}]`,以 `<|im_end|>`结尾。
|
||||
2. 模型在接受到用户指令后,会以流式的形式调用工具,及自然地生成文字进行思考/回应用户,然后输出`<|action_start|><|interpreter|>`。`<|action_start|>`表示要调用外部插件,同时 `<|interpreter|>` 表示调用的是代码解释器。然后模型输出 markdown 中 python 代码块格式代码内容,再以 `<|action_end|>` 表示工具调用结束。
|
||||
3. 系统会执行代码块中的代码,然后返回调用结果,以 `<|im_start|>environment name=<|interpreter|>`开头,表示是来自环境关于代码解释器执行的输出,以`<|im_end|>`结尾。
|
||||
2. 用户可以上传一个文件,并对模型提出要求,文件的上传会以单独的形式向模型发出一条指令,以 `<|im_start|>user name=file` 开头,以 json 形式给出路径和文件大小` [{"path": "data.csv", size='10K'}]`,以 `<|im_end|>`结尾。
|
||||
3. 模型在接受到用户指令后,会以流式的形式调用工具,及自然地生成文字进行思考/回应用户,然后输出`<|action_start|><|interpreter|>`。`<|action_start|>`表示要调用外部插件,同时 `<|interpreter|>` 表示调用的是代码解释器。然后模型输出 markdown 中 python 代码块格式代码内容,再以 `<|action_end|>` 表示工具调用结束。
|
||||
4. 系统会执行代码块中的代码,然后返回调用结果,以 `<|im_start|>environment name=<|interpreter|>`开头,表示是来自环境关于代码解释器执行的输出,以`<|im_end|>`结尾。
|
||||
|
||||
### 同时使用工具和代码解释器
|
||||
|
||||
InternLM2-Chat 能够在一个对话过程中自主选择调用工具或代码解释器。在工具和代码解释器同时开启的情况下,只需要将各自的系统提示合并在一起给模型即可。一个调用工具和代码解释器的对话历史样例如下。
|
||||
|
||||
```
|
||||
````
|
||||
<|im_start|>system
|
||||
你是书生浦语2,一个无害的人工智能助手<|im_end|>
|
||||
<|im_start|>system name=<|interpreter|>
|
||||
|
@ -219,4 +218,4 @@ fig.show()
|
|||
{"temperature": 22}<|im_end|>
|
||||
<|im_start|>assistant
|
||||
上海的天气是 22 摄氏度<|im_end|>
|
||||
```
|
||||
````
|
||||
|
|
|
@ -6,7 +6,6 @@ English | [简体中文](lmdeploy_zh_cn.md)
|
|||
|
||||
This article primarily highlights the basic usage of LMDeploy. For a comprehensive understanding of the toolkit, we invite you to refer to [the tutorials](https://lmdeploy.readthedocs.io/en/latest/).
|
||||
|
||||
|
||||
## Installation
|
||||
|
||||
Install lmdeploy with pip (python 3.8+)
|
||||
|
|
|
@ -6,7 +6,6 @@
|
|||
|
||||
本文主要介绍 LMDeploy 的基本用法,包括[安装](#安装)、[离线批处理](#离线批处理)和[推理服务](#推理服务)。更全面的介绍请参考 [LMDeploy 用户指南](https://lmdeploy.readthedocs.io/zh-cn/latest/)。
|
||||
|
||||
|
||||
## 安装
|
||||
|
||||
使用 pip(python 3.8+)安装 LMDeploy
|
||||
|
@ -27,6 +26,7 @@ print(response)
|
|||
```
|
||||
|
||||
LMDeploy 实现了 dynamic ntk,支持长文本外推。使用如下代码,可以把 InternLM2 的文本外推到 200K:
|
||||
|
||||
```python
|
||||
from lmdeploy import pipeline, TurbomindEngineConfig
|
||||
engine_config = TurbomindEngineConfig(session_len=200000,
|
||||
|
|
|
@ -1,37 +1,48 @@
|
|||
# Multi-Chats by OpenAOE
|
||||
|
||||
English | [简体中文](openaoe_zh_cn.md)
|
||||
|
||||
## Introduction
|
||||
|
||||
[OpenAOE](https://github.com/InternLM/OpenAOE) is an LLM group-chat framework that can chat with multiple LLMs (commercial or open-source) at the same time. OpenAOE provides both a backend API and a web UI to meet different usage needs.
|
||||
|
||||
Currently supported LLMs: [InternLM2-Chat-7B](https://huggingface.co/internlm/internlm2-chat-7b), [InternLM-Chat-7B](https://huggingface.co/internlm/internlm-chat-7b), GPT-3.5, GPT-4, Google PaLM, MiniMax, Claude, Spark, etc.
|
||||
|
||||
## Quick Run
|
||||
> [!TIP]
|
||||
|
||||
> \[!TIP\]
|
||||
> Require python >= 3.9
|
||||
|
||||
We provide three different ways to run OpenAOE: `run by pip`, `run by docker`, and `run by source code`.
|
||||
|
||||
### Run by pip
|
||||
|
||||
#### **Install**
|
||||
|
||||
```shell
|
||||
pip install -U openaoe
|
||||
```
|
||||
|
||||
#### **Start**
|
||||
|
||||
```shell
|
||||
openaoe -f /path/to/your/config-template.yaml
|
||||
```
|
||||
|
||||
### Run by docker
|
||||
|
||||
#### **Install**
|
||||
|
||||
There are two ways to get the OpenAOE docker image:
|
||||
|
||||
1. pull the OpenAOE docker image
|
||||
|
||||
```shell
|
||||
docker pull opensealion/openaoe:latest
|
||||
```
|
||||
|
||||
2. or build a docker image
|
||||
|
||||
```shell
|
||||
git clone https://github.com/internlm/OpenAOE
|
||||
cd OpenAOE
|
||||
|
@ -39,32 +50,38 @@ docker build . -f docker/Dockerfile -t openaoe:latest
|
|||
```
|
||||
|
||||
#### **Start**
|
||||
|
||||
```shell
|
||||
docker run -p 10099:10099 -v /path/to/your/config-template.yaml:/app/config.yaml --name OpenAOE opensealion/openaoe:latest
|
||||
```
|
||||
|
||||
### Run by source code
|
||||
|
||||
#### **Install**
|
||||
|
||||
1. clone this project
|
||||
|
||||
```shell
|
||||
git clone https://github.com/internlm/OpenAOE
|
||||
```
|
||||
2. [_optional_] build the frontend project when the frontend codes are changed
|
||||
|
||||
2. \[_optional_\] build the frontend project when the frontend code has been changed
|
||||
|
||||
```shell
|
||||
cd OpenAOE/openaoe/frontend
|
||||
npm install
|
||||
npm run build
|
||||
```
|
||||
|
||||
|
||||
#### **Start**
|
||||
|
||||
```shell
|
||||
cd OpenAOE
|
||||
pip install -r openaoe/backend/requirements.txt
|
||||
python -m openaoe.main -f /path/to/your/config-template.yaml
|
||||
```
|
||||
|
||||
> [!TIP]
|
||||
> \[!TIP\]
|
||||
> `/path/to/your/config-template.yaml` is the configuration file loaded by OpenAOE at startup,
|
||||
> which contains the relevant configuration information for the LLMs,
|
||||
> including: API URLs, AKSKs, Tokens, etc.
|
||||
|
|
|
@ -2,37 +2,47 @@
|
|||
|
||||
[English](openaoe.md) | 简体中文
|
||||
|
||||
|
||||
## 介绍
|
||||
|
||||
[OpenAOE](https://github.com/InternLM/OpenAOE) 是一个 LLM-Group-Chat 框架,可以同时与多个商业大模型或开源大模型进行聊天。 OpenAOE还提供后端API和WEB-UI以满足不同的使用需求。
|
||||
|
||||
目前已经支持的大模型有: [InternLM2-Chat-7B](https://huggingface.co/internlm/internlm2-chat-7b), [IntenLM-Chat-7B](https://huggingface.co/internlm/internlm-chat-7b), GPT-3.5, GPT-4, Google PaLM, MiniMax, Claude, 讯飞星火等。
|
||||
|
||||
|
||||
## 快速安装
|
||||
|
||||
我们将提供 3 种不同的方式安装:基于 pip、基于 docker 以及基于源代码,实现开箱即用。
|
||||
|
||||
### 基于 pip
|
||||
> [!TIP]
|
||||
|
||||
> \[!TIP\]
|
||||
> 需要 python >= 3.9
|
||||
|
||||
#### **安装**
|
||||
|
||||
```shell
|
||||
pip install -U openaoe
|
||||
```
|
||||
|
||||
#### **运行**
|
||||
|
||||
```shell
|
||||
openaoe -f /path/to/your/config-template.yaml
|
||||
```
|
||||
|
||||
### 基于 docker
|
||||
|
||||
#### **安装**
|
||||
|
||||
有两种方式获取 OpenAOE 的 docker 镜像:
|
||||
|
||||
1. 官方拉取
|
||||
|
||||
```shell
|
||||
docker pull opensealion/openaoe:latest
|
||||
```
|
||||
|
||||
2. 本地构建
|
||||
|
||||
```shell
|
||||
git clone https://github.com/internlm/OpenAOE
|
||||
cd OpenAOE
|
||||
|
@ -40,31 +50,37 @@ docker build . -f docker/Dockerfile -t openaoe:latest
|
|||
```
|
||||
|
||||
#### **运行**
|
||||
|
||||
```shell
|
||||
docker run -p 10099:10099 -v /path/to/your/config-template.yaml:/app/config.yaml --name OpenAOE opensealion/openaoe:latest
|
||||
```
|
||||
|
||||
### 基于源代码
|
||||
|
||||
#### **安装**
|
||||
|
||||
1. 克隆项目
|
||||
|
||||
```shell
|
||||
git clone https://github.com/internlm/OpenAOE
|
||||
```
|
||||
2. [_可选_] (如果前端代码发生变动)重新构建前端项目
|
||||
|
||||
2. \[_可选_\] (如果前端代码发生变动)重新构建前端项目
|
||||
|
||||
```shell
|
||||
cd OpenAOE/openaoe/frontend
|
||||
npm install
|
||||
npm run build
|
||||
```
|
||||
|
||||
|
||||
#### **运行**
|
||||
|
||||
```shell
|
||||
cd OpenAOE
|
||||
pip install -r openaoe/backend/requirements.txt
|
||||
python -m openaoe.main -f /path/to/your/config-template.yaml
|
||||
``````
|
||||
```
|
||||
|
||||
> [!TIP]
|
||||
> \[!TIP\]
|
||||
> `/path/to/your/config-template.yaml` 是 OpenAOE 启动时读取的配置文件,里面包含了大模型的相关配置信息,
|
||||
> 包括:调用API地址、AKSK、Token等信息,是 OpenAOE 启动的必备文件。模板文件可以在 `openaoe/backend/config/config-template.yaml` 中找到。
|
||||
|
|
197
chat/web_demo.py
197
chat/web_demo.py
|
@ -1,13 +1,20 @@
|
|||
"""
|
||||
This script refers to the dialogue example of streamlit, the interactive generation code of chatglm2 and transformers.
|
||||
We mainly modified part of the code logic to adapt to the generation of our model.
|
||||
"""This script refers to the dialogue example of streamlit, the interactive
|
||||
generation code of chatglm2 and transformers.
|
||||
|
||||
We mainly modified part of the code logic to adapt to the
|
||||
generation of our model.
|
||||
Please refer to these links below for more information:
|
||||
1. streamlit chat example: https://docs.streamlit.io/knowledge-base/tutorials/build-conversational-apps
|
||||
2. chatglm2: https://github.com/THUDM/ChatGLM2-6B
|
||||
3. transformers: https://github.com/huggingface/transformers
|
||||
Please run with the command `streamlit run path/to/web_demo.py --server.address=0.0.0.0 --server.port 7860`.
|
||||
1. streamlit chat example:
|
||||
https://docs.streamlit.io/knowledge-base/tutorials/build-conversational-apps
|
||||
2. chatglm2:
|
||||
https://github.com/THUDM/ChatGLM2-6B
|
||||
3. transformers:
|
||||
https://github.com/huggingface/transformers
|
||||
Please run with the command `streamlit run path/to/web_demo.py
|
||||
--server.address=0.0.0.0 --server.port 7860`.
|
||||
Using `python path/to/web_demo.py` may cause unknown problems.
|
||||
"""
|
||||
# isort: skip_file
|
||||
import copy
|
||||
import warnings
|
||||
from dataclasses import asdict, dataclass
|
||||
|
@ -16,7 +23,8 @@ from typing import Callable, List, Optional
|
|||
import streamlit as st
|
||||
import torch
|
||||
from torch import nn
|
||||
from transformers.generation.utils import LogitsProcessorList, StoppingCriteriaList
|
||||
from transformers.generation.utils import (LogitsProcessorList,
|
||||
StoppingCriteriaList)
|
||||
from transformers.utils import logging
|
||||
|
||||
from transformers import AutoTokenizer, AutoModelForCausalLM # isort: skip
|
||||
|
@ -42,16 +50,17 @@ def generate_interactive(
|
|||
generation_config: Optional[GenerationConfig] = None,
|
||||
logits_processor: Optional[LogitsProcessorList] = None,
|
||||
stopping_criteria: Optional[StoppingCriteriaList] = None,
|
||||
prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], List[int]]] = None,
|
||||
prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor],
|
||||
List[int]]] = None,
|
||||
additional_eos_token_id: Optional[int] = None,
|
||||
**kwargs,
|
||||
):
|
||||
inputs = tokenizer([prompt], padding=True, return_tensors="pt")
|
||||
input_length = len(inputs["input_ids"][0])
|
||||
inputs = tokenizer([prompt], padding=True, return_tensors='pt')
|
||||
input_length = len(inputs['input_ids'][0])
|
||||
for k, v in inputs.items():
|
||||
inputs[k] = v.cuda()
|
||||
input_ids = inputs["input_ids"]
|
||||
batch_size, input_ids_seq_length = input_ids.shape[0], input_ids.shape[-1] # noqa: F841 # pylint: disable=W0612
|
||||
input_ids = inputs['input_ids']
|
||||
_, input_ids_seq_length = input_ids.shape[0], input_ids.shape[-1]
|
||||
if generation_config is None:
|
||||
generation_config = model.generation_config
|
||||
generation_config = copy.deepcopy(generation_config)
|
||||
|
@ -64,36 +73,45 @@ def generate_interactive(
|
|||
eos_token_id = [eos_token_id]
|
||||
if additional_eos_token_id is not None:
|
||||
eos_token_id.append(additional_eos_token_id)
|
||||
has_default_max_length = kwargs.get("max_length") is None and generation_config.max_length is not None
|
||||
has_default_max_length = kwargs.get(
|
||||
'max_length') is None and generation_config.max_length is not None
|
||||
if has_default_max_length and generation_config.max_new_tokens is None:
|
||||
warnings.warn(
|
||||
f"Using `max_length`'s default ({generation_config.max_length}) to control the generation length. "
|
||||
"This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we"
|
||||
" recommend using `max_new_tokens` to control the maximum length of the generation.",
|
||||
f"Using 'max_length''s default ({repr(generation_config.max_length)}) \
|
||||
to control the generation length. "
|
||||
'This behaviour is deprecated and will be removed from the \
|
||||
config in v5 of Transformers -- we'
|
||||
' recommend using `max_new_tokens` to control the maximum \
|
||||
length of the generation.',
|
||||
UserWarning,
|
||||
)
|
||||
elif generation_config.max_new_tokens is not None:
|
||||
generation_config.max_length = generation_config.max_new_tokens + input_ids_seq_length
|
||||
generation_config.max_length = generation_config.max_new_tokens + \
|
||||
input_ids_seq_length
|
||||
if not has_default_max_length:
|
||||
logger.warn( # pylint: disable=W4902
|
||||
f"Both `max_new_tokens` (={generation_config.max_new_tokens}) and `max_length`(="
|
||||
f"{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence. "
|
||||
"Please refer to the documentation for more information. "
|
||||
"(https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)",
|
||||
f"Both 'max_new_tokens' (={generation_config.max_new_tokens}) "
|
||||
f"and 'max_length'(={generation_config.max_length}) seem to "
|
||||
"have been set. 'max_new_tokens' will take precedence. "
|
||||
'Please refer to the documentation for more information. '
|
||||
'(https://huggingface.co/docs/transformers/main/'
|
||||
'en/main_classes/text_generation)',
|
||||
UserWarning,
|
||||
)
|
||||
|
||||
if input_ids_seq_length >= generation_config.max_length:
|
||||
input_ids_string = "input_ids"
|
||||
input_ids_string = 'input_ids'
|
||||
logger.warning(
|
||||
f"Input length of {input_ids_string} is {input_ids_seq_length}, but `max_length` is set to"
|
||||
f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
|
||||
" increasing `max_new_tokens`."
|
||||
)
|
||||
f"Input length of {input_ids_string} is {input_ids_seq_length}, "
|
||||
f"but 'max_length' is set to {generation_config.max_length}. "
|
||||
'This can lead to unexpected behavior. You should consider'
|
||||
" increasing 'max_new_tokens'.")
|
||||
|
||||
# 2. Set generation parameters if not already defined
|
||||
logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList()
|
||||
stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList()
|
||||
logits_processor = logits_processor if logits_processor is not None \
|
||||
else LogitsProcessorList()
|
||||
stopping_criteria = stopping_criteria if stopping_criteria is not None \
|
||||
else StoppingCriteriaList()
|
||||
|
||||
logits_processor = model._get_logits_processor(
|
||||
generation_config=generation_config,
|
||||
|
@ -104,14 +122,15 @@ def generate_interactive(
|
|||
)
|
||||
|
||||
stopping_criteria = model._get_stopping_criteria(
|
||||
generation_config=generation_config, stopping_criteria=stopping_criteria
|
||||
)
|
||||
generation_config=generation_config,
|
||||
stopping_criteria=stopping_criteria)
|
||||
logits_warper = model._get_logits_warper(generation_config)
|
||||
|
||||
unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1)
|
||||
scores = None
|
||||
while True:
|
||||
model_inputs = model.prepare_inputs_for_generation(input_ids, **model_kwargs)
|
||||
model_inputs = model.prepare_inputs_for_generation(
|
||||
input_ids, **model_kwargs)
|
||||
# forward pass to get next token
|
||||
outputs = model(
|
||||
**model_inputs,
|
||||
|
@ -135,8 +154,10 @@ def generate_interactive(
|
|||
|
||||
# update generated ids, model inputs, and length for next step
|
||||
input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
|
||||
model_kwargs = model._update_model_kwargs_for_generation(outputs, model_kwargs, is_encoder_decoder=False)
|
||||
unfinished_sequences = unfinished_sequences.mul((min(next_tokens != i for i in eos_token_id)).long())
|
||||
model_kwargs = model._update_model_kwargs_for_generation(
|
||||
outputs, model_kwargs, is_encoder_decoder=False)
|
||||
unfinished_sequences = unfinished_sequences.mul(
|
||||
(min(next_tokens != i for i in eos_token_id)).long())
|
||||
|
||||
output_token_ids = input_ids[0].cpu().tolist()
|
||||
output_token_ids = output_token_ids[input_length:]
|
||||
|
@ -146,8 +167,10 @@ def generate_interactive(
|
|||
response = tokenizer.decode(output_token_ids)
|
||||
|
||||
yield response
|
||||
# stop when each sentence is finished, or if we exceed the maximum length
|
||||
if unfinished_sequences.max() == 0 or stopping_criteria(input_ids, scores):
|
||||
# stop when each sentence is finished
|
||||
# or if we exceed the maximum length
|
||||
if unfinished_sequences.max() == 0 or stopping_criteria(
|
||||
input_ids, scores):
|
||||
break
|
||||
|
||||
|
||||
|
@ -157,44 +180,48 @@ def on_btn_click():
|
|||
|
||||
@st.cache_resource
|
||||
def load_model():
|
||||
model = (
|
||||
AutoModelForCausalLM.from_pretrained("internlm/internlm2-chat-7b", trust_remote_code=True)
|
||||
.to(torch.bfloat16)
|
||||
.cuda()
|
||||
)
|
||||
tokenizer = AutoTokenizer.from_pretrained("internlm/internlm2-chat-7b", trust_remote_code=True)
|
||||
model = (AutoModelForCausalLM.from_pretrained('internlm/internlm2-chat-7b',
|
||||
trust_remote_code=True).to(
|
||||
torch.bfloat16).cuda())
|
||||
tokenizer = AutoTokenizer.from_pretrained('internlm/internlm2-chat-7b',
|
||||
trust_remote_code=True)
|
||||
return model, tokenizer
|
||||
|
||||
|
||||
def prepare_generation_config():
|
||||
with st.sidebar:
|
||||
max_length = st.slider("Max Length", min_value=8, max_value=32768, value=32768)
|
||||
top_p = st.slider("Top P", 0.0, 1.0, 0.8, step=0.01)
|
||||
temperature = st.slider("Temperature", 0.0, 1.0, 0.7, step=0.01)
|
||||
st.button("Clear Chat History", on_click=on_btn_click)
|
||||
max_length = st.slider('Max Length',
|
||||
min_value=8,
|
||||
max_value=32768,
|
||||
value=32768)
|
||||
top_p = st.slider('Top P', 0.0, 1.0, 0.8, step=0.01)
|
||||
temperature = st.slider('Temperature', 0.0, 1.0, 0.7, step=0.01)
|
||||
st.button('Clear Chat History', on_click=on_btn_click)
|
||||
|
||||
generation_config = GenerationConfig(max_length=max_length, top_p=top_p, temperature=temperature)
|
||||
generation_config = GenerationConfig(max_length=max_length,
|
||||
top_p=top_p,
|
||||
temperature=temperature)
|
||||
|
||||
return generation_config
|
||||
|
||||
|
||||
user_prompt = "<|im_start|>user\n{user}<|im_end|>\n"
|
||||
robot_prompt = "<|im_start|>assistant\n{robot}<|im_end|>\n"
|
||||
cur_query_prompt = "<|im_start|>user\n{user}<|im_end|>\n<|im_start|>assistant\n"
|
||||
user_prompt = '<|im_start|>user\n{user}<|im_end|>\n'
|
||||
robot_prompt = '<|im_start|>assistant\n{robot}<|im_end|>\n'
|
||||
cur_query_prompt = '<|im_start|>user\n{user}<|im_end|>\n\
|
||||
<|im_start|>assistant\n'
|
||||
|
||||
|
||||
def combine_history(prompt):
|
||||
messages = st.session_state.messages
|
||||
meta_instruction = (
|
||||
"You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai "
|
||||
"AI Laboratory (上海人工智能实验室)."
|
||||
)
|
||||
meta_instruction = ('You are InternLM (书生·浦语), a helpful, honest, '
|
||||
'and harmless AI assistant developed by Shanghai '
|
||||
'AI Laboratory (上海人工智能实验室).')
|
||||
total_prompt = f"<s><|im_start|>system\n{meta_instruction}<|im_end|>\n"
|
||||
for message in messages:
|
||||
cur_content = message["content"]
|
||||
if message["role"] == "user":
|
||||
cur_content = message['content']
|
||||
if message['role'] == 'user':
|
||||
cur_prompt = user_prompt.format(user=cur_content)
|
||||
elif message["role"] == "robot":
|
||||
elif message['role'] == 'robot':
|
||||
cur_prompt = robot_prompt.format(robot=cur_content)
|
||||
else:
|
||||
raise RuntimeError
|
||||
|
@ -205,57 +232,59 @@ def combine_history(prompt):
|
|||
|
||||
def main():
|
||||
# torch.cuda.empty_cache()
|
||||
print("load model begin.")
|
||||
print('load model begin.')
|
||||
model, tokenizer = load_model()
|
||||
print("load model end.")
|
||||
print('load model end.')
|
||||
|
||||
user_avator = "assets/user.png"
|
||||
robot_avator = "assets/robot.png"
|
||||
user_avator = 'assets/user.png'
|
||||
robot_avator = 'assets/robot.png'
|
||||
|
||||
st.title("InternLM2-Chat-7B")
|
||||
st.title('InternLM2-Chat-7B')
|
||||
|
||||
generation_config = prepare_generation_config()
|
||||
|
||||
# Initialize chat history
|
||||
if "messages" not in st.session_state:
|
||||
if 'messages' not in st.session_state:
|
||||
st.session_state.messages = []
|
||||
|
||||
# Display chat messages from history on app rerun
|
||||
for message in st.session_state.messages:
|
||||
with st.chat_message(message["role"], avatar=message.get("avatar")):
|
||||
st.markdown(message["content"])
|
||||
with st.chat_message(message['role'], avatar=message.get('avatar')):
|
||||
st.markdown(message['content'])
|
||||
|
||||
# Accept user input
|
||||
if prompt := st.chat_input("What is up?"):
|
||||
if prompt := st.chat_input('What is up?'):
|
||||
# Display user message in chat message container
|
||||
with st.chat_message("user", avatar=user_avator):
|
||||
with st.chat_message('user', avatar=user_avator):
|
||||
st.markdown(prompt)
|
||||
real_prompt = combine_history(prompt)
|
||||
# Add user message to chat history
|
||||
st.session_state.messages.append({"role": "user", "content": prompt, "avatar": user_avator})
|
||||
st.session_state.messages.append({
|
||||
'role': 'user',
|
||||
'content': prompt,
|
||||
'avatar': user_avator
|
||||
})
|
||||
|
||||
with st.chat_message("robot", avatar=robot_avator):
|
||||
with st.chat_message('robot', avatar=robot_avator):
|
||||
message_placeholder = st.empty()
|
||||
for cur_response in generate_interactive(
|
||||
model=model,
|
||||
tokenizer=tokenizer,
|
||||
prompt=real_prompt,
|
||||
additional_eos_token_id=92542,
|
||||
**asdict(generation_config),
|
||||
model=model,
|
||||
tokenizer=tokenizer,
|
||||
prompt=real_prompt,
|
||||
additional_eos_token_id=92542,
|
||||
**asdict(generation_config),
|
||||
):
|
||||
# Display robot response in chat message container
|
||||
message_placeholder.markdown(cur_response + "▌")
|
||||
message_placeholder.markdown(cur_response) # pylint: disable=undefined-loop-variable
|
||||
message_placeholder.markdown(cur_response + '▌')
|
||||
message_placeholder.markdown(cur_response)
|
||||
# Add robot response to chat history
|
||||
st.session_state.messages.append(
|
||||
{
|
||||
"role": "robot",
|
||||
"content": cur_response, # pylint: disable=undefined-loop-variable
|
||||
"avatar": robot_avator,
|
||||
}
|
||||
)
|
||||
st.session_state.messages.append({
|
||||
'role': 'robot',
|
||||
'content': cur_response, # pylint: disable=undefined-loop-variable
|
||||
'avatar': robot_avator,
|
||||
})
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
|
@ -97,4 +97,4 @@ xtuner chat internlm/internlm2-chat-7b --visual-encoder openai/clip-vit-large-pa
|
|||
|
||||
## InternEvo
|
||||
|
||||
[TODO]
|
||||
\[TODO\]
|
||||
|
|
|
@ -95,4 +95,4 @@ xtuner chat internlm/internlm2-chat-7b --visual-encoder openai/clip-vit-large-pa
|
|||
|
||||
## InternEvo
|
||||
|
||||
[TODO]
|
||||
\[TODO\]
|
||||
|
|
|
@ -16,12 +16,12 @@ The base model of InternLM2 has the following technical features:
|
|||
|
||||
## Model Zoo
|
||||
|
||||
| Model | Transformers(HF) | ModelScope(HF) | OpenXLab(HF) | OpenXLab(Origin) | Release Date |
|
||||
|---------------------------|------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------|
|
||||
| **InternLM2-Base-20B** | [🤗internlm2-base-20b](https://huggingface.co/internlm/internlm2-base-20b) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-base-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-20b-original) | 2024-01-17 |
|
||||
| **InternLM2-20B** | [🤗internlm2-20b](https://huggingface.co/internlm/internlm2-20b) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b-original) | 2024-01-17 |
|
||||
| **InternLM2-Chat-20B-SFT** | [🤗internlm2-chat-20b-sft](https://huggingface.co/internlm/internlm2-chat-20b-sft) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-chat-20b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b-sft/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-sft) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-sft-original) | 2024-01-17 |
|
||||
| **InternLM2-Chat-20B** | [🤗internlm2-chat-20b](https://huggingface.co/internlm/internlm2-chat-20b) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-chat-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-original) | 2024-01-17 |
|
||||
| Model | Transformers(HF) | ModelScope(HF) | OpenXLab(HF) | OpenXLab(Origin) | Release Date |
|
||||
| -------------------------- | ------------------------------------------ | ---------------------------------------- | -------------------------------------- | ------------------------------------------ | ------------ |
|
||||
| **InternLM2-Base-20B** | [🤗internlm2-base-20b](https://huggingface.co/internlm/internlm2-base-20b) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-base-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-20b-original) | 2024-01-17 |
|
||||
| **InternLM2-20B** | [🤗internlm2-20b](https://huggingface.co/internlm/internlm2-20b) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b-original) | 2024-01-17 |
|
||||
| **InternLM2-Chat-20B-SFT** | [🤗internlm2-chat-20b-sft](https://huggingface.co/internlm/internlm2-chat-20b-sft) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-chat-20b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b-sft/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-sft) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-sft-original) | 2024-01-17 |
|
||||
| **InternLM2-Chat-20B** | [🤗internlm2-chat-20b](https://huggingface.co/internlm/internlm2-chat-20b) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-chat-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-original) | 2024-01-17 |
|
||||
|
||||
- `HF` refers to the format used by HuggingFace in [transformers](https://github.com/huggingface/transformers), whereas `Origin` denotes the format adopted by the InternLM team in [InternEvo](https://github.com/InternLM/InternEvo).
|
||||
|
||||
|
@ -29,16 +29,15 @@ The base model of InternLM2 has the following technical features:
|
|||
|
||||
We have evaluated InternLM2 on several important benchmarks using the open-source evaluation tool [OpenCompass](https://github.com/open-compass/opencompass). Some of the evaluation results are shown in the table below. You are welcome to visit the [OpenCompass Leaderboard](https://opencompass.org.cn/rank) for more evaluation results.
|
||||
|
||||
| Dataset\Models | InternLM2-7B | InternLM2-Chat-7B | InternLM2-20B | InternLM2-Chat-20B | ChatGPT | GPT-4 |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| MMLU | 65.8 | 63.7 | 67.7 | 66.5 | 69.1 | 83.0 |
|
||||
| AGIEval | 49.9 | 47.2 | 53.0 | 50.3 | 39.9 | 55.1 |
|
||||
| BBH | 65.0 | 61.2 | 72.1 | 68.3 | 70.1 | 86.7 |
|
||||
| GSM8K | 70.8 | 70.7 | 76.1 | 79.6 | 78.2 | 91.4 |
|
||||
| MATH | 20.2 | 23.0 | 25.5 | 31.9 | 28.0 | 45.8 |
|
||||
| HumanEval | 43.3 | 59.8 | 48.8 | 67.1 | 73.2 | 74.4 |
|
||||
| MBPP(Sanitized) | 51.8 | 51.4 | 63.0 | 65.8 | 78.9 | 79.0 |
|
||||
|
||||
| Dataset\\Models | InternLM2-7B | InternLM2-Chat-7B | InternLM2-20B | InternLM2-Chat-20B | ChatGPT | GPT-4 |
|
||||
| --------------- | ------------ | ----------------- | ------------- | ------------------ | ------- | ----- |
|
||||
| MMLU | 65.8 | 63.7 | 67.7 | 66.5 | 69.1 | 83.0 |
|
||||
| AGIEval | 49.9 | 47.2 | 53.0 | 50.3 | 39.9 | 55.1 |
|
||||
| BBH | 65.0 | 61.2 | 72.1 | 68.3 | 70.1 | 86.7 |
|
||||
| GSM8K | 70.8 | 70.7 | 76.1 | 79.6 | 78.2 | 91.4 |
|
||||
| MATH | 20.2 | 23.0 | 25.5 | 31.9 | 28.0 | 45.8 |
|
||||
| HumanEval | 43.3 | 59.8 | 48.8 | 67.1 | 73.2 | 74.4 |
|
||||
| MBPP(Sanitized) | 51.8 | 51.4 | 63.0 | 65.8 | 78.9 | 79.0 |
|
||||
|
||||
- The evaluation results were obtained from [OpenCompass](https://github.com/open-compass/opencompass), and the evaluation configuration can be found in the configuration files provided by [OpenCompass](https://github.com/open-compass/opencompass).
|
||||
- The evaluation data may have numerical differences due to the version iteration of [OpenCompass](https://github.com/open-compass/opencompass), so please refer to the latest evaluation results of [OpenCompass](https://github.com/open-compass/opencompass).
|
||||
|
|
|
@ -16,13 +16,12 @@ The base model of InternLM2 has the following technical features:
|
|||
|
||||
## Model Zoo
|
||||
|
||||
|
||||
| Model | Transformers(HF) | ModelScope(HF) | OpenXLab(HF) | OpenXLab(Origin) | Release Date |
|
||||
|---------------------------|------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------|
|
||||
| **InternLM2-Base-7B** | [🤗internlm2-base-7b](https://huggingface.co/internlm/internlm2-base-7b) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-base-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-7b-original) | 2024-01-17 |
|
||||
| **InternLM2-7B** | [🤗internlm2-7b](https://huggingface.co/internlm/internlm2-7b) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b-original) | 2024-01-17 |
|
||||
| **InternLM2-Chat-7B-SFT** | [🤗internlm2-chat-7b-sft](https://huggingface.co/internlm/internlm2-chat-7b-sft) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-chat-7b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b-sft/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-sft) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-sft-original) | 2024-01-17 |
|
||||
| **InternLM2-Chat-7B** | [🤗internlm2-chat-7b](https://huggingface.co/internlm/internlm2-chat-7b) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-chat-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-original) | 2024-01-17 |
|
||||
| Model | Transformers(HF) | ModelScope(HF) | OpenXLab(HF) | OpenXLab(Origin) | Release Date |
|
||||
| ------------------------- | ------------------------------------------ | ---------------------------------------- | -------------------------------------- | ------------------------------------------- | ------------ |
|
||||
| **InternLM2-Base-7B** | [🤗internlm2-base-7b](https://huggingface.co/internlm/internlm2-base-7b) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-base-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-7b-original) | 2024-01-17 |
|
||||
| **InternLM2-7B** | [🤗internlm2-7b](https://huggingface.co/internlm/internlm2-7b) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b-original) | 2024-01-17 |
|
||||
| **InternLM2-Chat-7B-SFT** | [🤗internlm2-chat-7b-sft](https://huggingface.co/internlm/internlm2-chat-7b-sft) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-chat-7b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b-sft/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-sft) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-sft-original) | 2024-01-17 |
|
||||
| **InternLM2-Chat-7B** | [🤗internlm2-chat-7b](https://huggingface.co/internlm/internlm2-chat-7b) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-chat-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-original) | 2024-01-17 |
|
||||
|
||||
- `HF` refers to the format used by HuggingFace in [transformers](https://github.com/huggingface/transformers), whereas `Origin` denotes the format adopted by the InternLM team in [InternEvo](https://github.com/InternLM/InternEvo).
|
||||
|
||||
|
@ -30,16 +29,15 @@ The base model of InternLM2 has the following technical features:
|
|||
|
||||
We have evaluated InternLM2 on several important benchmarks using the open-source evaluation tool [OpenCompass](https://github.com/open-compass/opencompass). Some of the evaluation results are shown in the table below. You are welcome to visit the [OpenCompass Leaderboard](https://opencompass.org.cn/rank) for more evaluation results.
|
||||
|
||||
| Dataset\Models | InternLM2-7B | InternLM2-Chat-7B | InternLM2-20B | InternLM2-Chat-20B | ChatGPT | GPT-4 |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| MMLU | 65.8 | 63.7 | 67.7 | 66.5 | 69.1 | 83.0 |
|
||||
| AGIEval | 49.9 | 47.2 | 53.0 | 50.3 | 39.9 | 55.1 |
|
||||
| BBH | 65.0 | 61.2 | 72.1 | 68.3 | 70.1 | 86.7 |
|
||||
| GSM8K | 70.8 | 70.7 | 76.1 | 79.6 | 78.2 | 91.4 |
|
||||
| MATH | 20.2 | 23.0 | 25.5 | 31.9 | 28.0 | 45.8 |
|
||||
| HumanEval | 43.3 | 59.8 | 48.8 | 67.1 | 73.2 | 74.4 |
|
||||
| MBPP(Sanitized) | 51.8 | 51.4 | 63.0 | 65.8 | 78.9 | 79.0 |
|
||||
|
||||
| Dataset\\Models | InternLM2-7B | InternLM2-Chat-7B | InternLM2-20B | InternLM2-Chat-20B | ChatGPT | GPT-4 |
|
||||
| --------------- | ------------ | ----------------- | ------------- | ------------------ | ------- | ----- |
|
||||
| MMLU | 65.8 | 63.7 | 67.7 | 66.5 | 69.1 | 83.0 |
|
||||
| AGIEval | 49.9 | 47.2 | 53.0 | 50.3 | 39.9 | 55.1 |
|
||||
| BBH | 65.0 | 61.2 | 72.1 | 68.3 | 70.1 | 86.7 |
|
||||
| GSM8K | 70.8 | 70.7 | 76.1 | 79.6 | 78.2 | 91.4 |
|
||||
| MATH | 20.2 | 23.0 | 25.5 | 31.9 | 28.0 | 45.8 |
|
||||
| HumanEval | 43.3 | 59.8 | 48.8 | 67.1 | 73.2 | 74.4 |
|
||||
| MBPP(Sanitized) | 51.8 | 51.4 | 63.0 | 65.8 | 78.9 | 79.0 |
|
||||
|
||||
- The evaluation results were obtained from [OpenCompass](https://github.com/open-compass/opencompass), and the evaluation configuration can be found in the configuration files provided by [OpenCompass](https://github.com/open-compass/opencompass).
|
||||
- The evaluation data may have numerical differences due to the version iteration of [OpenCompass](https://github.com/open-compass/opencompass), so please refer to the latest evaluation results of [OpenCompass](https://github.com/open-compass/opencompass).
|
||||
|
|
|
@ -13,45 +13,45 @@ In terms of model structure, InternLM-20B opted for a deeper architecture, with
|
|||
|
||||
## Model Zoo

| Model                 | Transformers(HF) | ModelScope(HF) | OpenXLab(HF) | OpenXLab(Original) | Release Date |
| --------------------- | ---------------- | -------------- | ------------ | ------------------ | ------------ |
| **InternLM Chat 20B** | [🤗internlm/internlm-chat-20b](https://huggingface.co/internlm/internlm-20b-chat) | [<img src="../assets/modelscope_logo.png" width="20px" /> Shanghai_AI_Laboratory/internlm-chat-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-20b-chat/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-20b-original) | 2023-12-12 |
| **InternLM 20B**      | [🤗internlm/internlm-20b](https://huggingface.co/internlm/internlm-20b) | [<img src="../assets/modelscope_logo.png" width="20px" /> Shanghai_AI_Laboratory/internlm-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-20b-original) | 2023-09-20 |

## Performance Evaluation
On the 5 capability dimensions proposed by OpenCompass, InternLM-20B has achieved excellent results (the bolded scores represent the best performances within the 13B-33B parameter range).

| Capability    | Llama-13B | Llama2-13B | Baichuan2-13B | InternLM-20B | Llama-33B | Llama-65B | Llama2-70B |
| ------------- | --------- | ---------- | ------------- | ------------ | --------- | --------- | ---------- |
| Language      | 42.5      | 47         | 47.5          | **55**       | 44.6      | 47.1      | 51.6       |
| Knowledge     | 58.2      | 58.3       | 48.9          | 60.1         | **64**    | 66        | 67.7       |
| Understanding | 45.5      | 50.9       | 58.1          | **67.3**     | 50.6      | 54.2      | 60.8       |
| Reasoning     | 42.7      | 43.6       | 44.2          | **54.9**     | 46.4      | 49.8      | 55         |
| Examination   | 37.3      | 45.2       | 51.8          | **62.5**     | 47.4      | 49.7      | 57.3       |
| Overall       | 43.8      | 47.3       | 49.4          | **59.2**     | 48.9      | 51.9      | 57.4       |

The table below compares the performance of mainstream open-source models on some influential and typical datasets.
|               | Benchmarks       | Llama-13B | Llama2-13B | Baichuan2-13B | InternLM-20B | Llama-33B | Llama-65B | Llama2-70B |
| ------------- | ---------------- | --------- | ---------- | ------------- | ------------ | --------- | --------- | ---------- |
| Examination   | MMLU             | 47.73     | 54.99      | 59.55         | **62.05**    | 58.73     | 63.71     | 69.75      |
|               | C-Eval (val)     | 31.83     | 41.4       | **59.01**     | 58.8         | 37.47     | 40.36     | 50.13      |
|               | AGI-Eval         | 22.03     | 30.93      | 37.37         | **44.58**    | 33.53     | 33.92     | 40.02      |
| Knowledge     | BoolQ            | 78.75     | 82.42      | 67            | **87.46**    | 84.43     | 86.61     | 87.74      |
|               | TriviaQA         | 52.47     | 59.36      | 46.61         | 57.26        | **66.24** | 69.79     | 70.71      |
|               | NaturalQuestions | 20.17     | 24.85      | 16.32         | 25.15        | **30.89** | 33.41     | 34.16      |
| Understanding | CMRC             | 9.26      | 31.59      | 29.85         | **68.78**    | 14.17     | 34.73     | 43.74      |
|               | CSL              | 55        | 58.75      | 63.12         | **65.62**    | 57.5      | 59.38     | 60         |
|               | RACE (middle)    | 53.41     | 63.02      | 68.94         | **86.35**    | 64.55     | 72.35     | 81.55      |
|               | RACE (high)      | 47.63     | 58.86      | 67.18         | **83.28**    | 62.61     | 68.01     | 79.93      |
|               | XSum             | 20.37     | 23.37      | 25.23         | **35.54**    | 20.55     | 19.91     | 25.38      |
| Reasoning     | WinoGrande       | 64.64     | 64.01      | 67.32         | **69.38**    | 66.85     | 69.38     | 69.77      |
|               | BBH              | 37.93     | 45.62      | 48.98         | **52.51**    | 49.98     | 58.38     | 64.91      |
|               | GSM8K            | 20.32     | 29.57      | **52.62**     | **52.62**    | 42.3      | 54.44     | 63.31      |
|               | PIQA             | 79.71     | 79.76      | 78.07         | 80.25        | **81.34** | 82.15     | 82.54      |
| Programming   | HumanEval        | 14.02     | 18.9       | 17.07         | **25.61**    | 17.68     | 18.9      | 26.22      |
|               | MBPP             | 20.6      | 26.8       | 30.8          | **35.6**     | 28.4      | 33.6      | 39.6       |

Overall, InternLM-20B comprehensively outperforms open-source models in the 13B parameter range, and on reasoning evaluation sets it approaches or even surpasses the performance of Llama-65B.
@@ -10,27 +10,27 @@ InternLM-7B contains a 7 billion parameter base model and a chat model tailored
## Model Zoo

| Model                | Transformers(HF) | ModelScope(HF) | OpenXLab(HF) | OpenXLab(Original) | Release Date |
| -------------------- | ---------------- | -------------- | ------------ | ------------------ | ------------ |
| **InternLM Chat 7B** | [🤗internlm/internlm-chat-7b](https://huggingface.co/internlm/internlm-chat-7b) | [<img src="../assets/modelscope_logo.png" width="20px" /> Shanghai_AI_Laboratory/internlm-chat-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b-original) | 2023-12-12 |
| **InternLM 7B**      | [🤗internlm/internlm-7b](https://huggingface.co/internlm/internlm-7b) | [<img src="../assets/modelscope_logo.png" width="20px" /> Shanghai_AI_Laboratory/internlm-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-7b-original) | 2023-07-06 |

## Performance Evaluation
We conducted a comprehensive evaluation of InternLM using the open-source evaluation tool [OpenCompass](https://github.com/internLM/OpenCompass/). The evaluation covered five dimensions of capability: disciplinary competence, language competence, knowledge competence, inference competence, and comprehension competence. Some of the results are shown below; you can visit the [OpenCompass leaderboard](https://opencompass.org.cn/rank) for more.

| Datasets\\Models | **InternLM-Chat-7B** | **InternLM-7B** | LLaMA-7B | Baichuan-7B | ChatGLM2-6B | Alpaca-7B | Vicuna-7B |
| ---------------- | -------------------- | --------------- | -------- | ----------- | ----------- | --------- | --------- |
| C-Eval(Val)      | 52.0                 | 53.4            | 24.2     | 42.7        | 50.9        | 28.9      | 31.2      |
| MMLU             | 52.6                 | 51.0            | 35.2\*   | 41.5        | 46.0        | 39.7      | 47.3      |
| AGIEval          | 46.4                 | 37.6            | 20.8     | 24.6        | 39.0        | 24.1      | 26.4      |
| CommonSenseQA    | 80.8                 | 59.5            | 65.0     | 58.8        | 60.0        | 68.7      | 66.7      |
| BUSTM            | 80.6                 | 50.6            | 48.5     | 51.3        | 55.0        | 48.8      | 62.5      |
| CLUEWSC          | 81.8                 | 59.1            | 50.3     | 52.8        | 59.8        | 50.3      | 52.2      |
| MATH             | 5.0                  | 7.1             | 2.8      | 3.0         | 6.6         | 2.2       | 2.8       |
| GSM8K            | 36.2                 | 31.2            | 10.1     | 9.7         | 29.2        | 6.0       | 15.3      |
| HumanEval        | 15.9                 | 10.4            | 14.0     | 9.2         | 9.2         | 9.2       | 11.0      |
| RACE(High)       | 80.3                 | 57.4            | 46.9\*   | 28.1        | 66.3        | 40.7      | 54.0      |

- The evaluation results were obtained from [OpenCompass 20230706](https://github.com/internLM/OpenCompass/) (data marked with \* are taken from the original papers), and the evaluation configuration can be found in the configuration files provided by [OpenCompass](https://github.com/internLM/OpenCompass/).
- The evaluation numbers may differ slightly across versions of [OpenCompass](https://github.com/internLM/OpenCompass/), so please refer to the latest results from [OpenCompass](https://github.com/internLM/OpenCompass/).
@@ -1,2 +1,2 @@

sentencepiece
transformers>=4.34

@@ -2,33 +2,32 @@ import pytest

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

prompts = ['你好', "what's your name"]


def assert_model(response):
    assert len(response) != 0
    assert 'UNUSED_TOKEN' not in response


class TestChat:
    """Test cases for chat model."""

    @pytest.mark.parametrize(
        'model_name',
        [
            'internlm/internlm2-chat-7b',
            'internlm/internlm2-chat-7b-sft',
        ],
    )
    def test_demo_default(self, model_name):
        tokenizer = AutoTokenizer.from_pretrained(model_name,
                                                  trust_remote_code=True)
        # Set `torch_dtype=torch.float16` to load model in float16, otherwise
        # it will be loaded as float32 and might cause OOM Error.
        model = AutoModelForCausalLM.from_pretrained(
            model_name, torch_dtype=torch.float16,
            trust_remote_code=True).cuda()
        model = model.eval()
        for prompt in prompts:
            response, history = model.chat(tokenizer, prompt, history=[])

@@ -37,43 +36,45 @@ class TestChat:

        for prompt in prompts:
            length = 0
            for response, history in model.stream_chat(tokenizer,
                                                        prompt,
                                                        history=[]):
                print(response[length:], flush=True, end='')
                length = len(response)
            assert_model(response)


class TestBase:
    """Test cases for base model."""

    @pytest.mark.parametrize(
        'model_name',
        [
            'internlm/internlm2-7b',
            'internlm/internlm2-base-7b',
        ],
    )
    def test_demo_default(self, model_name):
        tokenizer = AutoTokenizer.from_pretrained(model_name,
                                                  trust_remote_code=True)
        # Set `torch_dtype=torch.float16` to load model in float16, otherwise
        # it will be loaded as float32 and might cause OOM Error.
        model = AutoModelForCausalLM.from_pretrained(
            model_name, torch_dtype=torch.float16,
            trust_remote_code=True).cuda()
        for prompt in prompts:
            inputs = tokenizer(prompt, return_tensors='pt')
            for k, v in inputs.items():
                inputs[k] = v.cuda()
            gen_kwargs = {
                'max_length': 128,
                'top_p': 10,
                'temperature': 1.0,
                'do_sample': True,
                'repetition_penalty': 1.0,
            }
            output = model.generate(**inputs, **gen_kwargs)
            output = tokenizer.decode(output[0].tolist(),
                                      skip_special_tokens=True)
            print(output)
            assert_model(output)

@@ -5,6 +5,7 @@
We offer `convert2llama.py`, a script designed to seamlessly transform InternLM2 checkpoints (HF format) into LLaMA checkpoints (HF format). Here, HF refers to the format used by HuggingFace Transformers.
### Usage
```
python convert2llama.py --src /path/to/internlm2/ckpt --tgt /path/to/target/ckpt
```
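
After conversion, the resulting checkpoint should load with the standard Llama classes from Transformers without `trust_remote_code`. The sketch below is a quick sanity check rather than part of the script; it reuses the placeholder target path from the command above.

```
from transformers import AutoTokenizer, LlamaForCausalLM

# Placeholder: the directory passed to --tgt above.
ckpt = '/path/to/target/ckpt'

tokenizer = AutoTokenizer.from_pretrained(ckpt)
model = LlamaForCausalLM.from_pretrained(ckpt)
print(model.config.architectures)  # expected: ['LlamaForCausalLM']
```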
@@ -12,18 +12,18 @@ from transformers import AutoConfig, LlamaConfig, LlamaTokenizer

def save_conifg(config, tgt):
    config_dict = config.to_dict()
    unnecessary_keys = [
        '_name_or_path',
        'auto_map',
        'transformers_version',
        'model_type',
        'architectures',
        'tokenizer_class',
        'attn_implementation',
    ]
    for k in unnecessary_keys:
        config_dict.pop(k, None)
    config_dict['attention_bias'] = config_dict.pop('bias')
    config_dict['architectures'] = ['LlamaForCausalLM']
    llama_config = LlamaConfig(**config_dict)
    llama_config.save_pretrained(tgt)

@@ -31,106 +31,109 @@ def save_conifg(config, tgt):

def convert(src, tgt):
    """Convert InternLM2 huggingface checkpoints to Llama-style."""

    print('Convert InternLM2 huggingface checkpoints to Llama...')

    config = AutoConfig.from_pretrained(src, trust_remote_code=True)
    assert not config.bias, 'Cannot convert InternLM Model with bias to LLaMA.'

    head_dim = config.hidden_size // config.num_attention_heads
    num_key_value_groups = config.num_attention_heads \
        // config.num_key_value_heads

    # load index json file
    index_file = os.path.join(src, 'pytorch_model.bin.index.json')
    if os.path.exists(index_file):
        with open(index_file) as fp:
            index_dict = json.load(fp)
            index_dict['weight_map'] = {}
    else:
        index_dict = None

    os.makedirs(tgt, exist_ok=True)
    for filename in tqdm(os.listdir(src)):
        if not filename.endswith('.bin'):
            continue
        states = torch.load(os.path.join(src, filename))
        llama_states = {}
        for k, v in states.copy().items():
            if 'wqkv' in k:
                v = rearrange(
                    v,
                    '(h gs d) dim -> h gs d dim',
                    gs=2 + num_key_value_groups,
                    d=head_dim,
                )
                wq, wk, wv = torch.split(v, [num_key_value_groups, 1, 1],
                                         dim=1)
                wq = rearrange(wq, 'h gs d dim -> (h gs d) dim')
                wk = rearrange(wk, 'h gs d dim -> (h gs d) dim')
                wv = rearrange(wv, 'h gs d dim -> (h gs d) dim')
                _prefix = k.split('attention')[0]
                wq_key = _prefix + 'self_attn.q_proj.weight'
                wk_key = _prefix + 'self_attn.k_proj.weight'
                wv_key = _prefix + 'self_attn.v_proj.weight'
                llama_states[wq_key] = wq.clone()
                llama_states[wk_key] = wk.clone()
                llama_states[wv_key] = wv.clone()

            elif 'attention.wo' in k:
                new_k = k.replace('attention.wo', 'self_attn.o_proj')
                llama_states[new_k] = v
            elif 'feed_forward.w1' in k:
                new_k = k.replace('feed_forward.w1', 'mlp.gate_proj')
                llama_states[new_k] = v
            elif 'feed_forward.w2' in k:
                new_k = k.replace('feed_forward.w2', 'mlp.down_proj')
                llama_states[new_k] = v
            elif 'feed_forward.w3' in k:
                new_k = k.replace('feed_forward.w3', 'mlp.up_proj')
                llama_states[new_k] = v
            elif 'attention_norm' in k:
                new_k = k.replace('attention_norm', 'input_layernorm')
                llama_states[new_k] = v
            elif 'ffn_norm' in k:
                new_k = k.replace('ffn_norm', 'post_attention_layernorm')
                llama_states[new_k] = v
            elif 'tok_embeddings' in k:
                llama_states['model.embed_tokens.weight'] = v
            elif 'output' in k:
                llama_states['lm_head.weight'] = v
            else:
                llama_states[k] = v

        if index_dict is not None:
            for k in llama_states:
                index_dict['weight_map'][k] = filename
        print(f"Saving to {os.path.join(tgt, filename)}...", flush=True)
        torch.save(llama_states, os.path.join(tgt, filename))
        del states

    print('Saving config and tokenizer...')
    # index.json
    if index_dict is not None:
        with open(os.path.join(tgt, 'pytorch_model.bin.index.json'),
                  'w') as fp:
            json.dump(index_dict, fp, indent=2)
    # tokenizer
    tokenizer = LlamaTokenizer.from_pretrained(src)
    tokenizer.init_kwargs.pop('auto_map', None)
    tokenizer.save_pretrained(tgt)
    # config
    save_conifg(config, tgt)
    print('Done!')


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--src', type=str, help='Input folder')
    parser.add_argument('--tgt', type=str, help='Output folder')

    args = parser.parse_args()

    return args


if __name__ == '__main__':
    args = parse_args()

    convert(args.src, args.tgt)
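
The trickiest step in `convert` is unpacking the fused `wqkv` weight into separate q/k/v projections: each group of `num_key_value_groups` query heads shares one key head and one value head, so the fused weight is reshaped into groups of size `2 + num_key_value_groups` and split along that axis. A standalone sketch of that reshape and split is shown below; the toy dimensions are assumptions chosen only for illustration.

```
import torch
from einops import rearrange

# Toy dimensions (assumptions for illustration): 8 query heads sharing
# 2 key/value heads, with head_dim 4.
num_attention_heads = 8
num_key_value_heads = 2
head_dim = 4
hidden_size = num_attention_heads * head_dim

num_key_value_groups = num_attention_heads // num_key_value_heads
gs = 2 + num_key_value_groups  # query heads per group, plus one k and one v head

# Fused wqkv weight: groups of [q..q, k, v] head slices stacked along dim 0.
wqkv = torch.randn(num_key_value_heads * gs * head_dim, hidden_size)

v = rearrange(wqkv, '(h gs d) dim -> h gs d dim', gs=gs, d=head_dim)
wq, wk, wv = torch.split(v, [num_key_value_groups, 1, 1], dim=1)
wq = rearrange(wq, 'h gs d dim -> (h gs d) dim')
wk = rearrange(wk, 'h gs d dim -> (h gs d) dim')
wv = rearrange(wv, 'h gs d dim -> (h gs d) dim')

print(wq.shape)  # torch.Size([32, 32]) -> q_proj.weight
print(wk.shape)  # torch.Size([8, 32])  -> k_proj.weight (grouped KV)
print(wv.shape)  # torch.Size([8, 32])  -> v_proj.weight (grouped KV)
```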