diff --git a/.github/workflows/daily_tests.yaml b/.github/workflows/daily_tests.yaml
index 2bc64dd..d088c96 100644
--- a/.github/workflows/daily_tests.yaml
+++ b/.github/workflows/daily_tests.yaml
@@ -26,8 +26,8 @@ jobs:
         pip install transformers
         pip install sentencepiece
         srun -p ${SLURM_PARTITION} --kill-on-bad-exit=1 --job-name=${GITHUB_RUN_ID}-${GITHUB_JOB} --gpus-per-task=2 pytest -s -v --color=yes ./tests/test_hf_model.py
-        conda deactivate 
-    
+        conda deactivate
+
   clear_env:
       if: ${{ !cancelled() }}
       needs: [HF_model]
diff --git a/.github/workflows/lint_check.yaml b/.github/workflows/lint_check.yaml
index e661e80..ed042f4 100644
--- a/.github/workflows/lint_check.yaml
+++ b/.github/workflows/lint_check.yaml
@@ -24,15 +24,3 @@ jobs:
         run: |
           pip install isort==5.12.0
           isort --check --profile=black .
-
-      - name: lint-black
-        run: |
-          pip install black==22.8.0
-          BLACK_EXCLUDE_SETTINGS='\.venv/|\.local/|\.cache/|\.git/'
-          black --line-length=120 --check --exclude $BLACK_EXCLUDE_SETTINGS ./chat/web_demo.py
-
-      - name: lint-pylint
-        run: |
-          pip install pylint==v2.17.2
-          PYLINT_DISABLE_LIST="C0114,C0415,W0212,W0235,W0238,W0621,C0103,R1735,C2801,E0402,C0412,W0719,R1728,W1514,W0718,W0105,W0707,C0209,W0703,W1203"
-          pylint --rcfile .pylintrc --disable=$PYLINT_DISABLE_LIST ./chat/web_demo.py
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 8a43efd..6f29f08 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,53 +1,44 @@
-# See https://pre-commit.com for more information
-# See https://pre-commit.com/hooks.html for more hooks
 repos:
--   repo: https://github.com/psf/black
-    rev: '22.8.0'
+  - repo: https://github.com/PyCQA/flake8
+    rev: 5.0.4
     hooks:
-    -   id: black
-        args:
-        - --line-length=120
--   repo: https://github.com/pycqa/isort
-    rev: '5.12.0'
+      - id: flake8
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.11.5
     hooks:
-    -   id: isort
-        name: isort
-        files: "\\.(py)$"
-        args:
-        - --profile=black
--   repo: https://github.com/PyCQA/flake8
-    rev: '3.8.4'
+      - id: isort
+  - repo: https://github.com/pre-commit/mirrors-yapf
+    rev: v0.32.0
     hooks:
-    -   id: flake8
-        args:
-        - --ignore=F403,F405,W504,W503,E203
-        - --max-line-length=120
--   repo: https://github.com/pre-commit/pygrep-hooks
-    rev: v1.9.0
+      - id: yapf
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.2.1
     hooks:
-    -   id: python-check-blanket-noqa
--   repo: https://github.com/pre-commit/pre-commit-hooks
+      - id: codespell
+  - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v4.3.0
     hooks:
-    -   id: trailing-whitespace
-    -   id: end-of-file-fixer
-    -   id: check-added-large-files
-        args: ['--maxkb=100',--enforce-all]
-    -   id: check-json
-    -   id: check-docstring-first
-    -   id: check-yaml
-    -   id: debug-statements
-    -   id: mixed-line-ending
--   repo: https://github.com/PyCQA/pylint/
-    rev: v2.17.2
+      - id: trailing-whitespace
+      - id: check-yaml
+      - id: end-of-file-fixer
+      - id: requirements-txt-fixer
+      - id: double-quote-string-fixer
+      - id: check-merge-conflict
+      - id: fix-encoding-pragma
+        args: ["--remove"]
+      - id: mixed-line-ending
+        args: ["--fix=lf"]
+  - repo: https://github.com/executablebooks/mdformat
+    rev: 0.7.9
     hooks:
-    -   id: pylint
-        name: pylint
-        entry: pylint
-        language: system
-        types: [python]
-        args:
-            [
-                '--rcfile=.pylintrc',
-                '--disable=C0114,C0415,W0212,W0235,W0238,W0621,C0103,R1735,C2801,E0402,C0412,W0719,R1728,W1514,W0718,W0105,W0707,C0209,W0703,W1203'
-            ]
+      - id: mdformat
+        args: ["--number", "--table-width", "200"]
+        additional_dependencies:
+          - mdformat-openmmlab
+          - mdformat_frontmatter
+          - linkify-it-py
+  - repo: https://github.com/myint/docformatter
+    rev: v1.3.1
+    hooks:
+      - id: docformatter
+        args: ["--in-place", "--wrap-descriptions", "79"]
diff --git a/README.md b/README.md
index da3ed09..bfb84d0 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,9 @@
 
 [![license](./assets/license.svg)](./LICENSE)
 [![evaluation](./assets/compass_support.svg)](https://github.com/internLM/OpenCompass/)
+
 <!-- [![Documentation Status](https://readthedocs.org/projects/internlm/badge/?version=latest)](https://internlm.readthedocs.io/zh_CN/latest/?badge=latest) -->
+
 [📘Commercial Application](#license) |
 [🤗HuggingFace](https://huggingface.co/internlm) |
 [🆕Update News](#news) |
@@ -45,26 +47,26 @@ InternLM2 series are released with the following features:
 
 ## News
 
-[2024.01.23] We release InternLM2-Math-7B and InternLM2-Math-20B with pretraining and SFT checkpoints. They surpass ChatGPT with small sizes. See [InternLM-Math](https://github.com/InternLM/internlm-math) for details and download.
+\[2024.01.23\] We release InternLM2-Math-7B and InternLM2-Math-20B with pretraining and SFT checkpoints. They surpass ChatGPT with small sizes. See [InternLM-Math](https://github.com/InternLM/internlm-math) for details and download.
 
-[2024.01.17] We release InternLM2-7B and InternLM2-20B and their corresponding chat models with stronger capabilities in all dimensions. See [model zoo below](#model-zoo) for download or [model cards](./model_cards/) for more details.
+\[2024.01.17\] We release InternLM2-7B and InternLM2-20B and their corresponding chat models with stronger capabilities in all dimensions. See [model zoo below](#model-zoo) for download or [model cards](./model_cards/) for more details.
 
-[2023.12.13] InternLM-7B-Chat and InternLM-20B-Chat checkpoints are updated. With an improved finetuning strategy, the new chat models can generate higher quality responses with greater stylistic diversity.
+\[2023.12.13\] InternLM-7B-Chat and InternLM-20B-Chat checkpoints are updated. With an improved finetuning strategy, the new chat models can generate higher quality responses with greater stylistic diversity.
 
-[2023.09.20] InternLM-20B is released with base and chat versions.
+\[2023.09.20\] InternLM-20B is released with base and chat versions.
 
 ## Model Zoo
 
-| Model | Transformers(HF) | ModelScope(HF) | OpenXLab(HF) | OpenXLab(Origin) | Release Date |
-|---------------------------|------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------|
-| **InternLM2-Base-7B**           | [🤗internlm2-base-7b](https://huggingface.co/internlm/internlm2-base-7b)                     | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-base-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-7b/summary)                     | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-7b)           | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-7b-original) |  2024-01-17   |
-| **InternLM2-7B**           | [🤗internlm2-7b](https://huggingface.co/internlm/internlm2-7b)                     | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-7b/summary)                     | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b-original)          |  2024-01-17   |
-| **InternLM2-Chat-7B-SFT**      | [🤗internlm2-chat-7b-sft](https://huggingface.co/internlm/internlm2-chat-7b-sft)           | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-7b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b-sft/summary)           | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-sft) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-sft-original)     | 2024-01-17  |
-| **InternLM2-Chat-7B**      | [🤗internlm2-chat-7b](https://huggingface.co/internlm/internlm2-chat-7b)           | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b/summary)           | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-original)     | 2024-01-17  |
-| **InternLM2-Base-20B** | [🤗internlm2-base-20b](https://huggingface.co/internlm/internlm2-base-20b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-base-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-20b-original) | 2024-01-17 |
-| **InternLM2-20B** | [🤗internlm2-20b](https://huggingface.co/internlm/internlm2-20b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b-original) | 2024-01-17 |
-| **InternLM2-Chat-20B-SFT**     | [🤗internlm2-chat-20b-sft](https://huggingface.co/internlm/internlm2-chat-20b-sft)         | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-20b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b-sft/summary)         | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-sft) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-sft-original)    | 2024-01-17   |
-| **InternLM2-Chat-20B**     | [🤗internlm2-chat-20b](https://huggingface.co/internlm/internlm2-chat-20b)         | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b/summary)         | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-original)     | 2024-01-17   |
+| Model                      | Transformers(HF)                           | ModelScope(HF)                           | OpenXLab(HF)                           | OpenXLab(Origin)                           | Release Date |
+| -------------------------- | ------------------------------------------ | ---------------------------------------- | -------------------------------------- | ------------------------------------------ | ------------ |
+| **InternLM2-Base-7B**      | [🤗internlm2-base-7b](https://huggingface.co/internlm/internlm2-base-7b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-base-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-7b-original) | 2024-01-17   |
+| **InternLM2-7B**           | [🤗internlm2-7b](https://huggingface.co/internlm/internlm2-7b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b-original) | 2024-01-17   |
+| **InternLM2-Chat-7B-SFT**  | [🤗internlm2-chat-7b-sft](https://huggingface.co/internlm/internlm2-chat-7b-sft) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-7b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b-sft/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-sft) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-sft-original) | 2024-01-17   |
+| **InternLM2-Chat-7B**      | [🤗internlm2-chat-7b](https://huggingface.co/internlm/internlm2-chat-7b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-original) | 2024-01-17   |
+| **InternLM2-Base-20B**     | [🤗internlm2-base-20b](https://huggingface.co/internlm/internlm2-base-20b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-base-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-20b-original) | 2024-01-17   |
+| **InternLM2-20B**          | [🤗internlm2-20b](https://huggingface.co/internlm/internlm2-20b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b-original) | 2024-01-17   |
+| **InternLM2-Chat-20B-SFT** | [🤗internlm2-chat-20b-sft](https://huggingface.co/internlm/internlm2-chat-20b-sft) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-20b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b-sft/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-sft) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-sft-original) | 2024-01-17   |
+| **InternLM2-Chat-20B**     | [🤗internlm2-chat-20b](https://huggingface.co/internlm/internlm2-chat-20b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-original) | 2024-01-17   |
 
 **Notes:**
 
@@ -85,22 +87,22 @@ The release of InternLM2 series contains two model sizes: 7B and 20B. 7B models
 
 ### Objective Evaluation
 
-| Dataset                | Baichuan2-7B-Chat | Mistral-7B-Instruct-v0.2 | Qwen-7B-Chat | InternLM2-Chat-7B | ChatGLM3-6B | Baichuan2-13B-Chat | Mixtral-8x7B-Instruct-v0.1 | Qwen-14B-Chat | InternLM2-Chat-20B |
-|-----------------------|-------------------|--------------------------|--------------|-------------------|-------------|---------------------|--------------------------------|---------------|---------------------|
-| MMLU                  | 50.1              | 59.2                     | 57.1         | 63.7              | 58.0        | 56.6                | 70.3                          | 66.7          | 66.5                |
-| CMMLU                 | 53.4              | 42.0                     | 57.9         | 63.0              | 57.8        | 54.8                | 50.6                          | 68.1          | 65.1                |
-| AGIEval               | 35.3              | 34.5                     | 39.7         | 47.2              | 44.2        | 40.0                | 41.7                          | 46.5          | 50.3                |
-| C-Eval                | 53.9              | 42.4                     | 59.8         | 60.8              | 59.1        | 56.3                | 54.0                          | 71.5          | 63.0                |
-| TrivialQA             | 37.6              | 35.0                     | 46.1         | 50.8              | 38.1        | 40.3                | 57.7                          | 54.5          | 53.9                |
-| NaturalQuestions      | 12.8              | 8.1                      | 18.6         | 24.1              | 14.0        | 12.7                | 22.5                          | 22.9          | 25.9                |
-| C3                    | 78.5              | 66.9                     | 84.4         | 91.5              | 79.3        | 84.4                | 82.1                          | 91.5          | 93.5                |
-| CMRC                  | 8.1               | 5.6                      | 14.6         | 63.8              | 43.2        | 27.8                | 5.3                           | 13.0          | 50.4                |
-| WinoGrande            | 49.9              | 50.8                     | 54.2         | 65.8              | 61.7        | 50.9                | 60.9                          | 55.7          | 74.8                |
-| BBH                   | 35.9              | 46.5                     | 45.5         | 61.2              | 56.0        | 42.5                | 57.3                          | 55.8          | 68.3                |
-| GSM-8K                | 32.4              | 48.3                     | 44.1         | 70.7              | 53.8        | 56.0                | 71.7                          | 57.7          | 79.6                |
-| Math                  | 5.7               | 8.6                      | 12.0         | 23.0              | 20.4        | 4.3                 | 22.5                          | 27.6          | 31.9                |
-| HumanEval              | 17.7              | 35.4                     | 36.0         | 59.8              | 52.4        | 19.5                | 37.8                          | 40.9          | 67.1                |
-| MBPP                  | 37.7              | 25.7                     | 33.9         | 51.4              | 55.6        | 40.9                | 40.9                          | 30.0          | 65.8                |
+| Dataset          | Baichuan2-7B-Chat | Mistral-7B-Instruct-v0.2 | Qwen-7B-Chat | InternLM2-Chat-7B | ChatGLM3-6B | Baichuan2-13B-Chat | Mixtral-8x7B-Instruct-v0.1 | Qwen-14B-Chat | InternLM2-Chat-20B |
+| ---------------- | ----------------- | ------------------------ | ------------ | ----------------- | ----------- | ------------------ | -------------------------- | ------------- | ------------------ |
+| MMLU             | 50.1              | 59.2                     | 57.1         | 63.7              | 58.0        | 56.6               | 70.3                       | 66.7          | 66.5               |
+| CMMLU            | 53.4              | 42.0                     | 57.9         | 63.0              | 57.8        | 54.8               | 50.6                       | 68.1          | 65.1               |
+| AGIEval          | 35.3              | 34.5                     | 39.7         | 47.2              | 44.2        | 40.0               | 41.7                       | 46.5          | 50.3               |
+| C-Eval           | 53.9              | 42.4                     | 59.8         | 60.8              | 59.1        | 56.3               | 54.0                       | 71.5          | 63.0               |
+| TriviaQA         | 37.6              | 35.0                     | 46.1         | 50.8              | 38.1        | 40.3               | 57.7                       | 54.5          | 53.9               |
+| NaturalQuestions | 12.8              | 8.1                      | 18.6         | 24.1              | 14.0        | 12.7               | 22.5                       | 22.9          | 25.9               |
+| C3               | 78.5              | 66.9                     | 84.4         | 91.5              | 79.3        | 84.4               | 82.1                       | 91.5          | 93.5               |
+| CMRC             | 8.1               | 5.6                      | 14.6         | 63.8              | 43.2        | 27.8               | 5.3                        | 13.0          | 50.4               |
+| WinoGrande       | 49.9              | 50.8                     | 54.2         | 65.8              | 61.7        | 50.9               | 60.9                       | 55.7          | 74.8               |
+| BBH              | 35.9              | 46.5                     | 45.5         | 61.2              | 56.0        | 42.5               | 57.3                       | 55.8          | 68.3               |
+| GSM-8K           | 32.4              | 48.3                     | 44.1         | 70.7              | 53.8        | 56.0               | 71.7                       | 57.7          | 79.6               |
+| Math             | 5.7               | 8.6                      | 12.0         | 23.0              | 20.4        | 4.3                | 22.5                       | 27.6          | 31.9               |
+| HumanEval        | 17.7              | 35.4                     | 36.0         | 59.8              | 52.4        | 19.5               | 37.8                       | 40.9          | 67.1               |
+| MBPP             | 37.7              | 25.7                     | 33.9         | 51.4              | 55.6        | 40.9               | 40.9                       | 30.0          | 65.8               |
 
 - Performance of MBPP is reported with MBPP(Sanitized)
 
@@ -108,16 +110,16 @@ The release of InternLM2 series contains two model sizes: 7B and 20B. 7B models
 
 - We have evaluated our model on [AlpacaEval 2.0](https://tatsu-lab.github.io/alpaca_eval/) and InternLM2-Chat-20B surpass Claude 2, GPT-4(0613) and Gemini Pro.
 
-| Model Name              | Win Rate | Length |
-| ----------------------- | -------- | ------ |
-| GPT-4 Turbo      | 50.00%   | 2049   |
-| GPT-4         | 23.58%   | 1365   |
-| GPT-4 0314             | 22.07%   | 1371   |
-| Mistral Medium      | 21.86%   | 1500   |
-| XwinLM 70b V0.1   | 21.81%   | 1775   |
-| InternLM2 Chat 20B  | 21.75%   | 2373   |
+| Model Name         | Win Rate | Length |
+| ------------------ | -------- | ------ |
+| GPT-4 Turbo        | 50.00%   | 2049   |
+| GPT-4              | 23.58%   | 1365   |
+| GPT-4 0314         | 22.07%   | 1371   |
+| Mistral Medium     | 21.86%   | 1500   |
+| XwinLM 70b V0.1    | 21.81%   | 1775   |
+| InternLM2 Chat 20B | 21.75%   | 2373   |
 | Mixtral 8x7B v0.1  | 18.26%   | 1465   |
-| Claude 2            | 17.19%   | 1069   |
+| Claude 2           | 17.19%   | 1069   |
 | Gemini Pro         | 16.85%   | 1315   |
 | GPT-4 0613         | 15.76%   | 1140   |
 | Claude 2.1         | 15.73%   | 1096   |
@@ -129,9 +131,11 @@ The release of InternLM2 series contains two model sizes: 7B and 20B. 7B models
 We briefly show the usages with [Transformers](#import-from-transformers), [ModelScope](#import-from-modelscope), and [Web demos](#dialogue).
 The chat models adopt [chatml format](./chat/chat_format.md) to support both chat and agent applications.
 To ensure a better usage effect, please make sure that the installed transformers library version meets the following requirements before performing inference with [Transformers](#import-from-transformers) or [ModelScope](#import-from-modelscope):
+
 ```
 transformers >= 4.34
 ```
+
 ### Import from Transformers
 
 To load the InternLM2-7B-Chat model using Transformers, use the following code:
@@ -143,7 +147,7 @@ tokenizer = AutoTokenizer.from_pretrained("internlm/internlm2-chat-7b", trust_re
 # Set `torch_dtype=torch.float16` to load model in float16, otherwise it will be loaded as float32 and might cause OOM Error.
 model = AutoModelForCausalLM.from_pretrained("internlm/internlm2-chat-7b", device_map="auto", trust_remote_code=True, torch_dtype=torch.float16)
 # (Optional) If on low resource devices, you can load model in 4-bit or 8-bit to further save GPU memory via bitsandbytes.
-  # InternLM 7B in 4bit will cost nearly 8GB GPU memory. 
+  # InternLM 7B in 4bit will cost nearly 8GB GPU memory.
   # pip install -U bitsandbytes
   # 8-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_8bit=True)
   # 4-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_4bit=True)
@@ -167,7 +171,7 @@ tokenizer = AutoTokenizer.from_pretrained(model_dir, device_map="auto", trust_re
 # Set `torch_dtype=torch.float16` to load model in float16, otherwise it will be loaded as float32 and might cause OOM Error.
 model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, torch_dtype=torch.float16)
 # (Optional) If on low resource devices, you can load model in 4-bit or 8-bit to further save GPU memory via bitsandbytes.
-  # InternLM 7B in 4bit will cost nearly 8GB GPU memory. 
+  # InternLM 7B in 4bit will cost nearly 8GB GPU memory.
   # pip install -U bitsandbytes
   # 8-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_8bit=True)
   # 4-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_4bit=True)
diff --git a/README_zh-CN.md b/README_zh-CN.md
index 0eb6942..49cf811 100644
--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -16,6 +16,7 @@
 
 [![license](./assets//license.svg)](https://github.com/open-mmlab/mmdetection/blob/main/LICENSE)
 [![evaluation](./assets//compass_support.svg)](https://github.com/internLM/OpenCompass/)
+
 <!-- [![Documentation Status](https://readthedocs.org/projects/internlm/badge/?version=latest)](https://internlm.readthedocs.io/zh_CN/latest/?badge=latest) -->
 
 [📘商业授权](#开源许可证) |
@@ -43,26 +44,26 @@ InternLM2 系列模型在本仓库正式发布，具有如下特性：
 
 ## 更新
 
-[2024.01.23] 我们发布了 InternLM2-Math-7B 和 InternLM2-Math-20B 以及相关的对话模型。InternLM-Math以较小的尺寸超过了ChatGPT的表现。可以点击[InternLM-Math](https://github.com/InternLM/internlm-math)进行下载，并了解详情。
+\[2024.01.23\] 我们发布了 InternLM2-Math-7B 和 InternLM2-Math-20B 以及相关的对话模型。InternLM-Math以较小的尺寸超过了ChatGPT的表现。可以点击[InternLM-Math](https://github.com/InternLM/internlm-math)进行下载，并了解详情。
 
-[2024.01.17] 我们发布了 InternLM2-7B 和 InternLM2-20B 以及相关的对话模型，InternLM2 在数理、代码、对话、创作等各方面能力都获得了长足进步，综合性能达到开源模型的领先水平。可以点击[下面的模型库](#model-zoo)进行下载或者[查看模型文档](./model_cards/)来了解更多细节.
+\[2024.01.17\] 我们发布了 InternLM2-7B 和 InternLM2-20B 以及相关的对话模型，InternLM2 在数理、代码、对话、创作等各方面能力都获得了长足进步，综合性能达到开源模型的领先水平。可以点击[下面的模型库](#model-zoo)进行下载或者[查看模型文档](./model_cards/)来了解更多细节。
 
-[2023.12.13] 我们更新了 InternLM-7B-Chat 和 InternLM-20B-Chat 模型权重。通过改进微调数据和训练策略，新版对话模型生成的回复质量更高、语言风格更加多元。
+\[2023.12.13\] 我们更新了 InternLM-7B-Chat 和 InternLM-20B-Chat 模型权重。通过改进微调数据和训练策略，新版对话模型生成的回复质量更高、语言风格更加多元。
 
-[2023.09.20] InternLM-20B 已发布，包括基础版和对话版。
+\[2023.09.20\] InternLM-20B 已发布，包括基础版和对话版。
 
 ## Model Zoo
 
-| Model | Transformers(HF) | ModelScope(HF) | OpenXLab(HF) | OpenXLab(Origin) | Release Date |
-|---------------------------|------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------|
-| **InternLM2-Base-7B**           | [🤗internlm2-base-7b](https://huggingface.co/internlm/internlm2-base-7b)                     | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-base-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-7b/summary)                     | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-7b)           | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-7b-original) |  2024-01-17   |
-| **InternLM2-7B**           | [🤗internlm2-7b](https://huggingface.co/internlm/internlm2-7b)                     | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-7b/summary)                     | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b-original)          |  2024-01-17   |
-| **InternLM2-Chat-7B-SFT**      | [🤗internlm2-chat-7b-sft](https://huggingface.co/internlm/internlm2-chat-7b-sft)           | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-7b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b-sft/summary)           | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-sft) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-sft-original)     | 2024-01-17  |
-| **InternLM2-Chat-7B**      | [🤗internlm2-chat-7b](https://huggingface.co/internlm/internlm2-chat-7b)           | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b/summary)           | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-original)     | 2024-01-17  |
-| **InternLM2-Base-20B** | [🤗internlm2-base-20b](https://huggingface.co/internlm/internlm2-base-20b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-base-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-20b-original) | 2024-01-17 |
-| **InternLM2-20B** | [🤗internlm2-20b](https://huggingface.co/internlm/internlm2-20b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b-original) | 2024-01-17 |
-| **InternLM2-Chat-20B-SFT**     | [🤗internlm2-chat-20b-sft](https://huggingface.co/internlm/internlm2-chat-20b-sft)         | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-20b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b-sft/summary)         | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-sft) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-sft-original)    | 2024-01-17   |
-| **InternLM2-Chat-20B**     | [🤗internlm2-chat-20b](https://huggingface.co/internlm/internlm2-chat-20b)         | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b/summary)         | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-original)     | 2024-01-17   |
+| Model                      | Transformers(HF)                           | ModelScope(HF)                           | OpenXLab(HF)                           | OpenXLab(Origin)                           | Release Date |
+| -------------------------- | ------------------------------------------ | ---------------------------------------- | -------------------------------------- | ------------------------------------------ | ------------ |
+| **InternLM2-Base-7B**      | [🤗internlm2-base-7b](https://huggingface.co/internlm/internlm2-base-7b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-base-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-7b-original) | 2024-01-17   |
+| **InternLM2-7B**           | [🤗internlm2-7b](https://huggingface.co/internlm/internlm2-7b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b-original) | 2024-01-17   |
+| **InternLM2-Chat-7B-SFT**  | [🤗internlm2-chat-7b-sft](https://huggingface.co/internlm/internlm2-chat-7b-sft) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-7b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b-sft/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-sft) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-sft-original) | 2024-01-17   |
+| **InternLM2-Chat-7B**      | [🤗internlm2-chat-7b](https://huggingface.co/internlm/internlm2-chat-7b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-original) | 2024-01-17   |
+| **InternLM2-Base-20B**     | [🤗internlm2-base-20b](https://huggingface.co/internlm/internlm2-base-20b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-base-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-20b-original) | 2024-01-17   |
+| **InternLM2-20B**          | [🤗internlm2-20b](https://huggingface.co/internlm/internlm2-20b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b-original) | 2024-01-17   |
+| **InternLM2-Chat-20B-SFT** | [🤗internlm2-chat-20b-sft](https://huggingface.co/internlm/internlm2-chat-20b-sft) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-20b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b-sft/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-sft) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-sft-original) | 2024-01-17   |
+| **InternLM2-Chat-20B**     | [🤗internlm2-chat-20b](https://huggingface.co/internlm/internlm2-chat-20b) | [<img src="./assets/modelscope_logo.png" width="20px" /> internlm2-chat-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-original) | 2024-01-17   |
 
 **模型说明：**
 
@@ -83,22 +84,22 @@ InternLM2 系列模型在本仓库正式发布，具有如下特性：
 
 ### 客观评测
 
-| Dataset                | Baichuan2-7B-Chat | Mistral-7B-Instruct-v0.2 | Qwen-7B-Chat | InternLM2-Chat-7B | ChatGLM3-6B | Baichuan2-13B-Chat | Mixtral-8x7B-Instruct-v0.1 | Qwen-14B-Chat | InternLM2-Chat-20B |
-|-----------------------|-------------------|--------------------------|--------------|-------------------|-------------|---------------------|--------------------------------|---------------|---------------------|
-| MMLU                  | 50.1              | 59.2                     | 57.1         | 63.7              | 58.0        | 56.6                | 70.3                          | 66.7          | 66.5                |
-| CMMLU                 | 53.4              | 42.0                     | 57.9         | 63.0              | 57.8        | 54.8                | 50.6                          | 68.1          | 65.1                |
-| AGIEval               | 35.3              | 34.5                     | 39.7         | 47.2              | 44.2        | 40.0                | 41.7                          | 46.5          | 50.3                |
-| C-Eval                | 53.9              | 42.4                     | 59.8         | 60.8              | 59.1        | 56.3                | 54.0                          | 71.5          | 63.0                |
-| TrivialQA             | 37.6              | 35.0                     | 46.1         | 50.8              | 38.1        | 40.3                | 57.7                          | 54.5          | 53.9                |
-| NaturalQuestions      | 12.8              | 8.1                      | 18.6         | 24.1              | 14.0        | 12.7                | 22.5                          | 22.9          | 25.9                |
-| C3                    | 78.5              | 66.9                     | 84.4         | 91.5              | 79.3        | 84.4                | 82.1                          | 91.5          | 93.5                |
-| CMRC                  | 8.1               | 5.6                      | 14.6         | 63.8              | 43.2        | 27.8                | 5.3                           | 13.0          | 50.4                |
-| WinoGrande            | 49.9              | 50.8                     | 54.2         | 65.8              | 61.7        | 50.9                | 60.9                          | 55.7          | 74.8                |
-| BBH                   | 35.9              | 46.5                     | 45.5         | 61.2              | 56.0        | 42.5                | 57.3                          | 55.8          | 68.3                |
-| GSM-8K                | 32.4              | 48.3                     | 44.1         | 70.7              | 53.8        | 56.0                | 71.7                          | 57.7          | 79.6                |
-| Math                  | 5.7               | 8.6                      | 12.0         | 23.0              | 20.4        | 4.3                 | 22.5                          | 27.6          | 31.9                |
-| HumanEval              | 17.7              | 35.4                     | 36.0         | 59.8              | 52.4        | 19.5                | 37.8                          | 40.9          | 67.1                |
-| MBPP                  | 37.7              | 25.7                     | 33.9         | 51.4              | 55.6        | 40.9                | 40.9                          | 30.0          | 65.8                |
+| Dataset          | Baichuan2-7B-Chat | Mistral-7B-Instruct-v0.2 | Qwen-7B-Chat | InternLM2-Chat-7B | ChatGLM3-6B | Baichuan2-13B-Chat | Mixtral-8x7B-Instruct-v0.1 | Qwen-14B-Chat | InternLM2-Chat-20B |
+| ---------------- | ----------------- | ------------------------ | ------------ | ----------------- | ----------- | ------------------ | -------------------------- | ------------- | ------------------ |
+| MMLU             | 50.1              | 59.2                     | 57.1         | 63.7              | 58.0        | 56.6               | 70.3                       | 66.7          | 66.5               |
+| CMMLU            | 53.4              | 42.0                     | 57.9         | 63.0              | 57.8        | 54.8               | 50.6                       | 68.1          | 65.1               |
+| AGIEval          | 35.3              | 34.5                     | 39.7         | 47.2              | 44.2        | 40.0               | 41.7                       | 46.5          | 50.3               |
+| C-Eval           | 53.9              | 42.4                     | 59.8         | 60.8              | 59.1        | 56.3               | 54.0                       | 71.5          | 63.0               |
+| TrivialQA        | 37.6              | 35.0                     | 46.1         | 50.8              | 38.1        | 40.3               | 57.7                       | 54.5          | 53.9               |
+| NaturalQuestions | 12.8              | 8.1                      | 18.6         | 24.1              | 14.0        | 12.7               | 22.5                       | 22.9          | 25.9               |
+| C3               | 78.5              | 66.9                     | 84.4         | 91.5              | 79.3        | 84.4               | 82.1                       | 91.5          | 93.5               |
+| CMRC             | 8.1               | 5.6                      | 14.6         | 63.8              | 43.2        | 27.8               | 5.3                        | 13.0          | 50.4               |
+| WinoGrande       | 49.9              | 50.8                     | 54.2         | 65.8              | 61.7        | 50.9               | 60.9                       | 55.7          | 74.8               |
+| BBH              | 35.9              | 46.5                     | 45.5         | 61.2              | 56.0        | 42.5               | 57.3                       | 55.8          | 68.3               |
+| GSM-8K           | 32.4              | 48.3                     | 44.1         | 70.7              | 53.8        | 56.0               | 71.7                       | 57.7          | 79.6               |
+| Math             | 5.7               | 8.6                      | 12.0         | 23.0              | 20.4        | 4.3                | 22.5                       | 27.6          | 31.9               |
+| HumanEval        | 17.7              | 35.4                     | 36.0         | 59.8              | 52.4        | 19.5               | 37.8                       | 40.9          | 67.1               |
+| MBPP             | 37.7              | 25.7                     | 33.9         | 51.4              | 55.6        | 40.9               | 40.9                       | 30.0          | 65.8               |
 
 - MBPP性能使用的是MBPP(Sanitized)版本数据集
 
@@ -106,16 +107,16 @@ InternLM2 系列模型在本仓库正式发布，具有如下特性：
 
 - 我们评测了InternLM2-Chat在[AlpacaEval 2.0](https://tatsu-lab.github.io/alpaca_eval/) 上的性能，结果表明InternLM2-Chat在AlpacaEval上已经超过了 Claude 2, GPT-4(0613) 和  Gemini Pro.
 
-| Model Name              | Win Rate | Length |
-| ----------------------- | -------- | ------ |
-| GPT-4 Turbo      | 50.00%   | 2049   |
-| GPT-4         | 23.58%   | 1365   |
-| GPT-4 0314             | 22.07%   | 1371   |
-| Mistral Medium      | 21.86%   | 1500   |
-| XwinLM 70b V0.1   | 21.81%   | 1775   |
-| InternLM2 Chat 20B  | 21.75%   | 2373   |
+| Model Name         | Win Rate | Length |
+| ------------------ | -------- | ------ |
+| GPT-4 Turbo        | 50.00%   | 2049   |
+| GPT-4              | 23.58%   | 1365   |
+| GPT-4 0314         | 22.07%   | 1371   |
+| Mistral Medium     | 21.86%   | 1500   |
+| XwinLM 70b V0.1    | 21.81%   | 1775   |
+| InternLM2 Chat 20B | 21.75%   | 2373   |
 | Mixtral 8x7B v0.1  | 18.26%   | 1465   |
-| Claude 2            | 17.19%   | 1069   |
+| Claude 2           | 17.19%   | 1069   |
 | Gemini Pro         | 16.85%   | 1315   |
 | GPT-4 0613         | 15.76%   | 1140   |
 | Claude 2.1         | 15.73%   | 1096   |
@@ -127,9 +128,11 @@ InternLM2 系列模型在本仓库正式发布，具有如下特性：
 接下来我们展示使用 [Transformers](#import-from-transformers)，[ModelScope](#import-from-modelscope) 和 [Web demo](#dialogue) 进行推理。
 对话模型采用了 [chatml 格式](./chat/chat_format.md) 来支持通用对话和智能体应用。
 为了保障更好的使用效果，在用 [Transformers](#import-from-transformers) 或 [ModelScope](#import-from-modelscope) 进行推理前，请确保安装的 transformers 库版本满足以下要求：
+
 ```
 transformers >= 4.34
 ```
+
 ### 通过 Transformers 加载
 
 通过以下的代码从 Transformers 加载 InternLM2-7B-Chat 模型 （可修改模型名称替换不同的模型）
@@ -141,7 +144,7 @@ tokenizer = AutoTokenizer.from_pretrained("internlm/internlm2-chat-7b", trust_re
 # 设置`torch_dtype=torch.float16`来将模型精度指定为torch.float16，否则可能会因为您的硬件原因造成显存不足的问题。
 model = AutoModelForCausalLM.from_pretrained("internlm/internlm2-chat-7b", device_map="auto",trust_remote_code=True, torch_dtype=torch.float16)
 # (可选) 如果在低资源设备上，可以通过bitsandbytes加载4-bit或8-bit量化的模型，进一步节省GPU显存.
-  # 4-bit 量化的 InternLM 7B 大约会消耗 8GB 显存. 
+  # 4-bit 量化的 InternLM 7B 大约会消耗 8GB 显存.
   # pip install -U bitsandbytes
   # 8-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_8bit=True)
   # 4-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_4bit=True)
@@ -164,7 +167,7 @@ model_dir = snapshot_download('Shanghai_AI_Laboratory/internlm2-chat-7b')
 tokenizer = AutoTokenizer.from_pretrained(model_dir, device_map="auto", trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, torch_dtype=torch.float16)
 # (可选) 如果在低资源设备上，可以通过bitsandbytes加载4-bit或8-bit量化的模型，进一步节省GPU显存.
-  # 4-bit 量化的 InternLM 7B 大约会消耗 8GB 显存. 
+  # 4-bit 量化的 InternLM 7B 大约会消耗 8GB 显存.
   # pip install -U bitsandbytes
   # 8-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_8bit=True)
   # 4-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_4bit=True)
diff --git a/agent/README.md b/agent/README.md
index 370415b..693c841 100644
--- a/agent/README.md
+++ b/agent/README.md
@@ -4,18 +4,18 @@ English | [简体中文](README_zh-CN.md)
 
 ## Introduction
 
-InternLM-Chat-7B v1.1 has been released as the first open-source model with code interpreter capabilities, supportting external tools such as Python code interpreter and search engine.
+InternLM-Chat-7B v1.1 has been released as the first open-source model with code interpreter capabilities, supporting external tools such as Python code interpreter and search engine.
 
 InternLM2-Chat, open sourced on January 17, 2024, further enhances its capabilities in code interpreter and general tool utilization. With improved and more generalized instruction understanding, tool selection, and reflection abilities, InternLM2-Chat can more reliably support complex agents and multi-step tool calling for more intricate tasks. InternLM2-Chat exhibits decent computational and reasoning abilities even without external tools, surpassing ChatGPT in mathematical performance. When combined with a code interpreter, InternLM2-Chat-20B obtains comparable results to GPT-4 on GSM8K and MATH. Leveraging strong foundational capabilities in mathematics and tools, InternLM2-Chat provides practical data analysis capabilities.
 
 The results of InternLM2-Chat-20B on math code interpreter is as below:
 
-|       | GSM8K | MATH |
-| :---: | :---: | :--: |
-| InternLM2-Chat-20B | 79.6 | 32.5 |
-| InternLM2-Chat-20B with Code Interpreter  | 84.5 | 51.2 |
-| ChatGPT (GPT-3.5) | 78.2 | 28.0 |
-| GPT-4 | 91.4 | 45.8 |
+|                                          | GSM8K | MATH |
+| :--------------------------------------: | :---: | :--: |
+|            InternLM2-Chat-20B            | 79.6  | 32.5 |
+| InternLM2-Chat-20B with Code Interpreter | 84.5  | 51.2 |
+|            ChatGPT (GPT-3.5)             | 78.2  | 28.0 |
+|                  GPT-4                   | 91.4  | 45.8 |
 
 ## Usages
 
diff --git a/agent/README_zh-CN.md b/agent/README_zh-CN.md
index 7f8a240..3b198b1 100644
--- a/agent/README_zh-CN.md
+++ b/agent/README_zh-CN.md
@@ -10,12 +10,12 @@ InternLM2-Chat 进一步提高了它在代码解释和通用工具调用方面
 
 以下是 InternLM2-Chat-20B 在数学代码解释器上的结果。
 
-|       | GSM8K | MATH |
-| :---: | :---: | :--: |
-| InternLM2-Chat-20B 单纯依靠内在能力 | 79.6 | 32.5 |
-| InternLM2-Chat-20B 配合代码解释器  | 84.5 | 51.2 |
-| ChatGPT (GPT-3.5) | 78.2 | 28.0 |
-| GPT-4 | 91.4 | 45.8 |
+|                                     | GSM8K | MATH |
+| :---------------------------------: | :---: | :--: |
+| InternLM2-Chat-20B 单纯依靠内在能力 | 79.6  | 32.5 |
+|  InternLM2-Chat-20B 配合代码解释器  | 84.5  | 51.2 |
+|          ChatGPT (GPT-3.5)          | 78.2  | 28.0 |
+|                GPT-4                | 91.4  | 45.8 |
 
 ## 体验
 
diff --git a/agent/lagent_zh-CN.md b/agent/lagent_zh-CN.md
index 0365969..141a782 100644
--- a/agent/lagent_zh-CN.md
+++ b/agent/lagent_zh-CN.md
@@ -40,7 +40,7 @@ streamlit run examples/react_web_demo.py
 
 ## 用 InternLM-Chat 构建一个 ReAct 智能体
 
-**注意：**如果你想要启动一个 HuggingFace 的模型，请先运行 pip install -e .[all]。
+**注意：** 如果你想要启动一个 HuggingFace 的模型，请先运行 `pip install -e .[all]`。
 
 ```python
 # Import necessary modules and classes from the "lagent" library.
diff --git a/agent/pal_inference.md b/agent/pal_inference.md
index c2f874c..82f7aaf 100644
--- a/agent/pal_inference.md
+++ b/agent/pal_inference.md
@@ -21,20 +21,21 @@ python pal_inference.py \
 ```
 
 Parameter explanation:
-|   Parameter   |        Description        |
-| :--------: | :--------------------: |
-|   \<model\>                     | Path to the model used for inference |
-|   \<out_dir\>                   | Generated code will be saved in the specified output folder |
-|   --dataset <dataset>         | Name of the dataset used for code generation (defaults to gsm8k) |
-|   --max_length <length>       | Maximum input token length for the model (defaults to 2048) |
-|   --top_p <threshold>         | Probability threshold for the sum of candidate tokens (defaults to 0.8) |
-|   --eoh <end token>           | User input end identifier (defaults to "")  |
-|   --eoa <end token>           | Model input end identifier (defaults to "")  |
-|   --eos <end token>           | System input end identifier (defaults to "")  |
-|   --temperature， -t <temp>   | Sampling temperature during generation (defaults to 1.0) |
-|   --time_out <time>           | Maximum time (in seconds) for executing generated code (defaults to 100) |
-|   --verbose, -v               | Print code error messages (optional) |
-|   --append, -a                | Append output to historical results (optional) |
+
+|         Parameter         |                               Description                                |
+| :-----------------------: | :----------------------------------------------------------------------: |
+|         \<model>          |                   Path to the model used for inference                   |
+|        \<out_dir>         |       Generated code will be saved in the specified output folder        |
+|    --dataset <dataset>    |     Name of the dataset used for code generation (defaults to gsm8k)     |
+|   --max_length <length>   |       Maximum input token length for the model (defaults to 2048)        |
+|    --top_p <threshold>    | Probability threshold for the sum of candidate tokens (defaults to 0.8)  |
+|     --eoh <end token>     |                User input end identifier (defaults to "")                |
+|     --eoa <end token>     |               Model input end identifier (defaults to "")                |
+|     --eos <end token>     |               System input end identifier (defaults to "")               |
+| --temperature, -t <temp>  |         Sampling temperature during generation (defaults to 1.0)         |
+|     --time_out <time>     | Maximum time (in seconds) for executing generated code (defaults to 100) |
+|       --verbose, -v       |                   Print code error messages (optional)                   |
+|       --append, -a        |              Append output to historical results (optional)              |
 
 A simple usage example is as follows:
 
diff --git a/agent/pal_inference.py b/agent/pal_inference.py
index a27e96c..ed55390 100644
--- a/agent/pal_inference.py
+++ b/agent/pal_inference.py
@@ -17,6 +17,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# isort: skip_file
 import argparse
 import copy
 import json
@@ -31,68 +32,87 @@ import tqdm
 from datasets import load_dataset
 from torch import nn
 from transformers import AutoModelForCausalLM, AutoTokenizer
-from transformers.generation.utils import LogitsProcessorList, StoppingCriteriaList
+from transformers.generation.utils import (LogitsProcessorList,
+                                           StoppingCriteriaList)
 from transformers.utils import logging
 
 logger = logging.get_logger(__name__)
 
 
 def parse_args():
-    parser = argparse.ArgumentParser(description="PAL Inference")
-    parser.add_argument("model", type=str, help="Path to the pre-trained LLM used for inference.")
+    parser = argparse.ArgumentParser(description='PAL Inference')
+    parser.add_argument('model',
+                        type=str,
+                        help='Path to the pre-trained LLM used for inference.')
     parser.add_argument(
-        "out_dir", type=str, help="Name of the output folder where generated code snippets will be saved."
+        'out_dir',
+        type=str,
+        help=
+        'Name of the output folder where generated code snippets will be saved.'
     )
-    parser.add_argument("--dataset", default="gsm8k", type=str, help="Name of the dataset used for code generation.")
+    parser.add_argument('--dataset',
+                        default='gsm8k',
+                        type=str,
+                        help='Name of the dataset used for code generation.')
     parser.add_argument(
-        "--max_length",
+        '--max_length',
         default=2048,
         type=int,
-        help="Maximum input token length for the natural language description.",
+        help='Maximum input token length for the natural language description.',
     )
     parser.add_argument(
-        "--top_p",
+        '--top_p',
         default=0.8,
         type=float,
-        help="Probability threshold to choose sample tokens during generation.",
+        help='Probability threshold to choose sample tokens during generation.',
     )
     parser.add_argument(
-        "--eoh",
-        default="",
+        '--eoh',
+        default='',
         type=str,
-        help="End of human (user) token.",
+        help='End of human (user) token.',
     )
     parser.add_argument(
-        "--eoa",
-        default="",
+        '--eoa',
+        default='',
         type=str,
-        help="End of assistant (bot) token.",
+        help='End of assistant (bot) token.',
     )
     parser.add_argument(
-        "--eos",
-        default="",
+        '--eos',
+        default='',
         type=str,
-        help="End of system token.",
+        help='End of system token.',
     )
     parser.add_argument(
-        "--temperature", "-t", default=1.0, type=float, help="Temperature of token sampling during generation."
+        '--temperature',
+        '-t',
+        default=1.0,
+        type=float,
+        help='Temperature of token sampling during generation.')
+    parser.add_argument(
+        '--time_out',
+        default=100,
+        type=float,
+        help='Maximum time allowed for executing generated code.')
+    parser.add_argument(
+        '--verbose',
+        '-v',
+        action='store_true',
+        help=
+        'Print code error information when executing generated code (optional).',
     )
     parser.add_argument(
-        "--time_out", default=100, type=float, help="Maximum time allowed for executing generated code."
-    )
-    parser.add_argument(
-        "--verbose",
-        "-v",
-        action="store_true",
-        help="Print code error information when executing generated code (optional).",
-    )
-    parser.add_argument("--append", "-a", action="store_true", help="Append output to the history results (optional).")
+        '--append',
+        '-a',
+        action='store_true',
+        help='Append output to the history results (optional).')
     args = parser.parse_args()
     return args
 
 
 class Timeout:
-    """Timer to execute code
+    """Timer to execute code.
 
     Adapted from https://github.com/reasoning-machines/pal
 
@@ -101,7 +121,7 @@ class Timeout:
         error_message (str)
     """
 
-    def __init__(self, seconds=1, error_message="Timeout"):
+    def __init__(self, seconds=1, error_message='Timeout'):
         self.seconds = seconds
         self.error_message = error_message
 
@@ -133,15 +153,16 @@ def generate_interactive(
     generation_config: Optional[GenerationConfig] = None,
     logits_processor: Optional[LogitsProcessorList] = None,
     stopping_criteria: Optional[StoppingCriteriaList] = None,
-    prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], List[int]]] = None,
+    prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor],
+                                                List[int]]] = None,
     additional_eos_token_id: Optional[int] = None,
     **kwargs,
 ):
-    inputs = tokenizer([prompt], padding=True, return_tensors="pt")
-    input_length = len(inputs["input_ids"][0])
+    inputs = tokenizer([prompt], padding=True, return_tensors='pt')
+    input_length = len(inputs['input_ids'][0])
     for k, v in inputs.items():
         inputs[k] = v.cuda()
-    input_ids = inputs["input_ids"]
+    input_ids = inputs['input_ids']
     batch_size, input_ids_seq_length = input_ids.shape[0], input_ids.shape[-1]  # noqa: F841  # pylint: disable=W0612
     if generation_config is None:
         generation_config = model.generation_config
@@ -155,12 +176,13 @@ def generate_interactive(
         eos_token_id = [eos_token_id]
     if additional_eos_token_id is not None:
         eos_token_id.append(additional_eos_token_id)
-    has_default_max_length = kwargs.get("max_length") is None and generation_config.max_length is not None
+    has_default_max_length = kwargs.get(
+        'max_length') is None and generation_config.max_length is not None
     if has_default_max_length and generation_config.max_new_tokens is None:
         warnings.warn(
             f"Using `max_length`'s default ({generation_config.max_length}) to control the generation length. "
-            "This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we"
-            " recommend using `max_new_tokens` to control the maximum length of the generation.",
+            'This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we'
+            ' recommend using `max_new_tokens` to control the maximum length of the generation.',
             UserWarning,
         )
     elif generation_config.max_new_tokens is not None:
@@ -169,22 +191,23 @@ def generate_interactive(
             logger.warn(  # pylint: disable=W4902
                 f"Both `max_new_tokens` (={generation_config.max_new_tokens}) and `max_length`(="
                 f"{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence. "
-                "Please refer to the documentation for more information. "
-                "(https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)",
+                'Please refer to the documentation for more information. '
+                '(https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)',
                 UserWarning,
             )
 
     if input_ids_seq_length >= generation_config.max_length:
-        input_ids_string = "input_ids"
+        input_ids_string = 'input_ids'
         logger.warning(
             f"Input length of {input_ids_string} is {input_ids_seq_length}, but `max_length` is set to"
             f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
-            " increasing `max_new_tokens`."
-        )
+            ' increasing `max_new_tokens`.')
 
     # 2. Set generation parameters if not already defined
-    logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList()
-    stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList()
+    logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList(
+    )
+    stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList(
+    )
 
     logits_processor = model._get_logits_processor(
         generation_config=generation_config,
@@ -195,14 +218,15 @@ def generate_interactive(
     )
 
     stopping_criteria = model._get_stopping_criteria(
-        generation_config=generation_config, stopping_criteria=stopping_criteria
-    )
+        generation_config=generation_config,
+        stopping_criteria=stopping_criteria)
     logits_warper = model._get_logits_warper(generation_config)
 
     unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1)
     scores = None
     while True:
-        model_inputs = model.prepare_inputs_for_generation(input_ids, **model_kwargs)
+        model_inputs = model.prepare_inputs_for_generation(
+            input_ids, **model_kwargs)
         # forward pass to get next token
         outputs = model(
             **model_inputs,
@@ -226,8 +250,10 @@ def generate_interactive(
 
         # update generated ids, model inputs, and length for next step
         input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
-        model_kwargs = model._update_model_kwargs_for_generation(outputs, model_kwargs, is_encoder_decoder=False)
-        unfinished_sequences = unfinished_sequences.mul((min(next_tokens != i for i in eos_token_id)).long())
+        model_kwargs = model._update_model_kwargs_for_generation(
+            outputs, model_kwargs, is_encoder_decoder=False)
+        unfinished_sequences = unfinished_sequences.mul(
+            (min(next_tokens != i for i in eos_token_id)).long())
 
         output_token_ids = input_ids[0].cpu().tolist()
         output_token_ids = output_token_ids[input_length:]
@@ -238,12 +264,13 @@ def generate_interactive(
 
         yield response
         # stop when each sentence is finished, or if we exceed the maximum length
-        if unfinished_sequences.max() == 0 or stopping_criteria(input_ids, scores):
+        if unfinished_sequences.max() == 0 or stopping_criteria(
+                input_ids, scores):
             break
 
 
 class GenericRuntime:
-    """Adapted from https://github.com/reasoning-machines/pal"""
+    """Adapted from https://github.com/reasoning-machines/pal."""
 
     GLOBAL_DICT: dict = {}
     LOCAL_DICT = None
@@ -251,7 +278,8 @@ class GenericRuntime:
 
     def __init__(self):
         self._global_vars = copy.copy(self.GLOBAL_DICT)
-        self._local_vars = copy.copy(self.LOCAL_DICT) if self.LOCAL_DICT else None
+        self._local_vars = copy.copy(
+            self.LOCAL_DICT) if self.LOCAL_DICT else None
 
         for c in self.HEADERS:
             self.exec_code(c)
@@ -268,7 +296,7 @@ class GenericRuntime:
 
     @property
     def answer(self):
-        return self._global_vars["answer"]
+        return self._global_vars['answer']
 
 
 class PALInterface:
@@ -292,7 +320,7 @@ class PALInterface:
         tokenizer: AutoTokenizer,
         generation_config: GenerationConfig,
         additional_eos_token_id: int = 103028,
-        get_answer_expr: str = "solution()",
+        get_answer_expr: str = 'solution()',
         verbose: bool = False,
     ):
         self.runtime = GenericRuntime()
@@ -308,11 +336,11 @@ class PALInterface:
         # The api will generate response word by word
         # we only need the last generation as the final results
         for cur_gen in generate_interactive(
-            model=self.model,
-            tokenizer=self.tokenizer,
-            prompt=prompt,
-            additional_eos_token_id=self.additional_eos_token_id,
-            **asdict(self.generation_config),
+                model=self.model,
+                tokenizer=self.tokenizer,
+                prompt=prompt,
+                additional_eos_token_id=self.additional_eos_token_id,
+                **asdict(self.generation_config),
         ):
             continue
         # Get final response
@@ -322,11 +350,11 @@ class PALInterface:
         return code
 
     def process_generation_to_code(self, gens: str):
-        if "```python" in gens:
-            gens = gens.split("```python")[1].split("```")[0]
-        elif "```" in gens:
-            gens = gens.split("```")[1].split("```")[0]
-        code = gens.split("\n")
+        if '```python' in gens:
+            gens = gens.split('```python')[1].split('```')[0]
+        elif '```' in gens:
+            gens = gens.split('```')[1].split('```')[0]
+        code = gens.split('\n')
         return code
 
     def run(self, prompt, time_out: float = 100):
@@ -340,7 +368,7 @@ class PALInterface:
         return exec_result
 
     def execute(self, code: List[str]):
-        self.runtime.exec_code("\n".join(code))
+        self.runtime.exec_code('\n'.join(code))
         return self.runtime.eval_code(self.answer_expr)
 
     def clear_history(self):
@@ -348,21 +376,24 @@ class PALInterface:
 
 
 def load_model(args):
-    model = AutoModelForCausalLM.from_pretrained(args.model, trust_remote_code=True).to(torch.bfloat16).cuda()
-    tokenizer = AutoTokenizer.from_pretrained(args.model, trust_remote_code=True)
+    model = AutoModelForCausalLM.from_pretrained(args.model,
+                                                 trust_remote_code=True).to(
+                                                     torch.bfloat16).cuda()
+    tokenizer = AutoTokenizer.from_pretrained(args.model,
+                                              trust_remote_code=True)
     return model, tokenizer
 
 
 def load_data(args):
     # Load data from huggingface dataset
-    if args.dataset == "gsm8k":
-        gsm8k = load_dataset(path=args.dataset, name="main")
-        test_set = gsm8k["test"]
+    if args.dataset == 'gsm8k':
+        gsm8k = load_dataset(path=args.dataset, name='main')
+        test_set = gsm8k['test']
         input_data = []
         for data in test_set:
-            question = data["question"]
-            target = float(data["answer"].split("#")[-1].replace(",", ""))
-            input_data.append({"question": question, "target": target})
+            question = data['question']
+            target = float(data['answer'].split('#')[-1].replace(',', ''))
+            input_data.append({'question': question, 'target': target})
     else:
         raise NotImplementedError
     return input_data
@@ -419,52 +450,62 @@ def main():
 
     args = parse_args()
 
-    print("load model begin.")
+    print('load model begin.')
     model, tokenizer = load_model(args)
-    print("load model end.")
+    print('load model end.')
 
-    generation_config = GenerationConfig(max_length=args.max_length, top_p=args.top_p, temperature=args.temperature)
+    generation_config = GenerationConfig(max_length=args.max_length,
+                                         top_p=args.top_p,
+                                         temperature=args.temperature)
 
     verbose = args.verbose
-    interface = PALInterface(model=model, tokenizer=tokenizer, generation_config=generation_config, verbose=verbose)
+    interface = PALInterface(model=model,
+                             tokenizer=tokenizer,
+                             generation_config=generation_config,
+                             verbose=verbose)
 
     if not os.path.exists(args.out_dir):
         os.makedirs(args.out_dir)
-    savepath = os.path.join(args.out_dir, args.dataset + ".json")
+    savepath = os.path.join(args.out_dir, args.dataset + '.json')
 
     # Load from history results
     if args.append and os.path.exists(savepath):
         lines = open(savepath).readlines()
         num_skip_exps = len(lines)
-        scores = [x["score"] for x in map(json.loads, lines)]
+        scores = [x['score'] for x in map(json.loads, lines)]
     else:
         num_skip_exps = 0
         scores = []
 
     examples = load_data(args)
-    with open(savepath, "a" if args.append else "w") as f:
-        pbar = tqdm.tqdm(examples[num_skip_exps:], initial=num_skip_exps, total=len(examples))
+    with open(savepath, 'a' if args.append else 'w') as f:
+        pbar = tqdm.tqdm(examples[num_skip_exps:],
+                         initial=num_skip_exps,
+                         total=len(examples))
         for x in pbar:
-            question = x["question"]
+            question = x['question']
             result = copy.copy(x)
 
             try:
                 answer = interface.run(
-                    prompt=PROMPT.format(question=question, eoh=args.eoh, eoa=args.eoa, eos=args.eos),
+                    prompt=PROMPT.format(question=question,
+                                         eoh=args.eoh,
+                                         eoa=args.eoa,
+                                         eos=args.eos),
                     time_out=args.time_out,
                 )
                 answer = float(answer)
-                score = 1 if abs(answer - x["target"]) < 1e-3 else 0
+                score = 1 if abs(answer - x['target']) < 1e-3 else 0
             except Exception as e:
                 if verbose:
                     print(e)
-                answer = ""
+                answer = ''
                 score = 0
             scores.append(score)
-            result["answer"] = answer
-            result["score"] = score
-            result["generation"] = interface.history
-            f.write(json.dumps(result) + "\n")
+            result['answer'] = answer
+            result['score'] = score
+            result['generation'] = interface.history
+            f.write(json.dumps(result) + '\n')
 
             interface.clear_history()
             f.flush()
@@ -473,5 +514,5 @@ def main():
     torch.cuda.empty_cache()
 
 
-if __name__ == "__main__":
+if __name__ == '__main__':
     main()
diff --git a/agent/pal_inference_zh-CN.md b/agent/pal_inference_zh-CN.md
index ebc7998..5603176 100644
--- a/agent/pal_inference_zh-CN.md
+++ b/agent/pal_inference_zh-CN.md
@@ -21,20 +21,21 @@ python pal_inference.py \
 ```
 
 参数说明:
-|   参数   |        说明        |
-| :--------: | :--------------------: |
-|   \<model\>                     | 用于推理的模型的路径 |
-|   \<out_dir\>                   | 生成代码将保存在指定的输出文件夹中 |
-|   --dataset <dataset>         | 用于代码生成的数据集名称（默认：gsm8k） |
-|   --max_length <length>       | 模型最大输入 token 长度（默认：2048） |
-|   --top_p <threshold>         | 候选 token 相加的概率阈值（默认：0.8） |
-|   --eoh <end token>           | 用户输入结束标识符 (默认: "")  |
-|   --eoa <end token>           | 模型输入结束标识符 (默认: "")  |
-|   --eos <end token>           | 系统输入结束标识符. (默认: "")  |
-|   --temperature， -t <temp>   | 生成过程中的采样温度（默认：1.0） |
-|   --time_out <time>           | 执行生成的代码的最大时间（秒）（默认：100） |
-|   --verbose, -v               | 打印代码错误信息（可选） |
-|   --append, -a                | 将输出追加到历史结果中（可选） |
+
+|           参数            |                    说明                     |
+| :-----------------------: | :-----------------------------------------: |
+|         \<model>          |            用于推理的模型的路径             |
+|        \<out_dir>         |     生成代码将保存在指定的输出文件夹中      |
+|    --dataset <dataset>    |   用于代码生成的数据集名称（默认：gsm8k）   |
+|   --max_length <length>   |    模型最大输入 token 长度（默认：2048）    |
+|    --top_p <threshold>    |   候选 token 相加的概率阈值（默认：0.8）    |
+|     --eoh <end token>     |        用户输入结束标识符 (默认: "")        |
+|     --eoa <end token>     |        模型输入结束标识符 (默认: "")        |
+|     --eos <end token>     |       系统输入结束标识符. (默认: "")        |
+| --temperature， -t <temp> |      生成过程中的采样温度（默认：1.0）      |
+|     --time_out <time>     | 执行生成的代码的最大时间（秒）（默认：100） |
+|       --verbose, -v       |          打印代码错误信息（可选）           |
+|       --append, -a        |       将输出追加到历史结果中（可选）        |
 
 简单的使用示例如下：
 
diff --git a/chat/chat_format.md b/chat/chat_format.md
index 303d99d..0c7f89c 100644
--- a/chat/chat_format.md
+++ b/chat/chat_format.md
@@ -70,14 +70,14 @@ The weather in Shanghai is 22 celsius<|im_end|>
 ```
 
 1. First，there will be a new system prompt that describe the protocol of tools in json format. The content starts with `<|im_start|>system name=<|plugin|>\n` and ends with `<|im_end|>`. `name=<|plugin|>` indicates the system prompt comes from tools. InternLM2-Chat supports and follows multiple system prompts in the chat history, so we can also see the system prompt ask the model to be helpful assistant.
-2. Model will call the tools in a streaming format after receving the user prompt, i.e., it will naturally speak something (thoughts, helpful response) then call the tools. The model will output `<|action_start|><|plugin|>`, where `<|action_start|>` indicates the model needs to call extensions and `<|plugin|>` indicates it want to use common tools. Then the model will output the calling parameters in json format and end it with `<|action_end|>`.
+2. The model will call the tools in a streaming format after receiving the user prompt, i.e., it will naturally speak something (thoughts, helpful response) then call the tools. The model will output `<|action_start|><|plugin|>`, where `<|action_start|>` indicates the model needs to call extensions and `<|plugin|>` indicates it wants to use common tools. Then the model will output the calling parameters in json format and end it with `<|action_end|>`.
 3. The environment returns the function calling outputs, starts with `<|im_start|>environment name=<|plugin|>`, indicating it comes from the function calling in the environment. The content is in json format, `{"temperature": 22}`, ends with `<|im_end|>`.
 
 ### Code Interpreter
 
 InternLM2-Chat support to use code interpreter to solve complex math problems and data analysis. The whole process is similar to the function calling, shown as below.
 
-```
+````
 <|im_start|>system
 You are InternLM2-Chat, a harmless AI assistant<|im_end|>
 <|im_start|>system name=<|interpreter|>
@@ -130,17 +130,17 @@ Can you explain the code?<|im_end|>
 <|im_start|>assistant
 The code uses Python's Plotly library to create a polar plot that shows the relationship between wind direction and rainfall. It starts by creating a Plotly Figure object. Then, I added two `Scatterpolar` traces. The first trace shows the relationship between wind direction at 9 am and average rainfall, and the second trace shows the same for 3 pm. In these traces, the `r` parameter represents the average rainfall, and `theta` represents the wind direction. `fill='toself'` means the shape is filled to its own boundary. Next, I updated the chart layout, hid the radial axis, added a legend, set the title and its font, chose a dark theme, and set the chart's background color, width, and height. Finally, the chart was displayed using `fig.show()`.
 <|im_end|>
-```
+````
 
 1. First，there will be a new system prompt that describe the usages of code interpreter. The content starts with `<|im_start|>system name=<|interpreter|>\n` and ends with `<|im_end|>`. `name=<|interpreter|>` indicates the system prompt comes from code interpreter. InternLM2-Chat supports and follows multiple system prompts in the chat history, so we can also see the system prompt ask the model to be helpful assistant.
-2. Model will call the tools in a streaming format after receving the user prompt, i.e., it will naturally speak something (thoughts, helpful response) then call the code interpreter. The model will output `<|action_start|><|interpreter|>`, where `<|action_start|>` indicates the model needs to call extensions and `<|interpreter|>` indicates it want to use code interpreter. Then the model will output the code in a markdown-style code block and end it with `<|action_end|>`.
+2. The model will call the tools in a streaming format after receiving the user prompt, i.e., it will naturally speak something (thoughts, helpful response) then call the code interpreter. The model will output `<|action_start|><|interpreter|>`, where `<|action_start|>` indicates the model needs to call extensions and `<|interpreter|>` indicates it wants to use the code interpreter. Then the model will output the code in a markdown-style code block and end it with `<|action_end|>`.
 3. The environment will execute the code and return outputs, starts with `<|im_start|>environment name=<|interpreter|>`, indicating it comes from the code interpreter in the environment, and the content ends with `<|im_end|>`.
 
 ### Function Call + Code Interpreter
 
 InternLM2-Chat can choose to use tools or code interpreter automatically. We only need to send the system prompt of both to the model respectively. An example is shown as below
 
-```
+````
 <|im_start|>system
 You are InternLM2-Chat, a harmless AI assistant<|im_end|>
 <|im_start|>system name=<|interpreter|>
@@ -221,4 +221,4 @@ Sure, I will search for the weather of Shanghai.<|action_start|><|plugin|>
 {"temperature": 22}<|im_end|>
 <|im_start|>assistant
 The weather in Shanghai is 22 celsius<|im_end|>
-```
+````
diff --git a/chat/chat_format_zh-CN.md b/chat/chat_format_zh-CN.md
index 5852f15..3246089 100644
--- a/chat/chat_format_zh-CN.md
+++ b/chat/chat_format_zh-CN.md
@@ -77,7 +77,7 @@ InternLM2-Chat 的通用工具调用（function call）支持多种格式，尤
 
 InternLM2-Chat 支持使用代码解释器解决复杂数学问题和进行数据分析。整个调用流程和通用工具调用类似，如下所示
 
-```
+````
 <|im_start|>system
 你是书生浦语2，一个无害的人工智能助手<|im_end|>
 <|im_start|>system name=<|interpreter|>
@@ -128,19 +128,18 @@ fig.show()
 <|im_start|>assistant
 我使用的是Python的Plotly库来创建一个极坐标图，以展示风向与降雨量之间的关系。代码中首先创建了一个Plotly的Figure对象。接着，我添加了两个`Scatterpolar`追踪。第一个追踪展示的是上午9点的风向与降雨量的平均值的关系，第二个追踪展示的是下午3点的风向与降雨量的平均值的关系。在这两个追踪中，`r`参数代表降雨量的平均值，`theta`参数代表风向。`fill='toself'`表示图形以自身为边界进行填充。然后我更新了图表的布局，隐藏了径向轴，添加了图例，设置了标题和标题字体，选择了暗色主题，并设定了图表的背景色、宽度和高度。最后，使用`fig.show()`展示了图表。
 <|im_end|>
-```
+````
 
 1. 首先在系统提示中提供代码解释器的格式和字段描述。内容以 `<|im_start|>system name=<|interpreter|>\n`开头，`<|im_end|>` 结尾，`name=<|interpreter|>` 体现了这是来自代码解释器的指令。InternLM2-Chat 支持 system 角色对模型的提示和约束多次出现。所以我们会看到前面还有关于对话的要求。
-2. 用户可以上传一个文件，并对模型提出要求，文件的上传会以单独的形式向模型发出一条指令，以 `<|im_start|>user name=file` 开头，以 json 形式给出路径和文件大小`
-[{"path": "data.csv", size='10K'}]`，以 `<|im_end|>`结尾。
-2. 模型在接受到用户指令后，会以流式的形式调用工具，及自然地生成文字进行思考/回应用户，然后输出`<|action_start|><|interpreter|>`。`<|action_start|>`表示要调用外部插件，同时 `<|interpreter|>` 表示调用的是代码解释器。然后模型输出 markdown 中 python 代码块格式代码内容，再以 `<|action_end|>` 表示工具调用结束。
-3. 系统会执行代码块中的代码，然后返回调用结果，以 `<|im_start|>environment name=<|interpreter|>`开头，表示是来自环境关于代码解释器执行的输出，以`<|im_end|>`结尾。
+2. 用户可以上传一个文件，并对模型提出要求，文件的上传会以单独的形式向模型发出一条指令，以 `<|im_start|>user name=file` 开头，以 json 形式给出路径和文件大小` [{"path": "data.csv", size='10K'}]`，以 `<|im_end|>`结尾。
+3. 模型在接受到用户指令后，会以流式的形式调用工具，及自然地生成文字进行思考/回应用户，然后输出`<|action_start|><|interpreter|>`。`<|action_start|>`表示要调用外部插件，同时 `<|interpreter|>` 表示调用的是代码解释器。然后模型输出 markdown 中 python 代码块格式代码内容，再以 `<|action_end|>` 表示工具调用结束。
+4. 系统会执行代码块中的代码，然后返回调用结果，以 `<|im_start|>environment name=<|interpreter|>`开头，表示是来自环境关于代码解释器执行的输出，以`<|im_end|>`结尾。
 
 ### 同时使用工具和代码解释器
 
 InternLM2-Chat 能够在一个对话过程中自主选择调用工具或代码解释器。在工具和代码解释器同时开启的情况下，只需要将各自的系统提示合并在一起给模型即可。一个调用工具和代码解释器的对话历史样例如下。
 
-```
+````
 <|im_start|>system
 你是书生浦语2，一个无害的人工智能助手<|im_end|>
 <|im_start|>system name=<|interpreter|>
@@ -219,4 +218,4 @@ fig.show()
 {"temperature": 22}<|im_end|>
 <|im_start|>assistant
 上海的天气是 22 摄氏度<|im_end|>
-```
+````
diff --git a/chat/lmdeploy.md b/chat/lmdeploy.md
index 36c7a16..80fe42c 100644
--- a/chat/lmdeploy.md
+++ b/chat/lmdeploy.md
@@ -6,7 +6,6 @@ English | [简体中文](lmdeploy_zh_cn.md)
 
 This article primarily highlights the basic usage of LMDeploy. For a comprehensive understanding of the toolkit, we invite you to refer to [the tutorials](https://lmdeploy.readthedocs.io/en/latest/).
 
-
 ## Installation
 
 Install lmdeploy with pip (python 3.8+)
diff --git a/chat/lmdeploy_zh_cn.md b/chat/lmdeploy_zh_cn.md
index 1df7e54..7b47d7d 100644
--- a/chat/lmdeploy_zh_cn.md
+++ b/chat/lmdeploy_zh_cn.md
@@ -6,7 +6,6 @@
 
 本文主要介绍 LMDeploy 的基本用法，包括[安装](#安装)、[离线批处理](#离线批处理)和[推理服务](#推理服务)。更全面的介绍请参考 [LMDeploy 用户指南](https://lmdeploy.readthedocs.io/zh-cn/latest/)。
 
-
 ## 安装
 
 使用 pip（python 3.8+）安装 LMDeploy
@@ -27,6 +26,7 @@ print(response)
 ```
 
 LMDeploy 实现了 dynamic ntk，支持长文本外推。使用如下代码，可以把 InternLM2 的文本外推到 200K：
+
 ```python
 from lmdeploy import pipeline, TurbomindEngineConfig
 engine_config = TurbomindEngineConfig(session_len=200000,
diff --git a/chat/openaoe.md b/chat/openaoe.md
index aec3861..ce7c47a 100644
--- a/chat/openaoe.md
+++ b/chat/openaoe.md
@@ -1,37 +1,48 @@
 # Multi-Chats by OpenAOE
 
 English | [简体中文](openaoe_zh_cn.md)
+
 ## Introduction
+
 [OpenAOE](https://github.com/InternLM/OpenAOE) is a LLM-Group-Chat Framework, which can chat with multiple LLMs (commercial/open source LLMs) at the same time. OpenAOE provides both backend API and WEB-UI to meet different usage needs.
 
 Currently already supported LLMs: [InternLM2-Chat-7B](https://huggingface.co/internlm/internlm2-chat-7b), [IntenLM-Chat-7B](https://huggingface.co/internlm/internlm-chat-7b), GPT-3.5, GPT-4, Google PaLM, MiniMax, Claude, Spark, etc.
 
 ## Quick Run
-> [!TIP]
+
+> [!TIP]
 > Require python >= 3.9
 
 We provide three different ways to run OpenAOE: `run by pip`， `run by docker` and `run by source code` as well.
 
 ### Run by pip
+
 #### **Install**
+
 ```shell
 pip install -U openaoe
 ```
+
 #### **Start**
+
 ```shell
 openaoe -f /path/to/your/config-template.yaml
 ```
 
 ### Run by docker
+
 #### **Install**
 
 There are two ways to get the OpenAOE docker image by:
+
 1. pull the OpenAOE docker image
+
 ```shell
 docker pull opensealion/openaoe:latest
 ```
 
 2. or build a docker image
+
 ```shell
 git clone https://github.com/internlm/OpenAOE
 cd OpenAOE
@@ -39,32 +50,38 @@ docker build . -f docker/Dockerfile -t openaoe:latest
 ```
 
 #### **Start**
+
 ```shell
 docker run -p 10099:10099 -v /path/to/your/config-template.yaml:/app/config.yaml --name OpenAOE opensealion/openaoe:latest
 ```
 
 ### Run by source code
+
 #### **Install**
+
 1. clone this project
+
 ```shell
 git clone https://github.com/internlm/OpenAOE
 ```
-2. [_optional_] build the frontend project when the frontend codes are changed
+
+2. \[_optional_\] build the frontend project when the frontend codes are changed
+
 ```shell
 cd OpenAOE/openaoe/frontend
 npm install
 npm run build
 ```
 
-
 #### **Start**
+
 ```shell
 cd OpenAOE
 pip install -r openaoe/backend/requirements.txt
 python -m openaoe.main -f /path/to/your/config-template.yaml
 ```
 
-> [!TIP]
+> [!TIP]
 > `/path/to/your/config-tempalte.yaml` is the configuration file loaded by OpenAOE at startup,
 > which contains the relevant configuration information for the LLMs,
 > including: API URLs, AKSKs, Tokens, etc.
diff --git a/chat/openaoe_zh_cn.md b/chat/openaoe_zh_cn.md
index 36b3240..48add0d 100644
--- a/chat/openaoe_zh_cn.md
+++ b/chat/openaoe_zh_cn.md
@@ -2,37 +2,47 @@
 
 [English](openaoe.md) | 简体中文
 
-
 ## 介绍
+
 [OpenAOE](https://github.com/InternLM/OpenAOE) 是一个 LLM-Group-Chat 框架，可以同时与多个商业大模型或开源大模型进行聊天。 OpenAOE还提供后端API和WEB-UI以满足不同的使用需求。
 
 目前已经支持的大模型有：  [InternLM2-Chat-7B](https://huggingface.co/internlm/internlm2-chat-7b), [IntenLM-Chat-7B](https://huggingface.co/internlm/internlm-chat-7b), GPT-3.5, GPT-4, Google PaLM, MiniMax, Claude, 讯飞星火等。
 
-
 ## 快速安装
+
 我们将提供 3 种不同的方式安装：基于 pip、基于 docker 以及基于源代码，实现开箱即用。
 
 ### 基于 pip
-> [!TIP]
+
+> [!TIP]
 > 需要 python >= 3.9
+
 #### **安装**
+
 ```shell
 pip install -U openaoe
 ```
+
 #### **运行**
+
 ```shell
 openaoe -f /path/to/your/config-template.yaml
 ```
 
 ### 基于 docker
+
 #### **安装**
+
 有两种方式获取 OpenAOE 的 docker 镜像：
+
 1. 官方拉取
+
 ```shell
 docker pull opensealion/openaoe:latest
 ```
 
 2. 本地构建
+
 ```shell
 git clone https://github.com/internlm/OpenAOE
 cd OpenAOE
@@ -40,31 +50,37 @@ docker build . -f docker/Dockerfile -t openaoe:latest
 ```
 
 #### **运行**
+
 ```shell
 docker run -p 10099:10099 -v /path/to/your/config-template.yaml:/app/config.yaml --name OpenAOE opensealion/openaoe:latest
 ```
 
 ### 基于源代码
+
 #### **安装**
+
 1. 克隆项目
+
 ```shell
 git clone https://github.com/internlm/OpenAOE
 ```
-2. [_可选_] （如果前端代码发生变动）重新构建前端项目
+
+2. \[_可选_\] （如果前端代码发生变动）重新构建前端项目
+
 ```shell
 cd OpenAOE/openaoe/frontend
 npm install
 npm run build
 ```
 
-
 #### **运行**
+
 ```shell
 cd OpenAOE
 pip install -r openaoe/backend/requirements.txt
 python -m openaoe.main -f /path/to/your/config-template.yaml
-``````
+```
 
-> [!TIP]
+> [!TIP]
 > `/path/to/your/config-template.yaml` 是 OpenAOE 启动时读取的配置文件，里面包含了大模型的相关配置信息，
 > 包括：调用API地址、AKSK、Token等信息，是 OpenAOE 启动的必备文件。模板文件可以在 `openaoe/backend/config/config-template.yaml` 中找到。
diff --git a/chat/web_demo.py b/chat/web_demo.py
index 1368fc4..5d37a2e 100644
--- a/chat/web_demo.py
+++ b/chat/web_demo.py
@@ -1,13 +1,20 @@
-"""
-This script refers to the dialogue example of streamlit, the interactive generation code of chatglm2 and transformers.
-We mainly modified part of the code logic to adapt to the generation of our model.
+"""This script refers to the dialogue example of streamlit, the interactive
+generation code of chatglm2 and transformers.
+
+We mainly modified part of the code logic to adapt to the
+generation of our model.
 Please refer to these links below for more information:
-    1. streamlit chat example: https://docs.streamlit.io/knowledge-base/tutorials/build-conversational-apps
-    2. chatglm2: https://github.com/THUDM/ChatGLM2-6B
-    3. transformers: https://github.com/huggingface/transformers
-Please run with the command `streamlit run path/to/web_demo.py --server.address=0.0.0.0 --server.port 7860`.
+    1. streamlit chat example:
+        https://docs.streamlit.io/knowledge-base/tutorials/build-conversational-apps
+    2. chatglm2:
+        https://github.com/THUDM/ChatGLM2-6B
+    3. transformers:
+        https://github.com/huggingface/transformers
+Please run with the command `streamlit run path/to/web_demo.py
+    --server.address=0.0.0.0 --server.port 7860`.
 Using `python path/to/web_demo.py` may cause unknown problems.
 """
+# isort: skip_file
 import copy
 import warnings
 from dataclasses import asdict, dataclass
@@ -16,7 +23,8 @@ from typing import Callable, List, Optional
 import streamlit as st
 import torch
 from torch import nn
-from transformers.generation.utils import LogitsProcessorList, StoppingCriteriaList
+from transformers.generation.utils import (LogitsProcessorList,
+                                           StoppingCriteriaList)
 from transformers.utils import logging
 
 from transformers import AutoTokenizer, AutoModelForCausalLM  # isort: skip
@@ -42,16 +50,17 @@ def generate_interactive(
     generation_config: Optional[GenerationConfig] = None,
     logits_processor: Optional[LogitsProcessorList] = None,
     stopping_criteria: Optional[StoppingCriteriaList] = None,
-    prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], List[int]]] = None,
+    prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor],
+                                                List[int]]] = None,
     additional_eos_token_id: Optional[int] = None,
     **kwargs,
 ):
-    inputs = tokenizer([prompt], padding=True, return_tensors="pt")
-    input_length = len(inputs["input_ids"][0])
+    inputs = tokenizer([prompt], padding=True, return_tensors='pt')
+    input_length = len(inputs['input_ids'][0])
     for k, v in inputs.items():
         inputs[k] = v.cuda()
-    input_ids = inputs["input_ids"]
-    batch_size, input_ids_seq_length = input_ids.shape[0], input_ids.shape[-1]  # noqa: F841  # pylint: disable=W0612
+    input_ids = inputs['input_ids']
+    _, input_ids_seq_length = input_ids.shape[0], input_ids.shape[-1]
     if generation_config is None:
         generation_config = model.generation_config
     generation_config = copy.deepcopy(generation_config)
@@ -64,36 +73,45 @@ def generate_interactive(
         eos_token_id = [eos_token_id]
     if additional_eos_token_id is not None:
         eos_token_id.append(additional_eos_token_id)
-    has_default_max_length = kwargs.get("max_length") is None and generation_config.max_length is not None
+    has_default_max_length = kwargs.get(
+        'max_length') is None and generation_config.max_length is not None
     if has_default_max_length and generation_config.max_new_tokens is None:
         warnings.warn(
-            f"Using `max_length`'s default ({generation_config.max_length}) to control the generation length. "
-            "This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we"
-            " recommend using `max_new_tokens` to control the maximum length of the generation.",
+            f"Using 'max_length''s default ({repr(generation_config.max_length)}) \
+                to control the generation length. "
+            'This behaviour is deprecated and will be removed from the \
+                config in v5 of Transformers -- we'
+            ' recommend using `max_new_tokens` to control the maximum \
+                length of the generation.',
             UserWarning,
         )
     elif generation_config.max_new_tokens is not None:
-        generation_config.max_length = generation_config.max_new_tokens + input_ids_seq_length
+        generation_config.max_length = generation_config.max_new_tokens + \
+            input_ids_seq_length
         if not has_default_max_length:
             logger.warn(  # pylint: disable=W4902
-                f"Both `max_new_tokens` (={generation_config.max_new_tokens}) and `max_length`(="
-                f"{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence. "
-                "Please refer to the documentation for more information. "
-                "(https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)",
+                f"Both 'max_new_tokens' (={generation_config.max_new_tokens}) "
+                f"and 'max_length'(={generation_config.max_length}) seem to "
+                "have been set. 'max_new_tokens' will take precedence. "
+                'Please refer to the documentation for more information. '
+                '(https://huggingface.co/docs/transformers/main/'
+                'en/main_classes/text_generation)',
                 UserWarning,
             )
 
     if input_ids_seq_length >= generation_config.max_length:
-        input_ids_string = "input_ids"
+        input_ids_string = 'input_ids'
         logger.warning(
-            f"Input length of {input_ids_string} is {input_ids_seq_length}, but `max_length` is set to"
-            f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
-            " increasing `max_new_tokens`."
-        )
+            f"Input length of {input_ids_string} is {input_ids_seq_length}, "
+            f"but 'max_length' is set to {generation_config.max_length}. "
+            'This can lead to unexpected behavior. You should consider'
+            " increasing 'max_new_tokens'.")
 
     # 2. Set generation parameters if not already defined
-    logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList()
-    stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList()
+    logits_processor = logits_processor if logits_processor is not None \
+        else LogitsProcessorList()
+    stopping_criteria = stopping_criteria if stopping_criteria is not None \
+        else StoppingCriteriaList()
 
     logits_processor = model._get_logits_processor(
         generation_config=generation_config,
@@ -104,14 +122,15 @@ def generate_interactive(
     )
 
     stopping_criteria = model._get_stopping_criteria(
-        generation_config=generation_config, stopping_criteria=stopping_criteria
-    )
+        generation_config=generation_config,
+        stopping_criteria=stopping_criteria)
     logits_warper = model._get_logits_warper(generation_config)
 
     unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1)
     scores = None
     while True:
-        model_inputs = model.prepare_inputs_for_generation(input_ids, **model_kwargs)
+        model_inputs = model.prepare_inputs_for_generation(
+            input_ids, **model_kwargs)
         # forward pass to get next token
         outputs = model(
             **model_inputs,
@@ -135,8 +154,10 @@ def generate_interactive(
 
         # update generated ids, model inputs, and length for next step
         input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
-        model_kwargs = model._update_model_kwargs_for_generation(outputs, model_kwargs, is_encoder_decoder=False)
-        unfinished_sequences = unfinished_sequences.mul((min(next_tokens != i for i in eos_token_id)).long())
+        model_kwargs = model._update_model_kwargs_for_generation(
+            outputs, model_kwargs, is_encoder_decoder=False)
+        unfinished_sequences = unfinished_sequences.mul(
+            (min(next_tokens != i for i in eos_token_id)).long())
 
         output_token_ids = input_ids[0].cpu().tolist()
         output_token_ids = output_token_ids[input_length:]
@@ -146,8 +167,10 @@ def generate_interactive(
         response = tokenizer.decode(output_token_ids)
 
         yield response
-        # stop when each sentence is finished, or if we exceed the maximum length
-        if unfinished_sequences.max() == 0 or stopping_criteria(input_ids, scores):
+        # stop when each sentence is finished
+        # or if we exceed the maximum length
+        if unfinished_sequences.max() == 0 or stopping_criteria(
+                input_ids, scores):
             break
 
 
@@ -157,44 +180,48 @@ def on_btn_click():
 
 @st.cache_resource
 def load_model():
-    model = (
-        AutoModelForCausalLM.from_pretrained("internlm/internlm2-chat-7b", trust_remote_code=True)
-        .to(torch.bfloat16)
-        .cuda()
-    )
-    tokenizer = AutoTokenizer.from_pretrained("internlm/internlm2-chat-7b", trust_remote_code=True)
+    model = (AutoModelForCausalLM.from_pretrained('internlm/internlm2-chat-7b',
+                                                  trust_remote_code=True).to(
+                                                      torch.bfloat16).cuda())
+    tokenizer = AutoTokenizer.from_pretrained('internlm/internlm2-chat-7b',
+                                              trust_remote_code=True)
     return model, tokenizer
 
 
 def prepare_generation_config():
     with st.sidebar:
-        max_length = st.slider("Max Length", min_value=8, max_value=32768, value=32768)
-        top_p = st.slider("Top P", 0.0, 1.0, 0.8, step=0.01)
-        temperature = st.slider("Temperature", 0.0, 1.0, 0.7, step=0.01)
-        st.button("Clear Chat History", on_click=on_btn_click)
+        max_length = st.slider('Max Length',
+                               min_value=8,
+                               max_value=32768,
+                               value=32768)
+        top_p = st.slider('Top P', 0.0, 1.0, 0.8, step=0.01)
+        temperature = st.slider('Temperature', 0.0, 1.0, 0.7, step=0.01)
+        st.button('Clear Chat History', on_click=on_btn_click)
 
-    generation_config = GenerationConfig(max_length=max_length, top_p=top_p, temperature=temperature)
+    generation_config = GenerationConfig(max_length=max_length,
+                                         top_p=top_p,
+                                         temperature=temperature)
 
     return generation_config
 
 
-user_prompt = "<|im_start|>user\n{user}<|im_end|>\n"
-robot_prompt = "<|im_start|>assistant\n{robot}<|im_end|>\n"
-cur_query_prompt = "<|im_start|>user\n{user}<|im_end|>\n<|im_start|>assistant\n"
+user_prompt = '<|im_start|>user\n{user}<|im_end|>\n'
+robot_prompt = '<|im_start|>assistant\n{robot}<|im_end|>\n'
+cur_query_prompt = ('<|im_start|>user\n{user}<|im_end|>\n'
+                    '<|im_start|>assistant\n')
 
 
 def combine_history(prompt):
     messages = st.session_state.messages
-    meta_instruction = (
-        "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai "
-        "AI Laboratory (上海人工智能实验室)."
-    )
+    meta_instruction = ('You are InternLM (书生·浦语), a helpful, honest, '
+                        'and harmless AI assistant developed by Shanghai '
+                        'AI Laboratory (上海人工智能实验室).')
     total_prompt = f"<s><|im_start|>system\n{meta_instruction}<|im_end|>\n"
     for message in messages:
-        cur_content = message["content"]
-        if message["role"] == "user":
+        cur_content = message['content']
+        if message['role'] == 'user':
             cur_prompt = user_prompt.format(user=cur_content)
-        elif message["role"] == "robot":
+        elif message['role'] == 'robot':
             cur_prompt = robot_prompt.format(robot=cur_content)
         else:
             raise RuntimeError
@@ -205,57 +232,59 @@ def combine_history(prompt):
 
 def main():
     # torch.cuda.empty_cache()
-    print("load model begin.")
+    print('load model begin.')
     model, tokenizer = load_model()
-    print("load model end.")
+    print('load model end.')
 
-    user_avator = "assets/user.png"
-    robot_avator = "assets/robot.png"
+    user_avator = 'assets/user.png'
+    robot_avator = 'assets/robot.png'
 
-    st.title("InternLM2-Chat-7B")
+    st.title('InternLM2-Chat-7B')
 
     generation_config = prepare_generation_config()
 
     # Initialize chat history
-    if "messages" not in st.session_state:
+    if 'messages' not in st.session_state:
         st.session_state.messages = []
 
     # Display chat messages from history on app rerun
     for message in st.session_state.messages:
-        with st.chat_message(message["role"], avatar=message.get("avatar")):
-            st.markdown(message["content"])
+        with st.chat_message(message['role'], avatar=message.get('avatar')):
+            st.markdown(message['content'])
 
     # Accept user input
-    if prompt := st.chat_input("What is up?"):
+    if prompt := st.chat_input('What is up?'):
         # Display user message in chat message container
-        with st.chat_message("user", avatar=user_avator):
+        with st.chat_message('user', avatar=user_avator):
             st.markdown(prompt)
         real_prompt = combine_history(prompt)
         # Add user message to chat history
-        st.session_state.messages.append({"role": "user", "content": prompt, "avatar": user_avator})
+        st.session_state.messages.append({
+            'role': 'user',
+            'content': prompt,
+            'avatar': user_avator
+        })
 
-        with st.chat_message("robot", avatar=robot_avator):
+        with st.chat_message('robot', avatar=robot_avator):
             message_placeholder = st.empty()
             for cur_response in generate_interactive(
-                model=model,
-                tokenizer=tokenizer,
-                prompt=real_prompt,
-                additional_eos_token_id=92542,
-                **asdict(generation_config),
+                    model=model,
+                    tokenizer=tokenizer,
+                    prompt=real_prompt,
+                    additional_eos_token_id=92542,
+                    **asdict(generation_config),
             ):
                 # Display robot response in chat message container
-                message_placeholder.markdown(cur_response + "▌")
-            message_placeholder.markdown(cur_response)  # pylint: disable=undefined-loop-variable
+                message_placeholder.markdown(cur_response + '▌')
+            message_placeholder.markdown(cur_response)
         # Add robot response to chat history
-        st.session_state.messages.append(
-            {
-                "role": "robot",
-                "content": cur_response,  # pylint: disable=undefined-loop-variable
-                "avatar": robot_avator,
-            }
-        )
+        st.session_state.messages.append({
+            'role': 'robot',
+            'content': cur_response,  # pylint: disable=undefined-loop-variable
+            'avatar': robot_avator,
+        })
         torch.cuda.empty_cache()
 
 
-if __name__ == "__main__":
+if __name__ == '__main__':
     main()
diff --git a/finetune/README.md b/finetune/README.md
index e5152c5..06df0e0 100644
--- a/finetune/README.md
+++ b/finetune/README.md
@@ -97,4 +97,4 @@ xtuner chat internlm/internlm2-chat-7b --visual-encoder openai/clip-vit-large-pa
 
 ## InternEvo
 
-[TODO]
+\[TODO\]
diff --git a/finetune/README_zh-CN.md b/finetune/README_zh-CN.md
index 98a53cc..742ff05 100644
--- a/finetune/README_zh-CN.md
+++ b/finetune/README_zh-CN.md
@@ -95,4 +95,4 @@ xtuner chat internlm/internlm2-chat-7b --visual-encoder openai/clip-vit-large-pa
 
 ## InternEvo
 
-[TODO]
+\[TODO\]
diff --git a/model_cards/internlm2_20b.md b/model_cards/internlm2_20b.md
index d289513..a92753a 100644
--- a/model_cards/internlm2_20b.md
+++ b/model_cards/internlm2_20b.md
@@ -16,12 +16,12 @@ The base model of InternLM2 has the following technical features:
 
 ## Model Zoo
 
-| Model | Transformers(HF) | ModelScope(HF) | OpenXLab(HF) | OpenXLab(Origin) | Release Date |
-|---------------------------|------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------|
-| **InternLM2-Base-20B** | [🤗internlm2-base-20b](https://huggingface.co/internlm/internlm2-base-20b) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-base-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-20b-original) | 2024-01-17 |
-| **InternLM2-20B** | [🤗internlm2-20b](https://huggingface.co/internlm/internlm2-20b) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b-original) | 2024-01-17 |
-| **InternLM2-Chat-20B-SFT**     | [🤗internlm2-chat-20b-sft](https://huggingface.co/internlm/internlm2-chat-20b-sft)         | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-chat-20b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b-sft/summary)         | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-sft) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-sft-original)    | 2024-01-17   |
-| **InternLM2-Chat-20B**     | [🤗internlm2-chat-20b](https://huggingface.co/internlm/internlm2-chat-20b)         | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-chat-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b/summary)         | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-original)     | 2024-01-17   |
+| Model                      | Transformers(HF)                           | ModelScope(HF)                           | OpenXLab(HF)                           | OpenXLab(Origin)                           | Release Date |
+| -------------------------- | ------------------------------------------ | ---------------------------------------- | -------------------------------------- | ------------------------------------------ | ------------ |
+| **InternLM2-Base-20B**     | [🤗internlm2-base-20b](https://huggingface.co/internlm/internlm2-base-20b) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-base-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-20b-original) | 2024-01-17   |
+| **InternLM2-20B**          | [🤗internlm2-20b](https://huggingface.co/internlm/internlm2-20b) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b-original) | 2024-01-17   |
+| **InternLM2-Chat-20B-SFT** | [🤗internlm2-chat-20b-sft](https://huggingface.co/internlm/internlm2-chat-20b-sft) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-chat-20b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b-sft/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-sft) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-sft-original) | 2024-01-17   |
+| **InternLM2-Chat-20B**     | [🤗internlm2-chat-20b](https://huggingface.co/internlm/internlm2-chat-20b) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-chat-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-20b-original) | 2024-01-17   |
 
 - `HF` refers to the format used by HuggingFace in [transformers](https://github.com/huggingface/transformers), whereas `Origin` denotes the format adopted by the InternLM team in [InternEvo](https://github.com/InternLM/InternEvo).
 
@@ -29,16 +29,15 @@ The base model of InternLM2 has the following technical features:
 
 We have evaluated InternLM2 on several important benchmarks using the open-source evaluation tool [OpenCompass](https://github.com/open-compass/opencompass). Some of the evaluation results are shown in the table below. You are welcome to visit the [OpenCompass Leaderboard](https://opencompass.org.cn/rank) for more evaluation results.
 
-| Dataset\Models | InternLM2-7B | InternLM2-Chat-7B | InternLM2-20B | InternLM2-Chat-20B | ChatGPT | GPT-4 |
-| --- | --- | --- | --- | --- | --- | --- |
-| MMLU | 65.8 | 63.7 | 67.7 | 66.5 | 69.1 | 83.0 |
-| AGIEval | 49.9 | 47.2 | 53.0 | 50.3 | 39.9 | 55.1 |
-| BBH | 65.0 | 61.2 | 72.1 | 68.3 | 70.1 | 86.7 |
-| GSM8K | 70.8 | 70.7 | 76.1 | 79.6 | 78.2 | 91.4 |
-| MATH | 20.2 | 23.0 | 25.5 | 31.9 | 28.0 | 45.8 |
-| HumanEval | 43.3 | 59.8 | 48.8 | 67.1 | 73.2 | 74.4 |
-| MBPP(Sanitized) | 51.8 | 51.4 | 63.0 | 65.8 | 78.9 | 79.0 |
-
+| Dataset\\Models | InternLM2-7B | InternLM2-Chat-7B | InternLM2-20B | InternLM2-Chat-20B | ChatGPT | GPT-4 |
+| --------------- | ------------ | ----------------- | ------------- | ------------------ | ------- | ----- |
+| MMLU            | 65.8         | 63.7              | 67.7          | 66.5               | 69.1    | 83.0  |
+| AGIEval         | 49.9         | 47.2              | 53.0          | 50.3               | 39.9    | 55.1  |
+| BBH             | 65.0         | 61.2              | 72.1          | 68.3               | 70.1    | 86.7  |
+| GSM8K           | 70.8         | 70.7              | 76.1          | 79.6               | 78.2    | 91.4  |
+| MATH            | 20.2         | 23.0              | 25.5          | 31.9               | 28.0    | 45.8  |
+| HumanEval       | 43.3         | 59.8              | 48.8          | 67.1               | 73.2    | 74.4  |
+| MBPP(Sanitized) | 51.8         | 51.4              | 63.0          | 65.8               | 78.9    | 79.0  |
 
 - The evaluation results were obtained from [OpenCompass](https://github.com/open-compass/opencompass) , and evaluation configuration can be found in the configuration files provided by [OpenCompass](https://github.com/open-compass/opencompass).
 - The evaluation data may have numerical differences due to the version iteration of [OpenCompass](https://github.com/open-compass/opencompass), so please refer to the latest evaluation results of [OpenCompass](https://github.com/open-compass/opencompass).
diff --git a/model_cards/internlm2_7b.md b/model_cards/internlm2_7b.md
index 04edce5..d11ae45 100644
--- a/model_cards/internlm2_7b.md
+++ b/model_cards/internlm2_7b.md
@@ -16,13 +16,12 @@ The base model of InternLM2 has the following technical features:
 
 ## Model Zoo
 
-
-| Model | Transformers(HF) | ModelScope(HF) | OpenXLab(HF) | OpenXLab(Origin) | Release Date |
-|---------------------------|------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------|
-| **InternLM2-Base-7B**           | [🤗internlm2-base-7b](https://huggingface.co/internlm/internlm2-base-7b)                     | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-base-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-7b/summary)                     | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-7b)           | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-7b-original) |  2024-01-17   |
-| **InternLM2-7B**           | [🤗internlm2-7b](https://huggingface.co/internlm/internlm2-7b)                     | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-7b/summary)                     | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b-original)          |  2024-01-17   |
-| **InternLM2-Chat-7B-SFT**      | [🤗internlm2-chat-7b-sft](https://huggingface.co/internlm/internlm2-chat-7b-sft)           | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-chat-7b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b-sft/summary)           | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-sft) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-sft-original)     | 2024-01-17  |
-| **InternLM2-Chat-7B**      | [🤗internlm2-chat-7b](https://huggingface.co/internlm/internlm2-chat-7b)           | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-chat-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b/summary)           | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-original)     | 2024-01-17  |
+| Model                     | Transformers(HF)                           | ModelScope(HF)                           | OpenXLab(HF)                           | OpenXLab(Origin)                            | Release Date |
+| ------------------------- | ------------------------------------------ | ---------------------------------------- | -------------------------------------- | ------------------------------------------- | ------------ |
+| **InternLM2-Base-7B**     | [🤗internlm2-base-7b](https://huggingface.co/internlm/internlm2-base-7b) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-base-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-base-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-base-7b-original) | 2024-01-17   |
+| **InternLM2-7B**          | [🤗internlm2-7b](https://huggingface.co/internlm/internlm2-7b) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b-original) | 2024-01-17   |
+| **InternLM2-Chat-7B-SFT** | [🤗internlm2-chat-7b-sft](https://huggingface.co/internlm/internlm2-chat-7b-sft) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-chat-7b-sft](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b-sft/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-sft) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-sft-original) | 2024-01-17   |
+| **InternLM2-Chat-7B**     | [🤗internlm2-chat-7b](https://huggingface.co/internlm/internlm2-chat-7b) | [<img src="../assets/modelscope_logo.png" width="20px" /> internlm2-chat-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-chat-7b-original) | 2024-01-17   |
 
 - `HF` refers to the format used by HuggingFace in [transformers](https://github.com/huggingface/transformers), whereas `Origin` denotes the format adopted by the InternLM team in [InternEvo](https://github.com/InternLM/InternEvo).
 
@@ -30,16 +29,15 @@ The base model of InternLM2 has the following technical features:
 
 We have evaluated InternLM2 on several important benchmarks using the open-source evaluation tool [OpenCompass](https://github.com/open-compass/opencompass). Some of the evaluation results are shown in the table below. You are welcome to visit the [OpenCompass Leaderboard](https://opencompass.org.cn/rank) for more evaluation results.
 
-| Dataset\Models | InternLM2-7B | InternLM2-Chat-7B | InternLM2-20B | InternLM2-Chat-20B | ChatGPT | GPT-4 |
-| --- | --- | --- | --- | --- | --- | --- |
-| MMLU | 65.8 | 63.7 | 67.7 | 66.5 | 69.1 | 83.0 |
-| AGIEval | 49.9 | 47.2 | 53.0 | 50.3 | 39.9 | 55.1 |
-| BBH | 65.0 | 61.2 | 72.1 | 68.3 | 70.1 | 86.7 |
-| GSM8K | 70.8 | 70.7 | 76.1 | 79.6 | 78.2 | 91.4 |
-| MATH | 20.2 | 23.0 | 25.5 | 31.9 | 28.0 | 45.8 |
-| HumanEval | 43.3 | 59.8 | 48.8 | 67.1 | 73.2 | 74.4 |
-| MBPP(Sanitized) | 51.8 | 51.4 | 63.0 | 65.8 | 78.9 | 79.0 |
-
+| Dataset\\Models | InternLM2-7B | InternLM2-Chat-7B | InternLM2-20B | InternLM2-Chat-20B | ChatGPT | GPT-4 |
+| --------------- | ------------ | ----------------- | ------------- | ------------------ | ------- | ----- |
+| MMLU            | 65.8         | 63.7              | 67.7          | 66.5               | 69.1    | 83.0  |
+| AGIEval         | 49.9         | 47.2              | 53.0          | 50.3               | 39.9    | 55.1  |
+| BBH             | 65.0         | 61.2              | 72.1          | 68.3               | 70.1    | 86.7  |
+| GSM8K           | 70.8         | 70.7              | 76.1          | 79.6               | 78.2    | 91.4  |
+| MATH            | 20.2         | 23.0              | 25.5          | 31.9               | 28.0    | 45.8  |
+| HumanEval       | 43.3         | 59.8              | 48.8          | 67.1               | 73.2    | 74.4  |
+| MBPP(Sanitized) | 51.8         | 51.4              | 63.0          | 65.8               | 78.9    | 79.0  |
 
 - The evaluation results were obtained from [OpenCompass](https://github.com/open-compass/opencompass) , and evaluation configuration can be found in the configuration files provided by [OpenCompass](https://github.com/open-compass/opencompass).
 - The evaluation data may have numerical differences due to the version iteration of [OpenCompass](https://github.com/open-compass/opencompass), so please refer to the latest evaluation results of [OpenCompass](https://github.com/open-compass/opencompass).
diff --git a/model_cards/internlm_20b.md b/model_cards/internlm_20b.md
index 7feb536..3c1e6a2 100644
--- a/model_cards/internlm_20b.md
+++ b/model_cards/internlm_20b.md
@@ -13,45 +13,45 @@ In terms of model structure, InternLM-20B opted for a deeper architecture, with
 
 ## Model Zoo
 
-| Model | Transformers(HF) | ModelScope(HF) | OpenXLab(HF) | OpenXLab(Original) | Release Date |
-|---------------------------|------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------|
-| **InternLM Chat 20B**     | [🤗internlm/internlm-chat-20b](https://huggingface.co/internlm/internlm-20b-chat)         | [<img src="../assets/modelscope_logo.png" width="20px" /> Shanghai_AI_Laboratory/internlm-chat-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-20b-chat/summary)         | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-20b)     | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-20b-original)     | 2023-12-12   |
-| **InternLM 20B** | [🤗internlm/internlm-20b](https://huggingface.co/internlm/internlm-20b) | [<img src="../assets/modelscope_logo.png" width="20px" /> Shanghai_AI_Laboratory/internlm-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-20b-original) | 2023-09-20 |
+| Model                 | Transformers(HF)                           | ModelScope(HF)                            | OpenXLab(HF)                            | OpenXLab(Original)                            | Release Date |
+| --------------------- | ------------------------------------------ | ----------------------------------------- | --------------------------------------- | --------------------------------------------- | ------------ |
+| **InternLM Chat 20B** | [🤗internlm/internlm-chat-20b](https://huggingface.co/internlm/internlm-20b-chat) | [<img src="../assets/modelscope_logo.png" width="20px" /> Shanghai_AI_Laboratory/internlm-chat-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-20b-chat/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-20b-original) | 2023-12-12   |
+| **InternLM 20B**      | [🤗internlm/internlm-20b](https://huggingface.co/internlm/internlm-20b) | [<img src="../assets/modelscope_logo.png" width="20px" /> Shanghai_AI_Laboratory/internlm-20b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-20b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-20b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-20b-original) | 2023-09-20   |
 
 ## Performance Evaluation
 
 On the 5 capability dimensions proposed by OpenCompass, InternLM-20B has achieved excellent results (the bolded scores represent the best performances within the 13B-33B parameter range).
 
-| Capability | Llama-13B | Llama2-13B | Baichuan2-13B | InternLM-20B | Llama-33B | Llama-65B | Llama2-70B |
-|----------|-----------|------------|---------------|--------------|-----------|-----------|------------|
-| Language     | 42.5      | 47         | 47.5          | **55**           | 44.6      | 47.1      | 51.6       |
-| Knowledge     | 58.2      | 58.3       | 48.9          | 60.1         | **64**        | 66        | 67.7       |
-| Understanding     | 45.5      | 50.9       | 58.1          | **67.3**         | 50.6      | 54.2      | 60.8       |
-| Reasoning     | 42.7      | 43.6       | 44.2          | **54.9**         | 46.4      | 49.8      | 55         |
-| Examination     | 37.3      | 45.2       | 51.8          | **62.5**         | 47.4      | 49.7      | 57.3       |
-| Overall   | 43.8      | 47.3       | 49.4          | **59.2**         | 48.9      | 51.9      | 57.4       |
+| Capability    | Llama-13B | Llama2-13B | Baichuan2-13B | InternLM-20B | Llama-33B | Llama-65B | Llama2-70B |
+| ------------- | --------- | ---------- | ------------- | ------------ | --------- | --------- | ---------- |
+| Language      | 42.5      | 47         | 47.5          | **55**       | 44.6      | 47.1      | 51.6       |
+| Knowledge     | 58.2      | 58.3       | 48.9          | 60.1         | **64**    | 66        | 67.7       |
+| Understanding | 45.5      | 50.9       | 58.1          | **67.3**     | 50.6      | 54.2      | 60.8       |
+| Reasoning     | 42.7      | 43.6       | 44.2          | **54.9**     | 46.4      | 49.8      | 55         |
+| Examination   | 37.3      | 45.2       | 51.8          | **62.5**     | 47.4      | 49.7      | 57.3       |
+| Overall       | 43.8      | 47.3       | 49.4          | **59.2**     | 48.9      | 51.9      | 57.4       |
 
 The table below compares the performance of mainstream open-source models on some influential and typical datasets.
 
-|      | Benchmarks           | Llama-13B | Llama2-13B | Baichuan2-13B | InternLM-20B | Llama-33B | Llama-65B | Llama2-70B |
-|------|------------------|-----------|------------|---------------|--------------|-----------|-----------|------------|
-| Examination | MMLU             | 47.73     | 54.99      | 59.55         | **62.05**        | 58.73     | 63.71     | 69.75      |
-|      | C-Eval (val)     | 31.83     | 41.4       | **59.01**         | 58.8         | 37.47     | 40.36     | 50.13      |
-|      | AGI-Eval         | 22.03     | 30.93      | 37.37         | **44.58**        | 33.53     | 33.92     | 40.02      |
-| Knowledge | BoolQ            | 78.75     | 82.42      | 67            | **87.46**        | 84.43     | 86.61     | 87.74      |
-|      | TriviaQA         | 52.47     | 59.36      | 46.61         | 57.26        | **66.24**     | 69.79     | 70.71      |
-|      | NaturalQuestions | 20.17     | 24.85      | 16.32         | 25.15        | **30.89**     | 33.41     | 34.16      |
-| Understanding | CMRC             | 9.26      | 31.59      | 29.85         | **68.78**        | 14.17     | 34.73     | 43.74      |
-|      | CSL              | 55        | 58.75      | 63.12         | **65.62**        | 57.5      | 59.38     | 60         |
-|      | RACE (middle)    | 53.41     | 63.02      | 68.94         | **86.35**        | 64.55     | 72.35     | 81.55      |
-|      | RACE (high)      | 47.63     | 58.86      | 67.18         | **83.28**        | 62.61     | 68.01     | 79.93      |
-|      | XSum             | 20.37     | 23.37      | 25.23         | **35.54**        | 20.55     | 19.91     | 25.38      |
-| Reasoning | WinoGrande       | 64.64     | 64.01      | 67.32         | **69.38**        | 66.85     | 69.38     | 69.77      |
-|      | BBH              | 37.93     | 45.62      | 48.98         | **52.51**        | 49.98     | 58.38     | 64.91      |
-|      | GSM8K            | 20.32     | 29.57      | **52.62**         | **52.62**        | 42.3      | 54.44     | 63.31      |
-|      | PIQA             | 79.71     | 79.76      | 78.07         | 80.25        | **81.34**     | 82.15     | 82.54      |
-| Programming | HumanEval        | 14.02     | 18.9       | 17.07         | **25.61**        | 17.68     | 18.9      | 26.22      |
-|      | MBPP             | 20.6      | 26.8       | 30.8          | **35.6**         | 28.4      | 33.6      | 39.6       |
+|               | Benchmarks       | Llama-13B | Llama2-13B | Baichuan2-13B | InternLM-20B | Llama-33B | Llama-65B | Llama2-70B |
+| ------------- | ---------------- | --------- | ---------- | ------------- | ------------ | --------- | --------- | ---------- |
+| Examination   | MMLU             | 47.73     | 54.99      | 59.55         | **62.05**    | 58.73     | 63.71     | 69.75      |
+|               | C-Eval (val)     | 31.83     | 41.4       | **59.01**     | 58.8         | 37.47     | 40.36     | 50.13      |
+|               | AGI-Eval         | 22.03     | 30.93      | 37.37         | **44.58**    | 33.53     | 33.92     | 40.02      |
+| Knowledge     | BoolQ            | 78.75     | 82.42      | 67            | **87.46**    | 84.43     | 86.61     | 87.74      |
+|               | TriviaQA         | 52.47     | 59.36      | 46.61         | 57.26        | **66.24** | 69.79     | 70.71      |
+|               | NaturalQuestions | 20.17     | 24.85      | 16.32         | 25.15        | **30.89** | 33.41     | 34.16      |
+| Understanding | CMRC             | 9.26      | 31.59      | 29.85         | **68.78**    | 14.17     | 34.73     | 43.74      |
+|               | CSL              | 55        | 58.75      | 63.12         | **65.62**    | 57.5      | 59.38     | 60         |
+|               | RACE (middle)    | 53.41     | 63.02      | 68.94         | **86.35**    | 64.55     | 72.35     | 81.55      |
+|               | RACE (high)      | 47.63     | 58.86      | 67.18         | **83.28**    | 62.61     | 68.01     | 79.93      |
+|               | XSum             | 20.37     | 23.37      | 25.23         | **35.54**    | 20.55     | 19.91     | 25.38      |
+| Reasoning     | WinoGrande       | 64.64     | 64.01      | 67.32         | **69.38**    | 66.85     | 69.38     | 69.77      |
+|               | BBH              | 37.93     | 45.62      | 48.98         | **52.51**    | 49.98     | 58.38     | 64.91      |
+|               | GSM8K            | 20.32     | 29.57      | **52.62**     | **52.62**    | 42.3      | 54.44     | 63.31      |
+|               | PIQA             | 79.71     | 79.76      | 78.07         | 80.25        | **81.34** | 82.15     | 82.54      |
+| Programming   | HumanEval        | 14.02     | 18.9       | 17.07         | **25.61**    | 17.68     | 18.9      | 26.22      |
+|               | MBPP             | 20.6      | 26.8       | 30.8          | **35.6**     | 28.4      | 33.6      | 39.6       |
 
 Overall, InternLM-20B comprehensively outperforms open-source models in the 13B parameter range in terms of overall capabilities, and on inference evaluation sets, it approaches or even surpasses the performance of Llama-65B.
 
diff --git a/model_cards/internlm_7b.md b/model_cards/internlm_7b.md
index ad926a3..228de3b 100644
--- a/model_cards/internlm_7b.md
+++ b/model_cards/internlm_7b.md
@@ -10,27 +10,27 @@ InternLM-7B contains a 7 billion parameter base model and a chat model tailored
 
 ## Model Zoo
 
-| Model | Transformers(HF) | ModelScope(HF) | OpenXLab(HF) | OpenXLab(Original) | Release Date |
-|---------------------------|------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------|
-| **InternLM Chat 7B**      | [🤗internlm/internlm-chat-7b](https://huggingface.co/internlm/internlm-chat-7b)           | [<img src="../assets/modelscope_logo.png" width="20px" /> Shanghai_AI_Laboratory/internlm-chat-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-7b/summary)           | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b)      | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b-original)      | 2023-12-12   |
-| **InternLM 7B**           | [🤗internlm/internlm-7b](https://huggingface.co/internlm/internlm-7b)                     | [<img src="../assets/modelscope_logo.png" width="20px" /> Shanghai_AI_Laboratory/internlm-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-7b/summary)                     | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-7b)           | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-7b-original)           | 2023-07-06   |
+| Model                | Transformers(HF)                            | ModelScope(HF)                            | OpenXLab(HF)                            | OpenXLab(Original)                            | Release Date |
+| -------------------- | ------------------------------------------- | ----------------------------------------- | --------------------------------------- | --------------------------------------------- | ------------ |
+| **InternLM Chat 7B** | [🤗internlm/internlm-chat-7b](https://huggingface.co/internlm/internlm-chat-7b) | [<img src="../assets/modelscope_logo.png" width="20px" /> Shanghai_AI_Laboratory/internlm-chat-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-chat-7b-original) | 2023-12-12   |
+| **InternLM 7B**      | [🤗internlm/internlm-7b](https://huggingface.co/internlm/internlm-7b) | [<img src="../assets/modelscope_logo.png" width="20px" /> Shanghai_AI_Laboratory/internlm-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-7b/summary) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-7b) | [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/header/openxlab_models.svg)](https://openxlab.org.cn/models/detail/OpenLMLab/InternLM-7b-original) | 2023-07-06   |
 
 ## Performance Evaluation
 
 We conducted a comprehensive evaluation of InternLM using the open-source evaluation tool [OpenCompass](https://github.com/internLM/OpenCompass/). The evaluation covered five dimensions of capabilities: disciplinary competence, language competence, knowledge competence, inference competence, and comprehension competence. Here are some of the evaluation results, and you can visit the [OpenCompass leaderboard](https://opencompass.org.cn/rank) for more evaluation results.
 
-| Datasets\Models | **InternLM-Chat-7B** | **InternLM-7B** | LLaMA-7B | Baichuan-7B | ChatGLM2-6B | Alpaca-7B | Vicuna-7B |
-| --------------- | -------------------------- | --------------------- | -------- | ----------- | ----------- | --------- | --------- |
-| C-Eval(Val)     | 52.0                       | 53.4                  | 24.2     | 42.7        | 50.9        | 28.9      | 31.2      |
-| MMLU            | 52.6                       | 51.0                  | 35.2*    | 41.5        | 46.0        | 39.7      | 47.3      |
-| AGIEval         | 46.4                       | 37.6                  | 20.8     | 24.6        | 39.0        | 24.1      | 26.4      |
-| CommonSenseQA   | 80.8                       | 59.5                  | 65.0     | 58.8        | 60.0        | 68.7      | 66.7      |
-| BUSTM           | 80.6                       | 50.6                  | 48.5     | 51.3        | 55.0        | 48.8      | 62.5      |
-| CLUEWSC         | 81.8                       | 59.1                  | 50.3     | 52.8        | 59.8        | 50.3      | 52.2      |
-| MATH            | 5.0                        | 7.1                   | 2.8      | 3.0         | 6.6         | 2.2       | 2.8       |
-| GSM8K           | 36.2                       | 31.2                  | 10.1     | 9.7         | 29.2        | 6.0       | 15.3      |
-| HumanEval       | 15.9                       | 10.4                  | 14.0     | 9.2         | 9.2         | 9.2       | 11.0      |
-| RACE(High)      | 80.3                       | 57.4                  | 46.9*    | 28.1        | 66.3        | 40.7      | 54.0      |
+| Datasets\\Models | **InternLM-Chat-7B** | **InternLM-7B** | LLaMA-7B | Baichuan-7B | ChatGLM2-6B | Alpaca-7B | Vicuna-7B |
+| ---------------- | -------------------- | --------------- | -------- | ----------- | ----------- | --------- | --------- |
+| C-Eval(Val)      | 52.0                 | 53.4            | 24.2     | 42.7        | 50.9        | 28.9      | 31.2      |
+| MMLU             | 52.6                 | 51.0            | 35.2\*   | 41.5        | 46.0        | 39.7      | 47.3      |
+| AGIEval          | 46.4                 | 37.6            | 20.8     | 24.6        | 39.0        | 24.1      | 26.4      |
+| CommonSenseQA    | 80.8                 | 59.5            | 65.0     | 58.8        | 60.0        | 68.7      | 66.7      |
+| BUSTM            | 80.6                 | 50.6            | 48.5     | 51.3        | 55.0        | 48.8      | 62.5      |
+| CLUEWSC          | 81.8                 | 59.1            | 50.3     | 52.8        | 59.8        | 50.3      | 52.2      |
+| MATH             | 5.0                  | 7.1             | 2.8      | 3.0         | 6.6         | 2.2       | 2.8       |
+| GSM8K            | 36.2                 | 31.2            | 10.1     | 9.7         | 29.2        | 6.0       | 15.3      |
+| HumanEval        | 15.9                 | 10.4            | 14.0     | 9.2         | 9.2         | 9.2       | 11.0      |
+| RACE(High)       | 80.3                 | 57.4            | 46.9\*   | 28.1        | 66.3        | 40.7      | 54.0      |
 
-- The evaluation results were obtained from [OpenCompass 20230706](https://github.com/internLM/OpenCompass/) (some data marked with *, which means come from the original papers), and evaluation configuration can be found in the configuration files provided by [OpenCompass](https://github.com/internLM/OpenCompass/).
+- The evaluation results were obtained from [OpenCompass 20230706](https://github.com/internLM/OpenCompass/) (data marked with \* are taken from the original papers), and the evaluation configuration can be found in the configuration files provided by [OpenCompass](https://github.com/internLM/OpenCompass/).
 - The evaluation data may have numerical differences due to the version iteration of [OpenCompass](https://github.com/internLM/OpenCompass/), so please refer to the latest evaluation results of [OpenCompass](https://github.com/internLM/OpenCompass/).
diff --git a/requirements.txt b/requirements.txt
index 4f88e70..5e44167 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,2 @@
-transformers>=4.34
 sentencepiece
+transformers>=4.34
diff --git a/tests/test_hf_model.py b/tests/test_hf_model.py
index 897b205..2f7a06b 100644
--- a/tests/test_hf_model.py
+++ b/tests/test_hf_model.py
@@ -2,33 +2,32 @@ import pytest
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-prompts = ["你好", "what's your name"]
+prompts = ['你好', "what's your name"]
 
 
 def assert_model(response):
     assert len(response) != 0
-    assert "UNUSED_TOKEN" not in response
+    assert 'UNUSED_TOKEN' not in response
 
 
 class TestChat:
-    """
-    Test cases for chat model.
-    """
+    """Test cases for chat model."""
 
     @pytest.mark.parametrize(
-        "model_name",
+        'model_name',
         [
-            "internlm/internlm2-chat-7b",
-            "internlm/internlm2-chat-7b-sft",
+            'internlm/internlm2-chat-7b',
+            'internlm/internlm2-chat-7b-sft',
         ],
     )
     def test_demo_default(self, model_name):
-        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+        tokenizer = AutoTokenizer.from_pretrained(model_name,
+                                                  trust_remote_code=True)
         # Set `torch_dtype=torch.float16` to load model in float16, otherwise
         # it will be loaded as float32 and might cause OOM Error.
         model = AutoModelForCausalLM.from_pretrained(
-            model_name, torch_dtype=torch.float16, trust_remote_code=True
-        ).cuda()
+            model_name, torch_dtype=torch.float16,
+            trust_remote_code=True).cuda()
         model = model.eval()
         for prompt in prompts:
             response, history = model.chat(tokenizer, prompt, history=[])
@@ -37,43 +36,45 @@ class TestChat:
 
         for prompt in prompts:
             length = 0
-            for response, history in model.stream_chat(tokenizer, prompt, history=[]):
-                print(response[length:], flush=True, end="")
+            for response, history in model.stream_chat(tokenizer,
+                                                       prompt,
+                                                       history=[]):
+                print(response[length:], flush=True, end='')
                 length = len(response)
             assert_model(response)
 
 
 class TestBase:
-    """
-    Test cases for base model.
-    """
+    """Test cases for base model."""
 
     @pytest.mark.parametrize(
-        "model_name",
+        'model_name',
         [
-            "internlm/internlm2-7b",
-            "internlm/internlm2-base-7b",
+            'internlm/internlm2-7b',
+            'internlm/internlm2-base-7b',
         ],
     )
     def test_demo_default(self, model_name):
-        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+        tokenizer = AutoTokenizer.from_pretrained(model_name,
+                                                  trust_remote_code=True)
         # Set `torch_dtype=torch.float16` to load model in float16, otherwise
         # it will be loaded as float32 and might cause OOM Error.
         model = AutoModelForCausalLM.from_pretrained(
-            model_name, torch_dtype=torch.float16, trust_remote_code=True
-        ).cuda()
+            model_name, torch_dtype=torch.float16,
+            trust_remote_code=True).cuda()
         for prompt in prompts:
-            inputs = tokenizer(prompt, return_tensors="pt")
+            inputs = tokenizer(prompt, return_tensors='pt')
             for k, v in inputs.items():
                 inputs[k] = v.cuda()
             gen_kwargs = {
-                "max_length": 128,
-                "top_p": 10,
-                "temperature": 1.0,
-                "do_sample": True,
-                "repetition_penalty": 1.0,
+                'max_length': 128,
+                'top_p': 10,
+                'temperature': 1.0,
+                'do_sample': True,
+                'repetition_penalty': 1.0,
             }
             output = model.generate(**inputs, **gen_kwargs)
-            output = tokenizer.decode(output[0].tolist(), skip_special_tokens=True)
+            output = tokenizer.decode(output[0].tolist(),
+                                      skip_special_tokens=True)
             print(output)
             assert_model(output)
diff --git a/tools/README.md b/tools/README.md
index 37bf7af..256da11 100644
--- a/tools/README.md
+++ b/tools/README.md
@@ -5,6 +5,7 @@
 We offer the `convert2llama.py`, designed to seamlessly transform InternLM2 (HF format) into LLaMA (HF format). Here, HF refers to the format used by HuggingFace Transformers.
 
 ### Usage
+
 ```
 python convert2llama.py --src /path/to/internlm2/ckpt --tgt /path/to/target/ckpt
 ```
diff --git a/tools/convert2llama.py b/tools/convert2llama.py
index 7e156da..48368b7 100644
--- a/tools/convert2llama.py
+++ b/tools/convert2llama.py
@@ -12,18 +12,18 @@ from transformers import AutoConfig, LlamaConfig, LlamaTokenizer
 def save_conifg(config, tgt):
     config_dict = config.to_dict()
     unnecessary_keys = [
-        "_name_or_path",
-        "auto_map",
-        "transformers_version",
-        "model_type",
-        "architectures",
-        "tokenizer_class",
-        "attn_implementation",
+        '_name_or_path',
+        'auto_map',
+        'transformers_version',
+        'model_type',
+        'architectures',
+        'tokenizer_class',
+        'attn_implementation',
     ]
     for k in unnecessary_keys:
         config_dict.pop(k, None)
-    config_dict["attention_bias"] = config_dict.pop("bias")
-    config_dict["architectures"] = ["LlamaForCausalLM"]
+    config_dict['attention_bias'] = config_dict.pop('bias')
+    config_dict['architectures'] = ['LlamaForCausalLM']
     llama_config = LlamaConfig(**config_dict)
     llama_config.save_pretrained(tgt)
 
@@ -31,106 +31,109 @@ def save_conifg(config, tgt):
 def convert(src, tgt):
     """Convert InternLM2 huggingface checkpoints to Llama-style."""
 
-    print("Convert InternLM2 huggingface checkpoints to Llama...")
+    print('Convert InternLM2 huggingface checkpoints to Llama...')
 
     config = AutoConfig.from_pretrained(src, trust_remote_code=True)
-    assert not config.bias, "Cannot convert InternLM Model with bias to LLaMA."
+    assert not config.bias, 'Cannot convert InternLM Model with bias to LLaMA.'
 
     head_dim = config.hidden_size // config.num_attention_heads
-    num_key_value_groups = config.num_attention_heads // config.num_key_value_heads
+    num_key_value_groups = config.num_attention_heads \
+        // config.num_key_value_heads
 
     # load index json file
-    index_file = os.path.join(src, "pytorch_model.bin.index.json")
+    index_file = os.path.join(src, 'pytorch_model.bin.index.json')
     if os.path.exists(index_file):
         with open(index_file) as fp:
             index_dict = json.load(fp)
-            index_dict["weight_map"] = {}
+            index_dict['weight_map'] = {}
     else:
         index_dict = None
 
     os.makedirs(tgt, exist_ok=True)
     for filename in tqdm(os.listdir(src)):
-        if not filename.endswith(".bin"):
+        if not filename.endswith('.bin'):
             continue
         states = torch.load(os.path.join(src, filename))
         llama_states = {}
         for k, v in states.copy().items():
-            if "wqkv" in k:
+            if 'wqkv' in k:
                 v = rearrange(
                     v,
-                    "(h gs d) dim -> h gs d dim",
+                    '(h gs d) dim -> h gs d dim',
                     gs=2 + num_key_value_groups,
                     d=head_dim,
                 )
-                wq, wk, wv = torch.split(v, [num_key_value_groups, 1, 1], dim=1)
-                wq = rearrange(wq, "h gs d dim -> (h gs d) dim")
-                wk = rearrange(wk, "h gs d dim -> (h gs d) dim")
-                wv = rearrange(wv, "h gs d dim -> (h gs d) dim")
-                _prefix = k.split("attention")[0]
-                wq_key = _prefix + "self_attn.q_proj.weight"
-                wk_key = _prefix + "self_attn.k_proj.weight"
-                wv_key = _prefix + "self_attn.v_proj.weight"
+                wq, wk, wv = torch.split(v, [num_key_value_groups, 1, 1],
+                                         dim=1)
+                wq = rearrange(wq, 'h gs d dim -> (h gs d) dim')
+                wk = rearrange(wk, 'h gs d dim -> (h gs d) dim')
+                wv = rearrange(wv, 'h gs d dim -> (h gs d) dim')
+                _prefix = k.split('attention')[0]
+                wq_key = _prefix + 'self_attn.q_proj.weight'
+                wk_key = _prefix + 'self_attn.k_proj.weight'
+                wv_key = _prefix + 'self_attn.v_proj.weight'
                 llama_states[wq_key] = wq.clone()
                 llama_states[wk_key] = wk.clone()
                 llama_states[wv_key] = wv.clone()
 
-            elif "attention.wo" in k:
-                new_k = k.replace("attention.wo", "self_attn.o_proj")
+            elif 'attention.wo' in k:
+                new_k = k.replace('attention.wo', 'self_attn.o_proj')
                 llama_states[new_k] = v
-            elif "feed_forward.w1" in k:
-                new_k = k.replace("feed_forward.w1", "mlp.gate_proj")
+            elif 'feed_forward.w1' in k:
+                new_k = k.replace('feed_forward.w1', 'mlp.gate_proj')
                 llama_states[new_k] = v
-            elif "feed_forward.w2" in k:
-                new_k = k.replace("feed_forward.w2", "mlp.down_proj")
+            elif 'feed_forward.w2' in k:
+                new_k = k.replace('feed_forward.w2', 'mlp.down_proj')
                 llama_states[new_k] = v
-            elif "feed_forward.w3" in k:
-                new_k = k.replace("feed_forward.w3", "mlp.up_proj")
+            elif 'feed_forward.w3' in k:
+                new_k = k.replace('feed_forward.w3', 'mlp.up_proj')
                 llama_states[new_k] = v
-            elif "attention_norm" in k:
-                new_k = k.replace("attention_norm", "input_layernorm")
+            elif 'attention_norm' in k:
+                new_k = k.replace('attention_norm', 'input_layernorm')
                 llama_states[new_k] = v
-            elif "ffn_norm" in k:
-                new_k = k.replace("ffn_norm", "post_attention_layernorm")
+            elif 'ffn_norm' in k:
+                new_k = k.replace('ffn_norm', 'post_attention_layernorm')
                 llama_states[new_k] = v
-            elif "tok_embeddings" in k:
-                llama_states["model.embed_tokens.weight"] = v
-            elif "output" in k:
-                llama_states["lm_head.weight"] = v
+            elif 'tok_embeddings' in k:
+                llama_states['model.embed_tokens.weight'] = v
+            elif 'output' in k:
+                llama_states['lm_head.weight'] = v
             else:
                 llama_states[k] = v
 
         if index_dict is not None:
             for k in llama_states:
-                index_dict["weight_map"][k] = filename
+                index_dict['weight_map'][k] = filename
         print(f"Saving to {os.path.join(tgt, filename)}...", flush=True)
         torch.save(llama_states, os.path.join(tgt, filename))
         del states
 
-    print("Saving config and tokenizer...")
+    print('Saving config and tokenizer...')
     # index.json
     if index_dict is not None:
-        with open(os.path.join(tgt, "pytorch_model.bin.index.json"), "w") as fp:
+        with open(os.path.join(tgt, 'pytorch_model.bin.index.json'),
+                  'w') as fp:
             json.dump(index_dict, fp, indent=2)
     # tokenizer
     tokenizer = LlamaTokenizer.from_pretrained(src)
-    tokenizer.init_kwargs.pop("auto_map", None)
+    tokenizer.init_kwargs.pop('auto_map', None)
     tokenizer.save_pretrained(tgt)
     # config
     save_conifg(config, tgt)
-    print("Done!")
+    print('Done!')
 
 
 def parse_args():
     parser = argparse.ArgumentParser()
-    parser.add_argument("--src", type=str, help="Input folder")
-    parser.add_argument("--tgt", type=str, help="Output folder")
+    parser.add_argument('--src', type=str, help='Input folder')
+    parser.add_argument('--tgt', type=str, help='Output folder')
 
     args = parser.parse_args()
 
     return args
 
 
-if __name__ == "__main__":
+if __name__ == '__main__':
     args = parse_args()
 
     convert(args.src, args.tgt)