From 9e60ea0b64790b21345a29ceec778d4e31ce41d5 Mon Sep 17 00:00:00 2001
From: RangiLyu
Date: Fri, 26 Jan 2024 17:48:14 +0800
Subject: [PATCH] doc: update requirements

---
 README.md                | 14 ++++++++++----
 README_zh-CN.md          | 14 ++++++++++----
 chat/README.md           |  4 ++--
 chat/README_zh-CN.md     |  4 ++--
 chat/lmdeploy.md         |  2 +-
 chat/lmdeploy_zh_cn.md   |  2 +-
 finetune/README.md       |  2 +-
 finetune/README_zh-CN.md |  2 +-
 8 files changed, 28 insertions(+), 16 deletions(-)

diff --git a/README.md b/README.md
index da3ed09..bdef9d9 100644
--- a/README.md
+++ b/README.md
@@ -124,6 +124,12 @@ The release of InternLM2 series contains two model sizes: 7B and 20B. 7B models

 - According to the released performance of 2024-01-17.

+## Requirements
+
+- Python >= 3.8
+- PyTorch >= 1.12.0 (2.0.0 and above are recommended)
+- Transformers >= 4.34
+
 ## Usages

 We briefly show the usages with [Transformers](#import-from-transformers), [ModelScope](#import-from-modelscope), and [Web demos](#dialogue).
@@ -143,7 +149,7 @@ tokenizer = AutoTokenizer.from_pretrained("internlm/internlm2-chat-7b", trust_re
 # Set `torch_dtype=torch.float16` to load model in float16, otherwise it will be loaded as float32 and might cause OOM Error.
 model = AutoModelForCausalLM.from_pretrained("internlm/internlm2-chat-7b", device_map="auto", trust_remote_code=True, torch_dtype=torch.float16)
 # (Optional) If on low resource devices, you can load model in 4-bit or 8-bit to further save GPU memory via bitsandbytes.
-  # InternLM 7B in 4bit will cost nearly 8GB GPU memory. 
+  # InternLM 7B in 4bit will cost nearly 8GB GPU memory.
   # pip install -U bitsandbytes
   # 8-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_8bit=True)
   # 4-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_4bit=True)
@@ -167,7 +173,7 @@ tokenizer = AutoTokenizer.from_pretrained(model_dir, device_map="auto", trust_re
 # Set `torch_dtype=torch.float16` to load model in float16, otherwise it will be loaded as float32 and might cause OOM Error.
 model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, torch_dtype=torch.float16)
 # (Optional) If on low resource devices, you can load model in 4-bit or 8-bit to further save GPU memory via bitsandbytes.
-  # InternLM 7B in 4bit will cost nearly 8GB GPU memory. 
+  # InternLM 7B in 4bit will cost nearly 8GB GPU memory.
   # pip install -U bitsandbytes
   # 8-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_8bit=True)
   # 4-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_4bit=True)
@@ -183,7 +189,7 @@ print(response)
 You can interact with the InternLM Chat 7B model through a frontend interface by running the following code:

 ```bash
-pip install streamlit==1.24.0
+pip install streamlit
 pip install transformers>=4.34
 streamlit run ./chat/web_demo.py
 ```
@@ -192,7 +198,7 @@ streamlit run ./chat/web_demo.py

 We use [LMDeploy](https://github.com/InternLM/LMDeploy) for fast deployment of InternLM.

-With only 4 lines of codes, you can perform `internlm2-chat-7b` inference after `pip install lmdeploy`.
+With only 4 lines of codes, you can perform `internlm2-chat-7b` inference after `pip install lmdeploy>=0.2.1`.

 ```python
 from lmdeploy import pipeline
diff --git a/README_zh-CN.md b/README_zh-CN.md
index 0eb6942..56a4fd0 100644
--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -122,6 +122,12 @@ InternLM2 系列模型在本仓库正式发布,具有如下特性:

 - 性能数据截止2024-01-17

+## 依赖
+
+- Python >= 3.8
+- PyTorch >= 1.12.0 (推荐 2.0.0 和更高版本)
+- Transformers >= 4.34
+
 ## 使用案例

 接下来我们展示使用 [Transformers](#import-from-transformers),[ModelScope](#import-from-modelscope) 和 [Web demo](#dialogue) 进行推理。
@@ -141,7 +147,7 @@ tokenizer = AutoTokenizer.from_pretrained("internlm/internlm2-chat-7b", trust_re
 # 设置`torch_dtype=torch.float16`来将模型精度指定为torch.float16,否则可能会因为您的硬件原因造成显存不足的问题。
 model = AutoModelForCausalLM.from_pretrained("internlm/internlm2-chat-7b", device_map="auto",trust_remote_code=True, torch_dtype=torch.float16)
 # (可选) 如果在低资源设备上,可以通过bitsandbytes加载4-bit或8-bit量化的模型,进一步节省GPU显存.
-  # 4-bit 量化的 InternLM 7B 大约会消耗 8GB 显存. 
+  # 4-bit 量化的 InternLM 7B 大约会消耗 8GB 显存.
   # pip install -U bitsandbytes
   # 8-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_8bit=True)
   # 4-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_4bit=True)
@@ -164,7 +170,7 @@ model_dir = snapshot_download('Shanghai_AI_Laboratory/internlm2-chat-7b')
 tokenizer = AutoTokenizer.from_pretrained(model_dir, device_map="auto", trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, torch_dtype=torch.float16)
 # (可选) 如果在低资源设备上,可以通过bitsandbytes加载4-bit或8-bit量化的模型,进一步节省GPU显存.
-  # 4-bit 量化的 InternLM 7B 大约会消耗 8GB 显存. 
+  # 4-bit 量化的 InternLM 7B 大约会消耗 8GB 显存.
   # pip install -U bitsandbytes
   # 8-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_8bit=True)
   # 4-bit: model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, load_in_4bit=True)
@@ -180,7 +186,7 @@ print(response)
 可以通过以下代码启动一个前端的界面来与 InternLM Chat 7B 模型进行交互

 ```bash
-pip install streamlit==1.24.0
+pip install streamlit
 pip install transformers>=4.34
 streamlit run ./chat/web_demo.py
 ```
@@ -189,7 +195,7 @@ streamlit run ./chat/web_demo.py

 我们使用 [LMDeploy](https://github.com/InternLM/LMDeploy) 完成 InternLM 的一键部署。

-通过 `pip install lmdeploy` 安装 LMDeploy 之后,只需 4 行代码,就可以实现离线批处理:
+通过 `pip install lmdeploy>=0.2.1` 安装 LMDeploy 之后,只需 4 行代码,就可以实现离线批处理:

 ```python
 from lmdeploy import pipeline
diff --git a/chat/README.md b/chat/README.md
index 48a953b..8ce04ea 100644
--- a/chat/README.md
+++ b/chat/README.md
@@ -51,8 +51,8 @@ print(response)
 You can interact with the InternLM Chat 7B model through a frontend interface by running the following code:

 ```bash
-pip install streamlit==1.24.0
-pip install transformers==4.30.2
+pip install streamlit
+pip install transformers>=4.34
 streamlit run ./chat/web_demo.py
 ```

diff --git a/chat/README_zh-CN.md b/chat/README_zh-CN.md
index 613de60..f687ee5 100644
--- a/chat/README_zh-CN.md
+++ b/chat/README_zh-CN.md
@@ -45,7 +45,7 @@ print(response)
 可以通过以下代码启动一个前端的界面来与 InternLM2 Chat 7B 模型进行交互

 ```bash
-pip install streamlit==1.24.0
-pip install transformers==4.30.2
+pip install streamlit
+pip install transformers>=4.34
 streamlit run ./web_demo.py
 ```
diff --git a/chat/lmdeploy.md b/chat/lmdeploy.md
index 36c7a16..daa4471 100644
--- a/chat/lmdeploy.md
+++ b/chat/lmdeploy.md
@@ -12,7 +12,7 @@ This article primarily highlights the basic usage of LMDeploy. For a comprehensi
 Install lmdeploy with pip (python 3.8+)

 ```shell
-pip install lmdeploy
+pip install lmdeploy>=0.2.1
 ```

 ## Offline batch inference
diff --git a/chat/lmdeploy_zh_cn.md b/chat/lmdeploy_zh_cn.md
index 1df7e54..c7cf44e 100644
--- a/chat/lmdeploy_zh_cn.md
+++ b/chat/lmdeploy_zh_cn.md
@@ -12,7 +12,7 @@
 使用 pip(python 3.8+)安装 LMDeploy

 ```shell
-pip install lmdeploy
+pip install lmdeploy>=0.2.1
 ```

 ## 离线批处理
diff --git a/finetune/README.md b/finetune/README.md
index e5152c5..bf82a7b 100644
--- a/finetune/README.md
+++ b/finetune/README.md
@@ -29,7 +29,7 @@ We recommend two projects to fine-tune InternLM.
 - Install XTuner with DeepSpeed integration

   ```shell
-  pip install -U 'xtuner[deepspeed]'
+  pip install -U 'xtuner[deepspeed]>=0.1.13'
   ```

 ### Fine-tune
diff --git a/finetune/README_zh-CN.md b/finetune/README_zh-CN.md
index 98a53cc..0f5a356 100644
--- a/finetune/README_zh-CN.md
+++ b/finetune/README_zh-CN.md
@@ -29,7 +29,7 @@
 - 安装集成 DeepSpeed 版本的 XTuner

   ```shell
-  pip install -U 'xtuner[deepspeed]'
+  pip install -U 'xtuner[deepspeed]>=0.1.13'
   ```

 ### 微调
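For reference, below is a minimal sketch of the four-line LMDeploy batch-inference snippet that the patched READMEs point to. It assumes `lmdeploy>=0.2.1` is installed and reuses the model name and prompts from the repository's existing example; it is not part of the patch itself.

```python
from lmdeploy import pipeline

# Build an inference pipeline for the chat model referenced in the README.
pipe = pipeline("internlm/internlm2-chat-7b")
# Run offline batch inference on two prompts and print the generated responses.
response = pipe(["Hi, pls intro yourself", "Shanghai is"])
print(response)
```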