From 8798563db9556fb99d19e78966220c795bf432fb Mon Sep 17 00:00:00 2001 From: regmiao Date: Tue, 28 Mar 2023 17:52:19 +0800 Subject: [PATCH 1/5] add pull_model.py --- pull_model.py | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 pull_model.py diff --git a/pull_model.py b/pull_model.py new file mode 100644 index 0000000..c539e05 --- /dev/null +++ b/pull_model.py @@ -0,0 +1,2 @@ +from transformers import AutoModel +model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda() From 89fc500237ec7292a3291c26d086707cc3b2b831 Mon Sep 17 00:00:00 2001 From: regmiao Date: Tue, 28 Mar 2023 17:52:27 +0800 Subject: [PATCH 2/5] add Dockerfile --- Dockerfile | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..bad6354 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,6 @@ +FROM pytorch/pytorch:1.13.1-cuda11.6-cudnn8-runtime +COPY . . +RUN pip3 install -r requirements.txt +RUN python3 pull_model.py +EXPOSE 7860 +CMD [ "python","web_demo.py" ] \ No newline at end of file From 9b608e475bd7c0fdc5e354148e8d7375f5b8afd0 Mon Sep 17 00:00:00 2001 From: regmiao Date: Wed, 29 Mar 2023 17:09:21 +0800 Subject: [PATCH 3/5] add dockerfile --- Dockerfile | 6 ++++-- README.md | 6 +++++- pull_model.py | 2 -- web_demo.py | 12 ++++++++---- 4 files changed, 17 insertions(+), 9 deletions(-) delete mode 100644 pull_model.py diff --git a/Dockerfile b/Dockerfile index bad6354..fc8858e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,8 @@ FROM pytorch/pytorch:1.13.1-cuda11.6-cudnn8-runtime COPY . . 
RUN pip3 install -r requirements.txt -RUN python3 pull_model.py +ENV model_path="/model" + EXPOSE 7860 -CMD [ "python","web_demo.py" ] \ No newline at end of file + +CMD [ "python","web_demo./py" ] \ No newline at end of file diff --git a/README.md b/README.md index 075ce78..0d08ca5 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,11 @@ ChatGLM-6B 使用了和 ChatGPT 相似的技术,针对中文问答和对话进 ### 环境安装 使用 pip 安装依赖:`pip install -r requirements.txt`,其中 `transformers` 库版本推荐为 `4.26.1`,但理论上不低于 `4.23.1` 即可。 - +### docker镜像部署 +```bash +docker build -t chatglm . +docker run -v /the/model/path:/model chatglm +``` ### 代码调用 可以通过如下代码调用 ChatGLM-6B 模型来生成对话: diff --git a/pull_model.py b/pull_model.py deleted file mode 100644 index c539e05..0000000 --- a/pull_model.py +++ /dev/null @@ -1,2 +0,0 @@ -from transformers import AutoModel -model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda() diff --git a/web_demo.py b/web_demo.py index 88a6dc8..9d4a25a 100644 --- a/web_demo.py +++ b/web_demo.py @@ -1,10 +1,14 @@ from transformers import AutoModel, AutoTokenizer import gradio as gr - -tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) -model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda() +import os +# Fall back to the hub checkpoint when model_path is unset or empty. +modelPath = os.getenv('model_path') or "THUDM/chatglm-6b" +tokenizer = AutoTokenizer.from_pretrained(modelPath, trust_remote_code=True) +model = AutoModel.from_pretrained(modelPath, trust_remote_code=True).half().cuda() model = model.eval() + + MAX_TURNS = 20 MAX_BOXES = MAX_TURNS * 2 @@ -42,4 +46,4 @@ with gr.Blocks() as demo: temperature = gr.Slider(0, 1, value=0.95, step=0.01, label="Temperature", interactive=True) button = gr.Button("Generate") button.click(predict, [txt, max_length, top_p, temperature, state], [state] + text_boxes) -demo.queue().launch(share=False, 
inbrowser=True,server_name="0.0.0.0") From 30e7e8037d6555a19325674391bd8686e4ec0f05 Mon Sep 17 00:00:00 2001 From: regmiao Date: Wed, 29 Mar 2023 17:11:59 +0800 Subject: [PATCH 4/5] fix typo --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index fc8858e..f43052c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,4 +5,4 @@ ENV model_path="/model" EXPOSE 7860 -CMD [ "python","web_demo./py" ] \ No newline at end of file +CMD [ "python3","web_demo.py" ] \ No newline at end of file From 748c1597e9164b6532ae28f85ea29d4ee93792b9 Mon Sep 17 00:00:00 2001 From: regmiao Date: Wed, 29 Mar 2023 17:16:10 +0800 Subject: [PATCH 5/5] update Readme --- README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 0d08ca5..d10e20a 100644 --- a/README.md +++ b/README.md @@ -34,11 +34,7 @@ ChatGLM-6B 使用了和 ChatGPT 相似的技术,针对中文问答和对话进 ### 环境安装 使用 pip 安装依赖:`pip install -r requirements.txt`,其中 `transformers` 库版本推荐为 `4.26.1`,但理论上不低于 `4.23.1` 即可。 -### docker镜像部署 -```bash -docker build -t chatglm . -docker run -v /the/model/path:/model chatglm -``` + ### 代码调用 可以通过如下代码调用 ChatGLM-6B 模型来生成对话: @@ -122,6 +118,12 @@ curl -X POST "http://127.0.0.1:8000" \ } ``` +### 在 Docker 容器中部署 +```bash +docker build -t chatglm . +docker run --gpus all -p 7860:7860 -v /the/model/path:/model chatglm +``` + ## 低成本部署 ### 模型量化 默认情况下,模型以 FP16 精度加载,运行上述代码需要大概 13GB 显存。如果你的 GPU 显存有限,可以尝试以量化方式加载模型,使用方法如下: @@ -145,8 +147,6 @@ model = AutoModel.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=Tru model = AutoModel.from_pretrained("THUDM/chatglm-6b-int4-qe", trust_remote_code=True).half().cuda() ``` - - ### CPU 部署 如果你没有 GPU 硬件的话,也可以在 CPU 上进行推理,但是推理速度会更慢。使用方法如下(需要大概 32GB 内存) ```python