diff --git a/Dockerfile b/Dockerfile index bad6354..fc8858e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,8 @@ FROM pytorch/pytorch:1.13.1-cuda11.6-cudnn8-runtime COPY . . RUN pip3 install -r requirements.txt -RUN python3 pull_model.py +ENV model_path="/model" + EXPOSE 7860 -CMD [ "python","web_demo.py" ] \ No newline at end of file + +CMD [ "python","web_demo.py" ] \ No newline at end of file diff --git a/README.md b/README.md index 075ce78..0d08ca5 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,11 @@ ChatGLM-6B 使用了和 ChatGPT 相似的技术,针对中文问答和对话进 ### 环境安装 使用 pip 安装依赖:`pip install -r requirements.txt`,其中 `transformers` 库版本推荐为 `4.26.1`,但理论上不低于 `4.23.1` 即可。 - +### docker镜像部署 +```bash +docker build -t chatglm . +docker run -v /the/model/path:/model chatglm +``` ### 代码调用 可以通过如下代码调用 ChatGLM-6B 模型来生成对话: diff --git a/pull_model.py b/pull_model.py deleted file mode 100644 index c539e05..0000000 --- a/pull_model.py +++ /dev/null @@ -1,2 +0,0 @@ -from transformers import AutoModel -model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda() diff --git a/web_demo.py b/web_demo.py index 88a6dc8..9d4a25a 100644 --- a/web_demo.py +++ b/web_demo.py @@ -1,10 +1,14 @@ from transformers import AutoModel, AutoTokenizer import gradio as gr - -tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) -model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda() +import os +modelPath = os.getenv('model_path') +modelPath = modelPath if modelPath else "THUDM/chatglm-6b" +tokenizer = AutoTokenizer.from_pretrained(modelPath, trust_remote_code=True) +model = AutoModel.from_pretrained(modelPath, trust_remote_code=True).half().cuda() model = model.eval() + + MAX_TURNS = 20 MAX_BOXES = MAX_TURNS * 2 @@ -42,4 +46,4 @@ with gr.Blocks() as demo: temperature = gr.Slider(0, 1, value=0.95, step=0.01, label="Temperature", interactive=True) button = gr.Button("Generate") button.click(predict, [txt, 
max_length, top_p, temperature, state], [state] + text_boxes) -demo.queue().launch(share=False, inbrowser=True) +demo.queue().launch(share=False, inbrowser=True, server_name="0.0.0.0")