Add Docker start

2023-09-19 17:18:30 +08:00 · 2023-09-19 17:18:30 +08:00 · b2a4fb6648
parent 3d0225f969
commit b2a4fb6648
8 changed files with 91 additions and 1 deletions
--- a/.dockerignore
+++ b/.dockerignore
@ -0,0 +1,2 @@
+# Local settings file; kept out of the build context.
+.env
+# Model directory; docker-compose.yml bind-mounts it at run time instead.
+/THUDM
--- a/.env
+++ b/.env
@ -0,0 +1,7 @@
+# Startup mode: docker-compose.yml substitutes this value for $START_MODE.
+
+# Default: launch the web UI
+START_MODE=web_demo.py
+
+# Alternative: launch the API instead
+# START_MODE=api.py
--- a/.env.template
+++ b/.env.template
@ -0,0 +1,7 @@
+# Template for .env: copy this file to .env and adjust START_MODE as needed.
+
+# Default: launch the web UI
+START_MODE=web_demo.py
+
+# Alternative: launch the API instead
+# START_MODE=api.py
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,2 @@
+# Local settings file (created by copying .env.template).
+.env
+# Locally placed model files.
+/THUDM
--- a/14
+++ b/14
@ -0,0 +1,14 @@
+# Image that runs one of the project's Python entry scripts on top of
+# NVIDIA's PyTorch base image.
+FROM nvcr.io/nvidia/pytorch:23.06-py3
+
+WORKDIR /app
+
+# RUN git clone --depth=1 https://github.com/THUDM/ChatGLM2-6B.git /app
+
+# Install dependencies before copying the sources so this layer stays cached
+# when only application code changes. --no-cache-dir keeps pip's download
+# cache out of the image.
+COPY requirements.txt .
+RUN pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \
+    pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
+EXPOSE 7860 8000
+
+# CLI_ARGS names the script to run (docker-compose.yml sets it from
+# START_MODE); fall back to the web UI when it is unset or empty so the
+# container never starts a bare interactive interpreter. `exec` replaces the
+# intermediate shell so python receives stop signals directly.
+CMD exec python ${CLI_ARGS:-web_demo.py}
--- a/README.md
+++ b/README.md
@ -279,7 +279,22 @@ if __name__ == "__main__":
        if hasattr(chunk.choices[0].delta, "content"):
            print(chunk.choices[0].delta.content, end="", flush=True)
 ```
+### Docker 部署

+将配置文件 `.env.template` 复制一份并命名为 `.env`，默认启动模式为 `webui`
+
+如前文所述，将模型文件放入对应目录：完整模型放在 `THUDM/chatglm2-6b`，`int4` 量化模型放在 `THUDM/chatglm2-6b-int4`（即 `docker-compose.yml` 中挂载的目录）
+
+构建镜像，启动程序并加载 `int4` 模型
+> 显存小于`8G`可以使用此启动方式
+```
+docker-compose --profile int4 up -d
+```
+构建镜像，启动程序并加载完整模型
+> 大约需要`13G`显存
+```
+docker-compose --profile int16 up -d
+```

 ## 低成本部署

--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -0,0 +1,43 @@
+# Compose file with two variants of the same image, selected with `--profile`:
+#   int16 -> service chatglm2-6b       (mounts ./THUDM/chatglm2-6b)
+#   int4  -> service chatglm2-6b-int4  (mounts ./THUDM/chatglm2-6b-int4)
+# Both publish the same host ports, so run only one profile at a time.
+version: "3.3"
+services:
+  chatglm2-6b:
+    profiles: ["int16"]
+    build:
+      context: .
+    ports:
+      # Quoted so HOST:CONTAINER pairs are always parsed as strings.
+      - "7860:7860"
+      - "8000:8000"
+    stdin_open: true
+    tty: true
+    volumes:
+      - ./THUDM/chatglm2-6b:/app/THUDM/chatglm2-6b
+    environment:
+      # Script passed to `python` by the image's CMD. START_MODE is read from
+      # .env; fall back to the web UI when it is unset or empty.
+      - CLI_ARGS=${START_MODE:-web_demo.py}
+    deploy:
+      resources:
+        reservations:
+          devices:
+            # Reserve the first NVIDIA GPU for this container.
+            - driver: nvidia
+              device_ids: ['0']
+              capabilities: [gpu]
+
+  # int4 variant: same image and ports as chatglm2-6b, different model mount.
+  chatglm2-6b-int4:
+    profiles: ["int4"]
+    build:
+      context: .
+    ports:
+      # Quoted so HOST:CONTAINER pairs are always parsed as strings.
+      - "7860:7860"
+      - "8000:8000"
+    stdin_open: true
+    tty: true
+    volumes:
+      # The int4 directory is mounted at the full model's in-container path
+      # (presumably so the scripts can load it unchanged; verify).
+      - ./THUDM/chatglm2-6b-int4:/app/THUDM/chatglm2-6b
+    environment:
+      # Script passed to `python` by the image's CMD. START_MODE is read from
+      # .env; fall back to the web UI when it is unset or empty.
+      - CLI_ARGS=${START_MODE:-web_demo.py}
+    deploy:
+      resources:
+        reservations:
+          devices:
+            # Reserve the first NVIDIA GPU for this container.
+            - driver: nvidia
+              device_ids: ['0']
+              capabilities: [gpu]
--- a/web_demo.py
+++ b/web_demo.py
@ -105,4 +105,4 @@ with gr.Blocks() as demo:

    emptyBtn.click(reset_state, outputs=[chatbot, history, past_key_values], show_progress=True)

-demo.queue().launch(share=False, inbrowser=True)
+demo.queue().launch(server_name="0.0.0.0",share=False, inbrowser=True)