mirror of https://github.com/THUDM/ChatGLM2-6B
Add Docker start
parent
3d0225f969
commit
b2a4fb6648
|
@ -0,0 +1,2 @@
|
|||
.env
|
||||
/THUDM
|
|
@ -0,0 +1,7 @@
|
|||
# Start
|
||||
|
||||
# 默认启动webui
|
||||
START_MODE=web_demo.py
|
||||
|
||||
# 启动Api
|
||||
# START_MODE=api.py
|
|
@ -0,0 +1,7 @@
|
|||
# Start
|
||||
|
||||
# 默认启动webui
|
||||
START_MODE=web_demo.py
|
||||
|
||||
# 启动Api
|
||||
# START_MODE=api.py
|
|
@ -0,0 +1,2 @@
|
|||
.env
|
||||
/THUDM
|
|
@ -0,0 +1,14 @@
|
|||
FROM nvcr.io/nvidia/pytorch:23.06-py3
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# RUN git clone --depth=1 https://github.com/THUDM/ChatGLM2-6B.git /app
|
||||
|
||||
COPY . .
|
||||
|
||||
RUN pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \
|
||||
pip install -r requirements.txt
|
||||
|
||||
EXPOSE 7860 8000
|
||||
|
||||
# Run the script named by CLI_ARGS (docker-compose sets it from START_MODE).
# Fall back to the web UI when CLI_ARGS is unset or empty, so the container
# does not end up in a bare `python` interpreter.
CMD python ${CLI_ARGS:-web_demo.py}
|
15
README.md
15
README.md
|
@ -279,7 +279,22 @@ if __name__ == "__main__":
|
|||
if hasattr(chunk.choices[0].delta, "content"):
|
||||
print(chunk.choices[0].delta.content, end="", flush=True)
|
||||
```
|
||||
### Docker 部署
|
||||
|
||||
将配置文件 `.env.template` 复制为 `.env`,默认启动模式为 `webui`(`START_MODE=web_demo.py`);如需启动 API,请将 `START_MODE` 改为 `api.py`
|
||||
|
||||
如前文所述,在 `THUDM/chatglm2-6b` 文件夹放置好对应的模型文件(`int4` 模型请放在 `THUDM/chatglm2-6b-int4` 文件夹)
|
||||
|
||||
构建镜像,启动程序并加载`int4`模型
|
||||
> 显存小于`8G`可以使用此启动方式
|
||||
```
|
||||
docker-compose --profile int4 up -d
|
||||
```
|
||||
构建镜像,启动程序并加载完整模型
|
||||
> 大约需要`13G`显存
|
||||
```
|
||||
docker-compose --profile int16 up -d
|
||||
```
|
||||
|
||||
## 低成本部署
|
||||
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
version: "3.3"
|
||||
services:
|
||||
chatglm2-6b:
|
||||
profiles: ["int16"]
|
||||
build:
|
||||
context: .
|
||||
ports:
|
||||
- 7860:7860
|
||||
- 8000:8000
|
||||
stdin_open: true
|
||||
tty: true
|
||||
volumes:
|
||||
- ./THUDM/chatglm2-6b:/app/THUDM/chatglm2-6b
|
||||
environment:
|
||||
# START_MODE comes from .env; default to the web UI when it is missing so
# the container never receives an empty CLI_ARGS.
- CLI_ARGS=${START_MODE:-web_demo.py}
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
device_ids: ['0']
|
||||
capabilities: [gpu]
|
||||
|
||||
chatglm2-6b-int4:
|
||||
profiles: ["int4"]
|
||||
build:
|
||||
context: .
|
||||
ports:
|
||||
- 7860:7860
|
||||
- 8000:8000
|
||||
stdin_open: true
|
||||
tty: true
|
||||
volumes:
|
||||
- ./THUDM/chatglm2-6b-int4:/app/THUDM/chatglm2-6b
|
||||
environment:
|
||||
# START_MODE comes from .env; default to the web UI when it is missing so
# the container never receives an empty CLI_ARGS.
- CLI_ARGS=${START_MODE:-web_demo.py}
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
device_ids: ['0']
|
||||
capabilities: [gpu]
|
|
@ -105,4 +105,4 @@ with gr.Blocks() as demo:
|
|||
|
||||
emptyBtn.click(reset_state, outputs=[chatbot, history, past_key_values], show_progress=True)
|
||||
|
||||
demo.queue().launch(share=False, inbrowser=True)
|
||||
# Listen on all interfaces (0.0.0.0) so the UI can be reached through the
# port published by the Docker container, not only from localhost.
demo.queue().launch(server_name="0.0.0.0", share=False, inbrowser=True)
|
||||
|
|
Loading…
Reference in New Issue