From 263e5022a51aae4abb3f2d6ae38e05a9d9cac9d1 Mon Sep 17 00:00:00 2001 From: raxcl Date: Sun, 20 Aug 2023 11:30:23 +0800 Subject: [PATCH] Update web_demo.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 更好地适配笔记本用户 --- web_demo.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/web_demo.py b/web_demo.py index 1af24c9..3631f7e 100644 --- a/web_demo.py +++ b/web_demo.py @@ -8,6 +8,11 @@ model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).c # 多显卡支持,使用下面两行代替上面一行,将num_gpus改为你实际的显卡数量 # from utils import load_model_on_gpus # model = load_model_on_gpus("THUDM/chatglm2-6b", num_gpus=2) + +# 量化模型示例(int4 / int8),替换第八行即可。quantize 的位数对应 GPU 显存大小(适用于 GPU 显存不足的笔记本用户:显存为 4G 时采用前者,显存为 8G 时采用后者) +# model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).half().quantize(4).cuda() +# model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).half().quantize(8).cuda() + model = model.eval() """Override Chatbot.postprocess"""