feat: use the int4-quantized model (THUDM/chatglm-6b-int4) and serve the web demo on 0.0.0.0

2023-05-05 00:19:43 +08:00 · 2023-05-05 00:19:43 +08:00 · c81e046c1e
parent 614211d928
commit c81e046c1e
3 changed files with 7 additions and 7 deletions
--- a/api.py
+++ b/api.py
@@ -50,7 +50,7 @@ async def create_item(request: Request):
 if __name__ == '__main__':
-    tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
+    tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True)
-    model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
+    model = AutoModel.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True).half().cuda()
    model.eval()
    uvicorn.run(app, host='0.0.0.0', port=8000, workers=1)
--- a/cli_demo.py
+++ b/cli_demo.py
@@ -3,8 +3,8 @@ import platform
 import signal
 from transformers import AutoTokenizer, AutoModel
-tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True)
-model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
+model = AutoModel.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True).half().cuda()
 model = model.eval()
 os_name = platform.system()
--- a/web_demo.py
+++ b/web_demo.py
@@ -2,8 +2,8 @@ from transformers import AutoModel, AutoTokenizer
 import gradio as gr
 import mdtex2html
-tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True)
-model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
+model = AutoModel.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True).half().cuda()
 model = model.eval()
 """Override Chatbot.postprocess"""
@@ -98,4 +98,4 @@ with gr.Blocks() as demo:
    emptyBtn.click(reset_state, outputs=[chatbot, history], show_progress=True)
-demo.queue().launch(share=False, inbrowser=True)
+demo.queue().launch(share=False, inbrowser=False, server_name="0.0.0.0")