mirror of https://github.com/THUDM/ChatGLM-6B
feat: use int4
parent
614211d928
commit
c81e046c1e
4
api.py
4
api.py
|
@@ -50,7 +50,7 @@ async def create_item(request: Request):
|
|||
|
||||
|
||||
if __name__ == '__main__':
|
||||
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
|
||||
model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
|
||||
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True)
|
||||
model = AutoModel.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True).half().cuda()
|
||||
model.eval()
|
||||
uvicorn.run(app, host='0.0.0.0', port=8000, workers=1)
|
||||
|
|
|
@@ -3,8 +3,8 @@ import platform
|
|||
import signal
|
||||
from transformers import AutoTokenizer, AutoModel
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
|
||||
model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
|
||||
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True)
|
||||
model = AutoModel.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True).half().cuda()
|
||||
model = model.eval()
|
||||
|
||||
os_name = platform.system()
|
||||
|
|
|
@@ -2,8 +2,8 @@ from transformers import AutoModel, AutoTokenizer
|
|||
import gradio as gr
|
||||
import mdtex2html
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
|
||||
model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
|
||||
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True)
|
||||
model = AutoModel.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True).half().cuda()
|
||||
model = model.eval()
|
||||
|
||||
"""Override Chatbot.postprocess"""
|
||||
|
@@ -98,4 +98,4 @@ with gr.Blocks() as demo:
|
|||
|
||||
emptyBtn.click(reset_state, outputs=[chatbot, history], show_progress=True)
|
||||
|
||||
demo.queue().launch(share=False, inbrowser=True)
|
||||
demo.queue().launch(share=False, inbrowser=False, server_name="0.0.0.0")
|
||||
|
|
Loading…
Reference in New Issue