feat(cli): add parameters for cli

--cpu: use CPU mode (forced automatically on macOS)
--local: use a local model checkout
--showTime: show the time consumed by each response
pull/151/head
kingzeus 2023-03-19 12:45:27 +08:00
parent 6cda36633e
commit 5558a26371
1 changed files with 35 additions and 4 deletions

View File

@ -1,13 +1,40 @@
import os
import platform
import argparse
import time
from transformers import AutoTokenizer, AutoModel
# ----- CLI arguments -----
parser = argparse.ArgumentParser(description='cli demo')
parser.add_argument('--cpu', action='store_true', help='cpu mode')
parser.add_argument('--showTime', action='store_true', help='show time consuming')
# NOTE: help text matches the actual relative path used below ("./models/chatglm-6b").
parser.add_argument('--local', action='store_true', help='using local models,default path:./models/chatglm-6b')
args = parser.parse_args()

# macOS (Darwin) has no CUDA support, so force CPU mode there.
os_name = platform.system()
if os_name == 'Darwin':
    args.cpu = True

# Model source: a local checkout when --local is given, otherwise the hub id.
model_name = "THUDM/chatglm-6b"
if args.local:
    model_name = "./models/chatglm-6b"

# Load tokenizer and model ONCE, after the flags are known.
# (Loading eagerly with .cuda() before parsing args would ignore --cpu/--local
# and crash on machines without CUDA.)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
if args.cpu:
    # Full-precision weights: half precision is not usable on CPU.
    model = model.float()
else:
    model = model.half().cuda()
model = model.eval()

history = []
print("欢迎使用 ChatGLM-6B 模型输入内容即可进行对话clear 清空对话历史stop 终止程序")
while True:
@ -20,5 +47,9 @@ while True:
os.system(command)
print("欢迎使用 ChatGLM-6B 模型输入内容即可进行对话clear 清空对话历史stop 终止程序")
continue
timeStart = time.perf_counter()
response, history = model.chat(tokenizer, query, history=history)
print(f"ChatGLM-6B{response}")
timeEnd = time.perf_counter()
showTime="({timeEnd - timeStart:0.4f}s)" if args.showTime else ""
print(f"ChatGLM-6B {showTime}{response}")