feat(cli): add parameters for cli

--cpu: use CPU mode (forced on macOS); --local: load the model from a local path; --showTime: show the time taken for each response
2023-03-19 12:45:27 +08:00 · 2023-03-19 12:45:27 +08:00 · 5558a26371
parent 6cda36633e
commit 5558a26371
1 changed files with 35 additions and 4 deletions
--- a/cli_demo.py
+++ b/cli_demo.py
@ -1,13 +1,40 @@
 import os
 import platform
 import argparse
 import time
 from transformers import AutoTokenizer, AutoModel
-tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
+
-model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
+
-model = model.eval()
+parser = argparse.ArgumentParser(description='cli demo')
 parser.add_argument('--cpu', action='store_true', help='cpu mode')
 parser.add_argument('--showTime', action='store_true', help='show time consuming')
 parser.add_argument('--local', action='store_true',help='using local models,default path:/models/chatglm-6b')
 args = parser.parse_args()
 os_name = platform.system()
 # mac: force use cpu
 if os_name == 'Darwin':
    args.cpu = True
 model_name = "THUDM/chatglm-6b"
 if args.local:
    model_name = "./models/chatglm-6b"
 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
 if(args.cpu):
    model = model.float()
 else:
    model =  model.half().cuda()
 model = model.eval()
 history = []
 print("欢迎使用 ChatGLM-6B 模型，输入内容即可进行对话，clear 清空对话历史，stop 终止程序")
 while True:
@ -20,5 +47,9 @@ while True:
        os.system(command)
        print("欢迎使用 ChatGLM-6B 模型，输入内容即可进行对话，clear 清空对话历史，stop 终止程序")
        continue
    timeStart = time.perf_counter()
    response, history = model.chat(tokenizer, query, history=history)
-    print(f"ChatGLM-6B：{response}")
+    timeEnd = time.perf_counter()
    showTime="({timeEnd - timeStart:0.4f}s)" if args.showTime else ""
    print(f"ChatGLM-6B {showTime}：{response}")