feat(cli): add parameters for cli

--cpu: use cpu mode, force to use cpu for mac --local: use local model --showTime: show time consuming
2023-03-19 12:45:27 +08:00 · 2023-03-19 12:45:27 +08:00 · 5558a26371
parent 6cda36633e
commit 5558a26371
1 changed files with 35 additions and 4 deletions
--- a/cli_demo.py
+++ b/cli_demo.py
@ -1,13 +1,40 @@
 import os
 import platform
+import argparse
+import time
 from transformers import AutoTokenizer, AutoModel

-tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
-model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
-model = model.eval()
+
+
+parser = argparse.ArgumentParser(description='cli demo')
+parser.add_argument('--cpu', action='store_true', help='cpu mode')
+parser.add_argument('--showTime', action='store_true', help='show time consuming')
+parser.add_argument('--local', action='store_true',help='using local models,default path:/models/chatglm-6b')
+
+args = parser.parse_args()

 os_name = platform.system()

+# mac: force use cpu
+if os_name == 'Darwin':
+    args.cpu = True
+
+
+model_name = "THUDM/chatglm-6b"
+if args.local:
+    model_name = "./models/chatglm-6b"
+
+
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
+if(args.cpu):
+    model = model.float()
+else:
+    model =  model.half().cuda()
+model = model.eval()
+
+
+
 history = []
 print("欢迎使用 ChatGLM-6B 模型，输入内容即可进行对话，clear 清空对话历史，stop 终止程序")
 while True:
@ -20,5 +47,9 @@ while True:
        os.system(command)
        print("欢迎使用 ChatGLM-6B 模型，输入内容即可进行对话，clear 清空对话历史，stop 终止程序")
        continue
+    timeStart = time.perf_counter()
    response, history = model.chat(tokenizer, query, history=history)
-    print(f"ChatGLM-6B：{response}")
+    timeEnd = time.perf_counter()
+    showTime="({timeEnd - timeStart:0.4f}s)" if args.showTime else ""
+
+    print(f"ChatGLM-6B {showTime}：{response}")