From 5558a26371fc0bbd0d5466e07f6cc570916d611e Mon Sep 17 00:00:00 2001
From: kingzeus
Date: Sun, 19 Mar 2023 12:45:27 +0800
Subject: [PATCH 1/4] feat(cli): add parameters for cli

--cpu: use CPU mode (forced on macOS)
--local: use a local model
--showTime: show the time taken per response
---
 cli_demo.py | 39 +++++++++++++++++++++++++++++++++++----
 1 file changed, 35 insertions(+), 4 deletions(-)

diff --git a/cli_demo.py b/cli_demo.py
index d87f707..b8d3c85 100644
--- a/cli_demo.py
+++ b/cli_demo.py
@@ -1,13 +1,40 @@
 import os
 import platform
+import argparse
+import time
 from transformers import AutoTokenizer, AutoModel
 
-tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
-model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
-model = model.eval()
+
+
+parser = argparse.ArgumentParser(description='cli demo')
+parser.add_argument('--cpu', action='store_true', help='cpu mode')
+parser.add_argument('--showTime', action='store_true', help='show time consumed per response')
+parser.add_argument('--local', action='store_true', help='use local model, default path: ./models/chatglm-6b')
+
+args = parser.parse_args()
 
 os_name = platform.system()
 
+# mac: force CPU mode
+if os_name == 'Darwin':
+    args.cpu = True
+
+
+model_name = "THUDM/chatglm-6b"
+if args.local:
+    model_name = "./models/chatglm-6b"
+
+
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
+if args.cpu:
+    model = model.float()
+else:
+    model = model.half().cuda()
+model = model.eval()
+
+
+
 history = []
 print("欢迎使用 ChatGLM-6B 模型,输入内容即可进行对话,clear 清空对话历史,stop 终止程序")
 while True:
@@ -20,5 +47,9 @@ while True:
         os.system(command)
         print("欢迎使用 ChatGLM-6B 模型,输入内容即可进行对话,clear 清空对话历史,stop 终止程序")
         continue
+    timeStart = time.perf_counter()
     response, history = model.chat(tokenizer, query, history=history)
-    print(f"ChatGLM-6B:{response}")
+    timeEnd = time.perf_counter()
+    showTime = f"({timeEnd - timeStart:0.4f}s)" if args.showTime else ""
+
+    print(f"ChatGLM-6B {showTime}:{response}")
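A minimal usage sketch for the new flags (assuming the patched cli_demo.py is run from the repository root and, for --local, that a chatglm-6b checkout exists under ./models/):

    # CPU mode, local weights, print per-response timing
    python cli_demo.py --cpu --local --showTime

    # default: load THUDM/chatglm-6b from the Hugging Face Hub and run on GPU
    python cli_demo.py

On macOS the script forces --cpu on its own, so the flag can be omitted there.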
From 29d06aef62eeee52fb62c7c0b1a1a7e5d7d182dc Mon Sep 17 00:00:00 2001
From: kingzeus
Date: Mon, 20 Mar 2023 23:16:19 +0800
Subject: [PATCH 2/4] add submodules for models

add .gitignore entry for venv
---
 .gitignore             | 1 +
 .gitmodules            | 6 ++++++
 models/chatglm-6b      | 1 +
 models/chatglm-6b-int4 | 1 +
 4 files changed, 9 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 .gitmodules
 create mode 160000 models/chatglm-6b
 create mode 160000 models/chatglm-6b-int4

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..5ceb386
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+venv

diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..0093bb4
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,6 @@
+[submodule "models/chatglm-6b-int4"]
+	path = models/chatglm-6b-int4
+	url = https://huggingface.co/THUDM/chatglm-6b-int4
+[submodule "models/chatglm-6b"]
+	path = models/chatglm-6b
+	url = https://huggingface.co/THUDM/chatglm-6b

diff --git a/models/chatglm-6b b/models/chatglm-6b
new file mode 160000
index 0000000..1b54948
--- /dev/null
+++ b/models/chatglm-6b
@@ -0,0 +1 @@
+Subproject commit 1b54948bb28de5258b55b893e193c3046a0b0484

diff --git a/models/chatglm-6b-int4 b/models/chatglm-6b-int4
new file mode 160000
index 0000000..3ba9437
--- /dev/null
+++ b/models/chatglm-6b-int4
@@ -0,0 +1 @@
+Subproject commit 3ba943724188bca4a34c7cfc3f2752c05cf75ceb

From 2aa175710fcb88c42a548d808bbbae0477d57707 Mon Sep 17 00:00:00 2001
From: kingzeus
Date: Sun, 2 Apr 2023 13:45:24 +0800
Subject: [PATCH 3/4] Update submodules
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 models/chatglm-6b      | 2 +-
 models/chatglm-6b-int4 | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/models/chatglm-6b b/models/chatglm-6b
index 1b54948..08bc851 160000
--- a/models/chatglm-6b
+++ b/models/chatglm-6b
@@ -1 +1 @@
-Subproject commit 1b54948bb28de5258b55b893e193c3046a0b0484
+Subproject commit 08bc85104db4e8da2c215a29c469218953056251

diff --git a/models/chatglm-6b-int4 b/models/chatglm-6b-int4
index 3ba9437..7458231 160000
--- a/models/chatglm-6b-int4
+++ b/models/chatglm-6b-int4
@@ -1 +1 @@
-Subproject commit 3ba943724188bca4a34c7cfc3f2752c05cf75ceb
+Subproject commit 7458231b5ac7f19cc49496c35617a4ea66f0533e
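Before --local (patch 1) or the interactive local-model option (patch 4) can find these weights, the submodules have to be fetched. A sketch using standard git commands (the Hugging Face model repositories keep their large weight files in Git LFS, so git-lfs should be installed first):

    git lfs install
    git submodule update --init models/chatglm-6b models/chatglm-6b-int4

Each submodule is pinned to the exact commit recorded in the superproject, which is what patch 3 bumps.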
From ff6d7fbeeb337328304964123699ae434e4465cb Mon Sep 17 00:00:00 2001
From: kingzeus
Date: Sun, 2 Apr 2023 16:10:52 +0800
Subject: [PATCH 4/4] Add interactive parameter options
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 cli_demo.py      | 114 ++++++++++++++++++++++++++++++++++++++++++++---
 requirements.txt |   1 +
 2 files changed, 110 insertions(+), 5 deletions(-)

diff --git a/cli_demo.py b/cli_demo.py
index da80fff..d5ccf6e 100644
--- a/cli_demo.py
+++ b/cli_demo.py
@@ -3,9 +3,32 @@ import platform
 import signal
 from transformers import AutoTokenizer, AutoModel
 
-tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
-model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
-model = model.eval()
+import inquirer
+import torch
+# 参数
+choices_jobType = [("GPU", 1), ("CPU", 2)]
+choices_floatType = [("half", 1), ("float", 2)]
+choices_model = [("默认(chatglm-6b)", 'chatglm-6b'), ("量化int4(chatglm-6b-int4)", 'chatglm-6b-int4')]
+
+def print_list(choices, v):
+    for element in choices:
+        if element[1] == v:
+            return element[0]
+    return None
+
+
+def print_confirm(v):
+    if v:
+        return '是'
+    else:
+        return '否'
+
+
+def print_confirm2(display, v1, v2=True, v3=True):
+    if v1 and v2 and v3:
+        return display
+    else:
+        return ''
 
 os_name = platform.system()
 clear_command = 'cls' if os_name == 'Windows' else 'clear'
@@ -25,7 +48,29 @@ def signal_handler(signal, frame):
     stop_stream = True
 
 
-def main():
+def main(answers):
+    model_name = answers['path'] + answers['model']
+
+    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+    model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
+    # 精度设置
+    if answers['float_type'] == 2:
+        model = model.float()
+    else:
+        model = model.half()
+    # 设备设置
+    if answers['job_type'] == 1:
+        if os_name == 'Darwin':
+            model = model.to("mps")
+        else:
+            model = model.cuda()
+
+    model = model.eval()
+
+
+
+
+
     history = []
     global stop_stream
     print("欢迎使用 ChatGLM-6B 模型,输入内容即可进行对话,clear 清空对话历史,stop 终止程序")
@@ -54,4 +99,63 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
+    isGPUSupport = torch.cuda.is_available() or (torch.backends.mps.is_available() if os_name == 'Darwin' else False)
+    # 设置选项
+    questions = [
+        inquirer.List(
+            "job_type",
+            message="选择运行类型?",
+            default=1 if isGPUSupport else 2,
+            choices=choices_jobType,
+            # 如果支持GPU,默认GPU
+            # 如果不支持GPU的话,默认CPU,不显示
+            ignore=not isGPUSupport,
+        ),
+        inquirer.List(
+            "float_type",
+            message="选择浮点精度?",
+            # mac mps半精度容易报错,默认float
+            # 默认使用half
+            default=2 if os_name == 'Darwin' else 1,
+            choices=choices_floatType,
+
+        ),
+        inquirer.Confirm(
+            "isLocal",
+            message="是否使用本地模型",
+            default=True,
+        ),
+        inquirer.Text(
+            "path",
+            message="设置模型路径",
+            # 使用本地模型的话,可以设置目录
+            default=lambda answer: './models/' if answer['isLocal'] else 'THUDM/',
+            ignore=lambda answer: not answer['isLocal'],
+        ),
+        inquirer.List(
+            "model",
+            message="选择模型?",
+            # macOS 下默认使用 chatglm-6b
+            # 且跳过该问题
+            default='chatglm-6b' if os_name == 'Darwin' else 'chatglm-6b-int4',
+            choices=choices_model,
+            ignore=os_name == 'Darwin',
+        ),
+
+    ]
+
+    # 处理选项
+    answers = inquirer.prompt(questions)
+
+    print('========= 选项 =========')
+    print('运行类型: %s' % (print_list(choices_jobType, answers['job_type'])))
+    print('浮点精度: %s' % (print_list(choices_floatType, answers['float_type'])))
+    print('本地模型: %s' % (print_confirm(answers['isLocal'])))
+    print('模型: %s%s' % (answers['path'], answers['model']))
+    if os_name == 'Darwin':
+        print('----说明-----')
+        print('MacOS下,如果使用GPU报错的话,建议:')
+        print('1.安装 PyTorch-Nightly:pip install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu')
+        print('2.出现 LLVM ERROR: Failed to infer result type(s). 可以把精度设置为float')
+        print('------------------------')
+    main(answers)

diff --git a/requirements.txt b/requirements.txt
index 00707fe..960e436 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,4 @@ icetk
 cpm_kernels
 torch>=1.10
 gradio
+inquirer
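With inquirer added to requirements.txt, a typical way to try the interactive demo might be (a sketch; package versions are not pinned by this patch, and local models still require the submodules from patch 2):

    pip install -r requirements.txt
    python cli_demo.py

The script then asks for run type (GPU/CPU), float precision (half/float), whether to use a local model, the model path, and the model variant, prints the chosen options, and hands them to main().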