mirror of https://github.com/THUDM/ChatGLM-6B
Add support for streaming output
parent
6cda36633e
commit
2ed89f3898
48
cli_demo.py
48
cli_demo.py
|
@ -7,18 +7,38 @@ model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).ha
|
|||
# Rebind to the result of eval() so every later chat call uses it.
model = model.eval()

# Choose the terminal-clearing shell command for the host OS once, up front.
os_name = platform.system()
clear_command = 'cls' if os_name == 'Windows' else 'clear'

# NOTE(review): this module-level loop appears to be the non-streaming version
# of the chat REPL that main() supersedes -- confirm whether it should still
# run at import time before keeping both.
history = []
print("欢迎使用 ChatGLM-6B 模型,输入内容即可进行对话,clear 清空对话历史,stop 终止程序")
while True:
    query = input("\n用户:")
    if query == "stop":
        break
    if query == "clear":
        history = []
        # Reuse the command selected above instead of recomputing it on
        # every "clear".
        os.system(clear_command)
        print("欢迎使用 ChatGLM-6B 模型,输入内容即可进行对话,clear 清空对话历史,stop 终止程序")
        continue
    # Blocking call: the full response is printed only once it is complete.
    response, history = model.chat(tokenizer, query, history=history)
    print(f"ChatGLM-6B:{response}")
|
||||
|
||||
def build_prompt(history):
    """Render the whole conversation as the text shown on the terminal.

    Args:
        history: Iterable of ``(query, response)`` pairs, oldest first.

    Returns:
        The welcome banner followed, for each pair, by a "用户:" line with
        the query and a "ChatGLM-6B:" line with the response. Lines are
        separated by a single newline; there is no trailing newline.
    """
    lines = ["欢迎使用 ChatGLM-6B 模型,输入内容即可进行对话,clear 清空对话历史,stop 终止程序"]
    for query, response in history:
        lines.append(f"用户:{query}")
        lines.append(f"ChatGLM-6B:{response}")
    # One join instead of repeated string concatenation inside the loop.
    return "\n".join(lines)
|
||||
|
||||
|
||||
def main():
    """Run the interactive command-line chat loop with streaming output.

    Reads queries from stdin until the user types "stop" or stdin reaches
    end-of-file. Typing "clear" resets the conversation history, clears the
    terminal and reprints the welcome banner. Any other input is passed to
    ``model.stream_chat``; while updates arrive the terminal is cleared and
    redrawn from ``build_prompt(history)``.
    """
    welcome = "欢迎使用 ChatGLM-6B 模型,输入内容即可进行对话,clear 清空对话历史,stop 终止程序"
    # Redraw only on every 8th streamed update rather than on each one.
    redraw_every = 8

    history = []
    print(welcome)
    while True:
        try:
            query = input("\n用户:")
        except EOFError:
            # stdin was closed (Ctrl-D, or piped input ran out): leave the
            # loop the same way "stop" does instead of dying with a traceback.
            break
        if query == "stop":
            break
        if query == "clear":
            history = []
            os.system(clear_command)
            print(welcome)
            continue

        # Each streamed item is (partial response, updated history); only the
        # history is needed because build_prompt() renders from it.
        for count, (_, history) in enumerate(
                model.stream_chat(tokenizer, query, history=history), start=1):
            if count % redraw_every == 0:
                os.system(clear_command)
                print(build_prompt(history), flush=True)

        # Final redraw so the last updates are shown even when the number of
        # updates is not a multiple of the redraw interval.
        os.system(clear_command)
        print(build_prompt(history), flush=True)
|
||||
|
||||
|
||||
# Start the chat loop only when this file is executed as a script, so that
# importing the module does not block on stdin.
if __name__ == "__main__":
    main()
|
||||
|
|
18
web_demo.py
18
web_demo.py
|
@ -12,15 +12,15 @@ MAX_BOXES = MAX_TURNS * 2
|
|||
def predict(input, max_length, top_p, temperature, history=None):
    """Stream chat-box updates for the web UI while the model generates.

    Args:
        input: The user's query text. (The name shadows the builtin, but it
            is part of the signature callers use, so it is kept.)
        max_length: Forwarded to ``model.stream_chat`` as ``max_length``.
        top_p: Forwarded to ``model.stream_chat`` as ``top_p``.
        temperature: Forwarded to ``model.stream_chat`` as ``temperature``.
        history: List of ``(query, response)`` pairs from earlier turns, or
            ``None`` to start a new conversation.

    Yields:
        For every update produced by ``model.stream_chat``: a list whose
        first element is the updated history, followed by component updates.
        Each history pair contributes one visible "用户:" box and one
        visible "ChatGLM-6B:" box; if that is fewer than ``MAX_BOXES``, the
        list is padded with hidden text boxes up to ``MAX_BOXES``.
    """
    if history is None:
        history = []

    # A single streaming body: a non-streaming body ending in `return` ahead
    # of this loop would finish the generator before anything is yielded.
    for _, history in model.stream_chat(tokenizer, input, history, max_length=max_length, top_p=top_p,
                                        temperature=temperature):
        updates = []
        for query, answer in history:
            updates.append(gr.update(visible=True, value="用户:" + query))
            updates.append(gr.update(visible=True, value="ChatGLM-6B:" + answer))
        if len(updates) < MAX_BOXES:
            # Hide the boxes that have no conversation turn to show yet.
            updates = updates + [gr.Textbox.update(visible=False)] * (MAX_BOXES - len(updates))
        yield [history] + updates
|
||||
|
||||
|
||||
with gr.Blocks() as demo:
|
||||
|
|
Loading…
Reference in New Issue