Add support for streaming output

pull/154/head
duzx16 2023-03-19 14:33:05 +08:00
parent 6cda36633e
commit 2ed89f3898
2 changed files with 43 additions and 23 deletions
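The change is the same in both demos: the blocking model.chat(...) call is replaced by the model.stream_chat(...) generator, which yields the partial response (and updated history) as tokens are produced, so the UI can be refreshed while generation is still running. Below is a minimal sketch of how that generator is consumed; the model setup mirrors the repository's demos and assumes a CUDA GPU for half-precision inference, and the query string is just an example.

from transformers import AutoTokenizer, AutoModel

# Same setup as the demos in this repo (assumes a CUDA GPU for .half().cuda()).
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
model = model.eval()

history = []
# stream_chat yields (partial_response, updated_history) repeatedly until generation finishes,
# whereas chat() returns only once, with the final response.
for response, history in model.stream_chat(tokenizer, "你好", history=history):
    print(response)  # each iteration shows the response accumulated so far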

View File

@@ -7,18 +7,38 @@ model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).ha
model = model.eval()
os_name = platform.system()
clear_command = 'cls' if os_name == 'Windows' else 'clear'
history = []
print("欢迎使用 ChatGLM-6B 模型，输入内容即可进行对话，clear 清空对话历史，stop 终止程序")
while True:
    query = input("\n用户：")
    if query == "stop":
        break
    if query == "clear":
        history = []
        command = 'cls' if os_name == 'Windows' else 'clear'
        os.system(command)
        print("欢迎使用 ChatGLM-6B 模型，输入内容即可进行对话，clear 清空对话历史，stop 终止程序")
        continue
    response, history = model.chat(tokenizer, query, history=history)
    print(f"ChatGLM-6B：{response}")
def build_prompt(history):
    prompt = "欢迎使用 ChatGLM-6B 模型，输入内容即可进行对话，clear 清空对话历史，stop 终止程序"
    for query, response in history:
        prompt += f"\n用户：{query}"
        prompt += f"\nChatGLM-6B：{response}"
    return prompt
def main():
    history = []
    print("欢迎使用 ChatGLM-6B 模型，输入内容即可进行对话，clear 清空对话历史，stop 终止程序")
    while True:
        query = input("\n用户：")
        if query == "stop":
            break
        if query == "clear":
            history = []
            os.system(clear_command)
            print("欢迎使用 ChatGLM-6B 模型，输入内容即可进行对话，clear 清空对话历史，stop 终止程序")
            continue
        count = 0
        for response, history in model.stream_chat(tokenizer, query, history=history):
            count += 1
            if count % 8 == 0:
                os.system(clear_command)
                print(build_prompt(history), flush=True)
        os.system(clear_command)
        print(build_prompt(history), flush=True)


if __name__ == "__main__":
    main()
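In the CLI demo, every chunk yielded by stream_chat could trigger a redraw, but clearing the terminal on each token would flicker, so the new loop redraws only on every 8th chunk and then does one final redraw with the complete transcript once the generator is exhausted. A self-contained sketch of that throttled-redraw pattern with a stand-in token stream follows; fake_stream and its text are illustrative, not part of the repository.

import os
import platform
import time

clear_command = 'cls' if platform.system() == 'Windows' else 'clear'


def fake_stream(text):
    # Stand-in for model.stream_chat: yields an ever-longer partial response.
    partial = ""
    for token in text.split():
        partial += token + " "
        time.sleep(0.1)
        yield partial


count = 0
partial = ""
for partial in fake_stream("streaming output lets the console redraw while the answer is still being generated"):
    count += 1
    if count % 8 == 0:  # throttle: redraw only every 8th chunk to avoid flicker
        os.system(clear_command)
        print(partial, flush=True)
os.system(clear_command)
print(partial, flush=True)  # final redraw with the complete text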

View File

@@ -12,15 +12,15 @@ MAX_BOXES = MAX_TURNS * 2
def predict(input, max_length, top_p, temperature, history=None):
    if history is None:
        history = []
    response, history = model.chat(tokenizer, input, history, max_length=max_length, top_p=top_p,
                                   temperature=temperature)
    updates = []
    for query, response in history:
        updates.append(gr.update(visible=True, value="用户：" + query))
        updates.append(gr.update(visible=True, value="ChatGLM-6B：" + response))
    if len(updates) < MAX_BOXES:
        updates = updates + [gr.Textbox.update(visible=False)] * (MAX_BOXES - len(updates))
    return [history] + updates
    for response, history in model.stream_chat(tokenizer, input, history, max_length=max_length, top_p=top_p,
                                                temperature=temperature):
        updates = []
        for query, response in history:
            updates.append(gr.update(visible=True, value="用户：" + query))
            updates.append(gr.update(visible=True, value="ChatGLM-6B：" + response))
        if len(updates) < MAX_BOXES:
            updates = updates + [gr.Textbox.update(visible=False)] * (MAX_BOXES - len(updates))
        yield [history] + updates
with gr.Blocks() as demo:
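In the web demo, the structural change is that predict becomes a generator: it yields the rebuilt textbox updates on every chunk instead of returning once at the end. Gradio re-renders the bound outputs on each yield of a generator event handler, which typically requires the request queue to be enabled. A minimal, model-free sketch of that wiring; stream_reply and the echo text are illustrative only, not part of the repository.

import time
import gradio as gr


def stream_reply(message):
    # A generator bound to a Gradio event: the output is re-rendered on every yield.
    reply = ""
    for ch in f"Echo: {message}":
        reply += ch
        time.sleep(0.05)
        yield reply


with gr.Blocks() as demo:
    inp = gr.Textbox(label="Input")
    out = gr.Textbox(label="Output")
    gr.Button("Send").click(stream_reply, inputs=inp, outputs=out)

demo.queue().launch()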