From 4b8312c08e8d05a5f41453d63c8671aab601ed1c Mon Sep 17 00:00:00 2001
From: Camille Zhong <44392324+Camille7777@users.noreply.github.com>
Date: Fri, 1 Mar 2024 17:27:50 +0800
Subject: [PATCH] fix sft single turn inference example (#5416)

---
 applications/Colossal-LLaMA-2/inference_example.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/applications/Colossal-LLaMA-2/inference_example.py b/applications/Colossal-LLaMA-2/inference_example.py
index 77e18d8b5..63ce91e50 100644
--- a/applications/Colossal-LLaMA-2/inference_example.py
+++ b/applications/Colossal-LLaMA-2/inference_example.py
@@ -15,7 +15,7 @@ def load_model(model_path, device="cuda", **kwargs):
     model.to(device)
 
     try:
-        tokenizer = AutoTokenizer.from_pretrained(model_path)
+        tokenizer = AutoTokenizer.from_pretrained(model_path, padding_side='left')
     except OSError:
         raise ImportError("Tokenizer not found. Please check if the tokenizer exists or the model path is correct.")
 
@@ -29,6 +29,7 @@ def generate(args):
     if args.prompt_style == "sft":
         conversation = default_conversation.copy()
         conversation.append_message("Human", args.input_txt)
+        conversation.append_message("Assistant", None)
         input_txt = conversation.get_prompt()
     else:
         BASE_INFERENCE_SUFFIX = "\n\n->\n\n"
@@ -46,7 +47,7 @@
         num_return_sequences=1,
     )
     response = tokenizer.decode(output.cpu()[0, num_input_tokens:], skip_special_tokens=True)
-    logger.info(f"Question: {input_txt} \n\n Answer: \n{response}")
+    logger.info(f"\nHuman: {args.input_txt} \n\nAssistant: \n{response}")
 
     return response
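
Why the fix works: appending an empty "Assistant" turn makes get_prompt()
close the conversation with the assistant's role tag, so the model generates
the reply instead of continuing the human turn; padding_side='left' follows
the usual rule that decoder-only models are left-padded for generation, so
new tokens attach directly after the prompt. The sketch below is a minimal,
hypothetical stand-in for the FastChat-style default_conversation that
inference_example.py imports (the real template may differ in system prompt,
roles, and separators):

    from dataclasses import dataclass, field
    from typing import List, Optional, Tuple


    @dataclass
    class Conversation:
        # Hypothetical mirror of the repo's conversation template.
        system: str = "A chat between a curious human and an AI assistant."
        sep: str = "\n"
        messages: List[Tuple[str, Optional[str]]] = field(default_factory=list)

        def append_message(self, role: str, message: Optional[str]) -> None:
            self.messages.append((role, message))

        def get_prompt(self) -> str:
            parts = [self.system]
            for role, message in self.messages:
                if message is None:
                    # An empty turn emits only the role tag, so the prompt
                    # ends with "Assistant:" and generation starts there.
                    parts.append(f"{role}:")
                else:
                    parts.append(f"{role}: {message}")
            return self.sep.join(parts)


    conv = Conversation()
    conv.append_message("Human", "What is 2 + 2?")
    conv.append_message("Assistant", None)  # the line this patch adds
    print(conv.get_prompt())
    # A chat between a curious human and an AI assistant.
    # Human: What is 2 + 2?
    # Assistant: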