From 30a94431323d71c5ef06bd4b7f047aced3312fdf Mon Sep 17 00:00:00 2001 From: Tong Li Date: Fri, 8 Nov 2024 11:00:37 +0800 Subject: [PATCH] [Coati] Refine prompt for better inference (#6117) * refine prompt * update prompt * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../coati/reasoner/guided_search/mcts.py | 4 ++-- .../reasoner/guided_search/prompt_store/base.py | 1 + .../reasoner/guided_search/prompt_store/qwen.py | 12 +++++++----- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/applications/ColossalChat/coati/reasoner/guided_search/mcts.py b/applications/ColossalChat/coati/reasoner/guided_search/mcts.py index 693e2b750..a87211da2 100644 --- a/applications/ColossalChat/coati/reasoner/guided_search/mcts.py +++ b/applications/ColossalChat/coati/reasoner/guided_search/mcts.py @@ -58,7 +58,7 @@ class MCTS(BaseModel): """ Root Initiation. """ - # Dummy answer as root. + # Simple answer as root. You can also use negative response such as "I do not know" as a response. base_answer = self.sample_base_answer() self.root = MCTSNode(answer=base_answer) self.self_evaluate(self.root) @@ -190,7 +190,7 @@ class MCTS(BaseModel): messages=[ { "role": "system", - "content": "The user will provide a problem. Solve the problem. The response should begin with [reasoning process]...[Verification]... and end with [Final Answer]. \nThe answer is [answer] \n#### [answer].", + "content": self.cfg.base_system_prompt, }, { "role": "user", diff --git a/applications/ColossalChat/coati/reasoner/guided_search/prompt_store/base.py b/applications/ColossalChat/coati/reasoner/guided_search/prompt_store/base.py index b325b8fa2..57b63def1 100644 --- a/applications/ColossalChat/coati/reasoner/guided_search/prompt_store/base.py +++ b/applications/ColossalChat/coati/reasoner/guided_search/prompt_store/base.py @@ -5,6 +5,7 @@ class PromptCFG(BaseModel): model: str base_url: str max_tokens: int = 4096 + base_system_prompt: str critic_system_prompt: str refine_system_prompt: str evaluate_system_prompt: str diff --git a/applications/ColossalChat/coati/reasoner/guided_search/prompt_store/qwen.py b/applications/ColossalChat/coati/reasoner/guided_search/prompt_store/qwen.py index 8bf0fa959..64dbc2415 100644 --- a/applications/ColossalChat/coati/reasoner/guided_search/prompt_store/qwen.py +++ b/applications/ColossalChat/coati/reasoner/guided_search/prompt_store/qwen.py @@ -7,14 +7,16 @@ from coati.reasoner.guided_search.prompt_store.base import PromptCFG Qwen32B_prompt_CFG = PromptCFG( base_url="http://0.0.0.0:8008/v1", model="Qwen2.5-32B-Instruct", - critic_system_prompt="Provide a detailed and constructive critique to improve the answer. " - "Highlight specific areas that need refinement or correction.", + base_system_prompt="The user will present a problem. Analyze and solve the problem in the following structure:\n" + "Begin with [Reasoning Process] to explain the approach. \n Proceed with [Verification] to confirm the solution. \n Conclude with [Final Answer] in the format: 'Answer: [answer]'", + critic_system_prompt="Provide a detailed and constructive critique of the answer, focusing on ways to improve its clarity, accuracy, and relevance." + "Highlight specific areas that need refinement or correction, and offer concrete suggestions for enhancing the overall quality and effectiveness of the response.", refine_system_prompt="""# Instruction Refine the answer based on the critique. The response should begin with [reasoning process]...[Verification]... and end with [Final Answer]. """, evaluate_system_prompt=( - "Analyze this answer strictly and critic, provide a reward score between -100 and 100 for the answer quality, using very strict standards. " - "Do not give a full score above 95. Make sure the reward score is an integer. " - "Return *ONLY* the score." + "Critically analyze this answer and provide a reward score between -100 and 100 based on strict standards." + "The score should clearly reflect the quality of the answer." + "Make sure the reward score is an integer. You should only return the score. If the score is greater than 95, return 95." ), )