[Coati] Refine prompt for better inference (#6117)

* refine prompt
* update prompt
* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2 weeks ago · 30a9443132
3 changed files with 10 additions and 7 deletions
--- a/applications/ColossalChat/coati/reasoner/guided_search/mcts.py
+++ b/applications/ColossalChat/coati/reasoner/guided_search/mcts.py
@ -58,7 +58,7 @@ class MCTS(BaseModel):
        """
        Root Initiation.
        """
-        # Dummy answer as root.
+        # Simple answer as root. You can also use a negative response such as "I do not know" as the root answer.
        base_answer = self.sample_base_answer()
        self.root = MCTSNode(answer=base_answer)
        self.self_evaluate(self.root)
@ -190,7 +190,7 @@ class MCTS(BaseModel):
            messages=[
                {
                    "role": "system",
-                    "content": "The user will provide a problem. Solve the problem. The response should begin with [reasoning process]...[Verification]... and end with [Final Answer]. \nThe answer is [answer] \n#### [answer].",
+                    "content": self.cfg.base_system_prompt,
                },
                {
                    "role": "user",
--- a/applications/ColossalChat/coati/reasoner/guided_search/prompt_store/base.py
+++ b/applications/ColossalChat/coati/reasoner/guided_search/prompt_store/base.py
@ -5,6 +5,7 @@ class PromptCFG(BaseModel):
    model: str
    base_url: str
    max_tokens: int = 4096
    base_system_prompt: str
    critic_system_prompt: str
    refine_system_prompt: str
    evaluate_system_prompt: str
--- a/applications/ColossalChat/coati/reasoner/guided_search/prompt_store/qwen.py
+++ b/applications/ColossalChat/coati/reasoner/guided_search/prompt_store/qwen.py
@ -7,14 +7,16 @@ from coati.reasoner.guided_search.prompt_store.base import PromptCFG
 Qwen32B_prompt_CFG = PromptCFG(
    base_url="http://0.0.0.0:8008/v1",
    model="Qwen2.5-32B-Instruct",
-    critic_system_prompt="Provide a detailed and constructive critique to improve the answer. "
+    base_system_prompt="The user will present a problem. Analyze and solve the problem in the following structure:\n"
-    "Highlight specific areas that need refinement or correction.",
+    "Begin with [Reasoning Process] to explain the approach. \n Proceed with [Verification] to confirm the solution. \n Conclude with [Final Answer] in the format: 'Answer: [answer]'",
    critic_system_prompt="Provide a detailed and constructive critique of the answer, focusing on ways to improve its clarity, accuracy, and relevance. "
    "Highlight specific areas that need refinement or correction, and offer concrete suggestions for enhancing the overall quality and effectiveness of the response.",
    refine_system_prompt="""# Instruction
                            Refine the answer based on the critique. The response should begin with [reasoning process]...[Verification]... and end with [Final Answer].
                         """,
    evaluate_system_prompt=(
+        # Adjacent string literals are joined with no separator, so every sentence
+        # except the last carries its own trailing space.
-        "Analyze this answer strictly and critic, provide a reward score between -100 and 100 for the answer quality, using very strict standards. "
+        "Critically analyze this answer and provide a reward score between -100 and 100 based on strict standards. "
-        "Do not give a full score above 95. Make sure the reward score is an integer. "
+        "The score should clearly reflect the quality of the answer. "
-        "Return *ONLY* the score."
+        "Make sure the reward score is an integer. You should only return the score. If the score is greater than 95, return 95."
    ),
 )