[Coati] Refine prompt for better inference (#6117)

* refine prompt
* update prompt
* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-11-08 11:00:37 +08:00 · 2024-11-08 11:00:37 +08:00 · 30a9443132
parent 7a60161035
commit 30a9443132
3 changed files with 10 additions and 7 deletions
--- a/applications/ColossalChat/coati/reasoner/guided_search/mcts.py
+++ b/applications/ColossalChat/coati/reasoner/guided_search/mcts.py
@ -58,7 +58,7 @@ class MCTS(BaseModel):
        """
        Root Initiation.
        """
-        # Dummy answer as root.
+        # Use a simple answer as the root. A negative response such as "I do not know" also works as the root answer.
        base_answer = self.sample_base_answer()
        self.root = MCTSNode(answer=base_answer)
        self.self_evaluate(self.root)
@ -190,7 +190,7 @@ class MCTS(BaseModel):
            messages=[
                {
                    "role": "system",
-                    "content": "The user will provide a problem. Solve the problem. The response should begin with [reasoning process]...[Verification]... and end with [Final Answer]. \nThe answer is [answer] \n#### [answer].",
+                    "content": self.cfg.base_system_prompt,
                },
                {
                    "role": "user",
--- a/applications/ColossalChat/coati/reasoner/guided_search/prompt_store/base.py
+++ b/applications/ColossalChat/coati/reasoner/guided_search/prompt_store/base.py
@ -5,6 +5,7 @@ class PromptCFG(BaseModel):
    model: str
    base_url: str
    max_tokens: int = 4096
+    base_system_prompt: str
    critic_system_prompt: str
    refine_system_prompt: str
    evaluate_system_prompt: str
--- a/applications/ColossalChat/coati/reasoner/guided_search/prompt_store/qwen.py
+++ b/applications/ColossalChat/coati/reasoner/guided_search/prompt_store/qwen.py
@ -7,14 +7,16 @@ from coati.reasoner.guided_search.prompt_store.base import PromptCFG
 Qwen32B_prompt_CFG = PromptCFG(
    base_url="http://0.0.0.0:8008/v1",
    model="Qwen2.5-32B-Instruct",
-    critic_system_prompt="Provide a detailed and constructive critique to improve the answer. "
-    "Highlight specific areas that need refinement or correction.",
+    base_system_prompt="The user will present a problem. Analyze and solve the problem in the following structure:\n"
+    "Begin with [Reasoning Process] to explain the approach. \n Proceed with [Verification] to confirm the solution. \n Conclude with [Final Answer] in the format: 'Answer: [answer]'",
+    critic_system_prompt="Provide a detailed and constructive critique of the answer, focusing on ways to improve its clarity, accuracy, and relevance. "
+    "Highlight specific areas that need refinement or correction, and offer concrete suggestions for enhancing the overall quality and effectiveness of the response.",
    refine_system_prompt="""# Instruction
                            Refine the answer based on the critique. The response should begin with [reasoning process]...[Verification]... and end with [Final Answer].
                         """,
    evaluate_system_prompt=(
-        "Analyze this answer strictly and critic, provide a reward score between -100 and 100 for the answer quality, using very strict standards. "
-        "Do not give a full score above 95. Make sure the reward score is an integer. "
-        "Return *ONLY* the score."
+        "Critically analyze this answer and provide a reward score between -100 and 100 based on strict standards. "
+        "The score should clearly reflect the quality of the answer. "
+        "Make sure the reward score is an integer. You should only return the score. If the score is greater than 95, return 95."
    ),
 )