Browse Source

[Coati] Refine prompt for better inference (#6117)

* refine prompt

* update prompt

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
pull/5294/merge
Tong Li 2 weeks ago committed by GitHub
parent
commit
30a9443132
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
Changed files (number of changed lines for each):
  1. applications/ColossalChat/coati/reasoner/guided_search/mcts.py (4)
  2. applications/ColossalChat/coati/reasoner/guided_search/prompt_store/base.py (1)
  3. applications/ColossalChat/coati/reasoner/guided_search/prompt_store/qwen.py (12)

4
applications/ColossalChat/coati/reasoner/guided_search/mcts.py

@ -58,7 +58,7 @@ class MCTS(BaseModel):
""" """
Root Initiation. Root Initiation.
""" """
# Dummy answer as root. # Simple answer as root. You can also use negative response such as "I do not know" as a response.
base_answer = self.sample_base_answer() base_answer = self.sample_base_answer()
self.root = MCTSNode(answer=base_answer) self.root = MCTSNode(answer=base_answer)
self.self_evaluate(self.root) self.self_evaluate(self.root)
@ -190,7 +190,7 @@ class MCTS(BaseModel):
messages=[ messages=[
{ {
"role": "system", "role": "system",
"content": "The user will provide a problem. Solve the problem. The response should begin with [reasoning process]...[Verification]... and end with [Final Answer]. \nThe answer is [answer] \n#### [answer].", "content": self.cfg.base_system_prompt,
}, },
{ {
"role": "user", "role": "user",

1
applications/ColossalChat/coati/reasoner/guided_search/prompt_store/base.py

@ -5,6 +5,7 @@ class PromptCFG(BaseModel):
model: str model: str
base_url: str base_url: str
max_tokens: int = 4096 max_tokens: int = 4096
base_system_prompt: str
critic_system_prompt: str critic_system_prompt: str
refine_system_prompt: str refine_system_prompt: str
evaluate_system_prompt: str evaluate_system_prompt: str

12
applications/ColossalChat/coati/reasoner/guided_search/prompt_store/qwen.py

@ -7,14 +7,16 @@ from coati.reasoner.guided_search.prompt_store.base import PromptCFG
Qwen32B_prompt_CFG = PromptCFG( Qwen32B_prompt_CFG = PromptCFG(
base_url="http://0.0.0.0:8008/v1", base_url="http://0.0.0.0:8008/v1",
model="Qwen2.5-32B-Instruct", model="Qwen2.5-32B-Instruct",
critic_system_prompt="Provide a detailed and constructive critique to improve the answer. " base_system_prompt="The user will present a problem. Analyze and solve the problem in the following structure:\n"
"Highlight specific areas that need refinement or correction.", "Begin with [Reasoning Process] to explain the approach. \n Proceed with [Verification] to confirm the solution. \n Conclude with [Final Answer] in the format: 'Answer: [answer]'",
critic_system_prompt="Provide a detailed and constructive critique of the answer, focusing on ways to improve its clarity, accuracy, and relevance."
"Highlight specific areas that need refinement or correction, and offer concrete suggestions for enhancing the overall quality and effectiveness of the response.",
refine_system_prompt="""# Instruction refine_system_prompt="""# Instruction
Refine the answer based on the critique. The response should begin with [reasoning process]...[Verification]... and end with [Final Answer]. Refine the answer based on the critique. The response should begin with [reasoning process]...[Verification]... and end with [Final Answer].
""", """,
evaluate_system_prompt=( evaluate_system_prompt=(
"Analyze this answer strictly and critic, provide a reward score between -100 and 100 for the answer quality, using very strict standards. " "Critically analyze this answer and provide a reward score between -100 and 100 based on strict standards."
"Do not give a full score above 95. Make sure the reward score is an integer. " "The score should clearly reflect the quality of the answer."
"Return *ONLY* the score." "Make sure the reward score is an integer. You should only return the score. If the score is greater than 95, return 95."
), ),
) )

Loading…
Cancel
Save