Use Chinese prompt for follow-up question parsing

2023-03-24 21:27:22 -07:00 · 2023-03-24 21:27:22 -07:00 · 2c424264b9
parent 45f59c14a2
commit 2c424264b9
1 changed file with 27 additions and 8 deletions
--- a/example_with_langchain_and_vectorstore/chat_backend.py
+++ b/example_with_langchain_and_vectorstore/chat_backend.py
@@ -11,6 +11,8 @@ from langchain.chains import (
    QAWithSourcesChain,
    VectorDBQAWithSourcesChain,
 )
+from langchain.prompts.prompt import PromptTemplate
+
 from langchain.docstore.document import Document
 from langchain.vectorstores.faiss import FAISS
 from langchain.chat_models import ChatOpenAI
@@ -23,7 +25,7 @@ from langchain.prompts.chat import (
 from transformers import AutoTokenizer, AutoModel

 # Set up OpenAI API key
-# This is solely for the purpose of semantic search part of langchain vector search. 
+# This is solely for the purpose of semantic search part of langchain vector search.
 # Completion is still purely done using ChatGLM model.
 os.environ["OPENAI_API_KEY"] = ""

@@ -59,17 +61,19 @@ def init_wiki_agent(
    index_dir,
    max_token=800,
    temperature=0.3,
-):  
-    
+):
+
    embeddings = OpenAIEmbeddings()
    if index_dir:
        vectorstore = FAISS.load_local(index_dir, embeddings=embeddings)
    else:
        raise ValueError("Need saved vector store location")
-    system_template = """使用以下wikipedia的片段来回答用户的问题。
-如果无法从中得到答案，请说 "不知道" 或 "没有足够的相关信息". 不要试图编造答案。
+    system_template = """使用以下文段, 简洁和专业的来回答用户的问题。
+如果无法从中得到答案，请说 "不知道" 或 "没有足够的相关信息". 不要试图编造答案。 答案请使用中文.
 ----------------
-{context}"""
+{context}
+----------------
+"""
    messages = [
        SystemMessagePromptTemplate.from_template(system_template),
        HumanMessagePromptTemplate.from_template("{question}"),
@@ -78,12 +82,27 @@ def init_wiki_agent(
    # qa = ChatVectorDBChain.from_llm(llm=ChatOpenAI(temperature=temperature, max_tokens=max_token),
    #                                  vectorstore=vectorstore,
    #                                  qa_prompt=prompt)
+
+    condese_propmt_template = """任务: 给一段对话和一个后续问题，将后续问题改写成一个独立的问题。(确保问题是完整的, 没有模糊的指代)
+聊天记录：
+{chat_history}
+###
+
+后续问题：{question}
+
+改写后的独立, 完整的问题："""
+    new_question_prompt = PromptTemplate.from_template(condese_propmt_template)
+
    from chatglm_llm import ChatGLM_G
+
    qa = ChatVectorDBChain.from_llm(
-        llm=ChatGLM_G(), vectorstore=vectorstore, qa_prompt=prompt
+        llm=ChatGLM_G(),
+        vectorstore=vectorstore,
+        qa_prompt=prompt,
+        condense_question_prompt=new_question_prompt,
    )
    qa.return_source_documents = True
-    qa.top_k_docs_for_context = 2
+    qa.top_k_docs_for_context = 3
    return qa