optimize prompt

eosphoros-ai · Aug 28, 2024 · dbbe11c · dbbe11c
1 parent 48a10be
commit dbbe11c
Show file tree

Hide file tree

Showing 3 changed files with 201 additions and 31 deletions.
diff --git a/dbgpt/rag/transformer/community_summarizer.py b/dbgpt/rag/transformer/community_summarizer.py
@@ -34,12 +34,12 @@ def __init__(self, llm_client: LLMClient, model_name: str):
     "\n"
     "### 技能 2: 关系识别\n"
     "- 准确地识别[Relationships:]章节中的关系信息，包括来源实体名、关系名、"
-    "目标实体名、关系描述信息。\n"
+    "目标实体名、关系描述信息，实体名也可能是文档ID、目录ID、文本块ID。\n"
     "- 关系信息的一般格式有:\n"
     "(来源实体名)-[关系名]->(目标实体名)\n"
     "(来源实体名)-[关系名:关系描述]->(目标实体名)\n"
     "(来源实体名)-[关系名:关系属性表]->(目标实体名)\n"
-    "(文本块实体)-[包含]->(实体名)\n"
+    "(文本块ID)-[包含]->(实体名)\n"
     "(目录ID)-[包含]->(文本块实体)\n"
     "(目录ID)-[包含]->(子目录ID)\n"
     "(文档ID)-[包含]->(文本块实体)\n"
@@ -57,12 +57,14 @@ def __init__(self, llm_client: LLMClient, model_name: str):
     "2. 使用准确、恰当、简洁的语言总结图结构表达的信息，不要生成与图结构中无关的信息。"
     "\n"
     "## 约束条件\n"
+    "- 不要在答案中描述你的思考过程，直接给出用户问题的答案，不要生成无关信息。\n"
     "- 确保以第三人称书写，从客观角度对知识图谱表达的信息进行总结性描述。\n"
     "- 如果实体或关系的描述信息为空，对最终的总结信息没有贡献，不要生成无关信息。\n"
     "- 如果提供的描述信息相互矛盾，请解决矛盾并提供一个单一、连贯的描述。\n"
     "- 避免使用停用词和过于常见的词汇。\n"
     "\n"
     "## 参考案例\n"
+    "--案例仅帮助你理解提示词的输入和输出格式，请不要在答案中使用它们。--\n"
     "输入:\n"
     "```\n"
     "Entities:\n"
@@ -119,7 +121,8 @@ def __init__(self, llm_client: LLMClient, model_name: str):
     "### Skill 2: Relationship Recognition\n"
     "- Accurately recognize relationship information in the [Relationships:] "
     "section, including source_entity_name, relationship_name, "
-    "target_entity_name, and relationship_description.\n"
+    "target_entity_name, and relationship_description. The entity_name may "
+    "also be the document_id, catalog_id, or chunk_id.\n"
     "- The general formats for relationship information are:\n"
     "(source_entity_name)-[relationship_name]->(target_entity_name)\n"
     "(source_entity_name)-[relationship_name: relationship_description]->"
@@ -150,6 +153,8 @@ def __init__(self, llm_client: LLMClient, model_name: str):
     "without generating irrelevant information."
     "\n"
     "## Constraints\n"
+    "- Don't describe your thought process in the answer, provide the answer "
+    "to the user's question directly without generating irrelevant information.\n"
     "- Ensure the summary is written in the third person and objectively "
     "reflects the information conveyed by the knowledge graph.\n"
     "- If the descriptions of entities or relationships are empty and "
@@ -160,6 +165,8 @@ def __init__(self, llm_client: LLMClient, model_name: str):
     "- Avoid using stop words and overly common words.\n"
     "\n"
     "## Reference Example\n"
+    "--The case is only to help you understand the input and output format of "
+    "the prompt, please do not use it in your answer.--\n"
     "Input:\n"
     "```\n"
     "Entities:\n"

diff --git a/dbgpt/rag/transformer/graph_extractor.py b/dbgpt/rag/transformer/graph_extractor.py
@@ -132,6 +132,7 @@ def drop(self):
     "- 关联上下文是可选信息，可能为空。\n"
     "\n"
     "## 约束条件\n"
+    "- 如果文本已提供了和输出格式接近/相同的数据，请按照输出格式要求直接格式化输出。\n"
     "- 尽可能多的生成文本中提及的实体和关系信息，但不要随意创造不存在的实体和关系。\n"
     "- 确保以第三人称书写，从客观角度描述实体名称、关系名称，以及他们的总结性描述。\n"
     "- 尽可能多地使用关联上下文中的信息丰富实体和关系的内容，这非常重要。\n"
@@ -148,7 +149,8 @@ def drop(self):
     "(来源实体名#关系名#目标实体名#关系总结)\n"
     "...\n"
     "\n"
-    "## 参考案例\n"
+    "## 参考案例\n"
+    "--案例仅帮助你理解提示词的输入和输出格式，请不要在答案中使用它们。--\n"
     "输入:\n"
     "```\n"
     "[上下文]:\n"
@@ -233,6 +235,9 @@ def drop(self):
     "- Context is optional and may be empty.\n"
     "\n"
     "## Constraints\n"
+    "- If the text has provided data that is similar to or the same as the "
+    "output format, please format the output directly according to the "
+    "output format requirements.\n"
     "- Generate as much entity and relation information mentioned in the text "
     "as possible, but do not create nonexistent entities or relations.\n"
     "- Ensure the writing is in the third person, describing entity names, "
@@ -256,6 +261,8 @@ def drop(self):
     "...\n"
     "\n"
     "## Reference Example\n"
+    "--The case is only to help you understand the input and output format of "
+    "the prompt, please do not use it in your answer.--\n"
     "Input:\n"
     "```\n"
     "[Context]:\n"

diff --git a/dbgpt/storage/knowledge_graph/community_summary.py b/dbgpt/storage/knowledge_graph/community_summary.py
@@ -1,4 +1,4 @@
-"""Define the CommunitySummaryKnowledgeGraph class inheriting from BuiltinKnowledgeGraph."""
+"""Define the CommunitySummaryKnowledgeGraph."""
 
 import logging
 import os
@@ -152,7 +152,10 @@ async def asimilar_search_with_scores(
     ) -> List[Chunk]:
         # global search: retrieve relevant community summaries
         communities = await self._community_store.search_communities(text)
-        summaries = "\n".join([c.summary for c in communities])
+        summaries = [
+            f"Section {i + 1}:\n{community.summary}"
+            for i, community in enumerate(communities)
+        ]
 
         # local search: extract keywords and explore subgraph
         keywords = await self._keyword_extractor.extract(text)
@@ -162,32 +165,12 @@ async def asimilar_search_with_scores(
         if not summaries and not subgraph:
             return []
 
-        content = (
-            "The following entities and relationships provided after "
-            "[Subgraph] are retrieved from the knowledge graph "
-            "based on the keywords:\n"
-            f"\"{','.join(keywords)}\".\n"
-            "The text provided after [Summary] is a summary supplement "
-            "to the entities and relations."
-            "---------------------\n"
-            "The following examples after [Entities] and [Relationships] that "
-            "can help you understand the data format of the knowledge graph, "
-            "but do not use them in the answer.\n"
-            "[Entities]:\n"
-            "(alice)\n"
-            "(bob:{age:28})\n"
-            '(carry:{age:18;role:"teacher"})\n\n'
-            "[Relationships]:\n"
-            "(alice)-[reward]->(alice)\n"
-            '(alice)-[notify:{method:"email"}]->'
-            '(carry:{age:18;role:"teacher"})\n'
-            '(bob:{age:28})-[teach:{course:"math";hour:180}]->(alice)\n'
-            "---------------------\n"
-            f"[Subgraph]:\n{subgraph}\n"
-            f"[Summary]:\n{summaries}\n"
+        # merge search results into context
+        context = HYBRID_SEARCH_PT_CN.format(
+            context=summaries,
+            graph=subgraph
         )
-
-        return [Chunk(content=content)]
+        return [Chunk(content=context)]
 
     def truncate(self) -> List[str]:
         """Truncate knowledge graph."""
@@ -212,3 +195,176 @@ def delete_vector_name(self, index_name: str):
 
         logger.info(f"Drop triplet extractor")
         self._triplet_extractor.drop()
+
+
+HYBRID_SEARCH_PT_CN = (
+    "## 角色\n"
+    "你非常擅长结合提示词模板提供的[上下文]信息与[知识图谱]信息，"
+    "准确恰当地回答用户的问题，并保证不会输出与上下文和知识图谱无关的信息。"
+    "\n"
+    "## 技能\n"
+    "### 技能 1: 上下文理解\n"
+    "- 准确地理解[上下文]提供的信息，上下文信息可能被拆分为多个章节。\n"
+    "- 上下文的每个章节内容都会以[Section]开始，并按需进行了编号。\n"
+    "- 上下文信息提供了与用户问题相关度最高的总结性描述，请合理使用它们。\n"
+    "### 技能 2: 知识图谱理解\n"
+    "- 准确地识别[知识图谱]中提供的[Entities:]章节中的实体信息"
+    "和[Relationships:]章节中的关系信息，实体和关系信息的一般格式为：\n"
+    "```\n"
+    "* 实体信息格式:\n"
+    "- (实体名)\n"
+    "- (实体名:实体描述)\n"
+    "- (实体名:实体属性表)\n"
+    "- (文本块ID:文档块内容)\n"
+    "- (目录ID:目录名)\n"
+    "- (文档ID:文档名称)\n"
+    "\n"
+    "* 关系信息的格式:\n"
+    "- (来源实体名)-[关系名]->(目标实体名)\n"
+    "- (来源实体名)-[关系名:关系描述]->(目标实体名)\n"
+    "- (来源实体名)-[关系名:关系属性表]->(目标实体名)\n"
+    "- (文本块ID)-[包含]->(实体名)\n"
+    "- (目录ID)-[包含]->(文本块实体)\n"
+    "- (目录ID)-[包含]->(子目录ID)\n"
+    "- (文档ID)-[包含]->(文本块实体)\n"
+    "- (文档ID)-[包含]->(目录ID)\n"
+    "```\n"
+    "- 正确地将关系信息中的实体名/ID与实体信息关联，还原出图结构。\n"
+    "- 将图结构所表达的信息作为用户提问的明细上下文，辅助生成更好的答案。\n"
+    "\n"
+    "## 约束条件\n"
+    "- 不要在答案中描述你的思考过程，直接给出用户问题的答案，不要生成无关信息。\n"
+    "- 若[知识图谱]没有提供信息，此时应根据[上下文]提供的信息回答问题。\n"
+    "- 确保以第三人称书写，从客观角度结合[上下文]和[知识图谱]表达的信息回答问题。\n"
+    "- 若提供的信息相互矛盾，请解决矛盾并提供一个单一、连贯的描述。\n"
+    "- 避免使用停用词和过于常见的词汇。\n"
+    "\n"
+    "## 参考案例\n"
+    "```\n"
+    "[上下文]:\n"
+    "Section 1:\n"
+    "菲尔・贾伯的大儿子叫雅各布・贾伯。\n"
+    "Section 2:\n"
+    "菲尔・贾伯的小儿子叫比尔・贾伯。\n"
+    "[知识图谱]:\n"
+    "Entities:\n"
+    "(菲尔・贾伯#菲尔兹咖啡创始人)\n"
+    "(菲尔兹咖啡#加利福尼亚州伯克利创立的咖啡品牌)\n"
+    "(雅各布・贾伯#菲尔・贾伯的儿子)\n"
+    "(美国多地#菲尔兹咖啡的扩展地区)\n"
+    "\n"
+    "Relationships:\n"
+    "(菲尔・贾伯#创建#菲尔兹咖啡#1978年在加利福尼亚州伯克利创立)\n"
+    "(菲尔兹咖啡#位于#加利福尼亚州伯克利#菲尔兹咖啡的创立地点)\n"
+    "(菲尔・贾伯#拥有#雅各布・贾伯#菲尔・贾伯的儿子)\n"
+    "(雅各布・贾伯#担任#首席执行官#在2005年成为菲尔兹咖啡的首席执行官)\n"
+    "(菲尔兹咖啡#扩展至#美国多地#菲尔兹咖啡的扩展范围)\n"
+    "```\n"
+    "\n"
+    "----\n"
+    "\n"
+    "接下来[上下文]和[知识图谱]的信息，可以帮助你更好地回答用户的问题。\n"
+    "\n"
+    "[上下文]:\n"
+    "{context}\n"
+    "\n"
+    "[知识图谱]:\n"
+    "{graph}\n"
+    "\n"
+)
+
+HYBRID_SEARCH_PT_EN = (
+    "## Role\n"
+    "You excel at combining the information provided in the [Context] with "
+    "information from the [KnowledgeGraph] to accurately and appropriately "
+    "answer user questions, ensuring that you do not output information "
+    "unrelated to the context and knowledge graph.\n"
+    "\n"
+    "## Skills\n"
+    "### Skill 1: Context Understanding\n"
+    "- Accurately understand the information provided in the [Context], "
+    "which may be divided into several sections.\n"
+    "- Each section in the context will start with [Section] "
+    "and may be numbered as needed.\n"
+    "- The context provides a summary description most relevant to the user’s "
+    "question, and it should be used wisely.\n"
+    "### Skill 2: Knowledge Graph Understanding\n"
+    "- Accurately identify entity information in the [Entities:] section and "
+    "relationship information in the [Relationships:] section "
+    "of the [KnowledgeGraph]. The general format for entity "
+    "and relationship information is:\n"
+    "```\n"
+    "* Entity Information Format:\n"
+    "- (entity_name)\n"
+    "- (entity_name: entity_description)\n"
+    "- (entity_name: entity_property_map)\n"
+    "- (chunk_id: chunk_content)\n"
+    "- (catalog_id: catalog_name)\n"
+    "- (document_id: document_name)\n"
+    "\n"
+    "* Relationship Information Format:\n"
+    "- (source_entity_name)-[relationship_name]->(target_entity_name)\n"
+    "- (source_entity_name)-[relationship_name: relationship_description]->"
+    "(target_entity_name)\n"
+    "- (source_entity_name)-[relationship_name: relationship_property_map]->"
+    "(target_entity_name)\n"
+    "- (chunk_id)-[Contains]->(entity_name)\n"
+    "- (catalog_id)-[Contains]->(chunk_id)\n"
+    "- (catalog_id)-[Contains]->(sub_catalog_id)\n"
+    "- (document_id)-[Contains]->(chunk_id)\n"
+    "- (document_id)-[Contains]->(catalog_id)\n"
+    "```\n"
+    "- Correctly associate entity names/IDs in the relationship information "
+    "with entity information to restore the graph structure.\n"
+    "- Use the information expressed by the graph structure as detailed "
+    "context for the user's query to assist in generating better answers.\n"
+    "\n"
+    "## Constraints\n"
+    "- Don't describe your thought process in the answer, provide the answer "
+    "to the user's question directly without generating irrelevant information.\n"
+    "- If the [KnowledgeGraph] does not provide information, you should answer "
+    "the question based on the information provided in the [Context].\n"
+    "- Ensure to write in the third person, responding to questions from "
+    "an objective perspective based on the information combined from the "
+    "[Context] and the [KnowledgeGraph].\n"
+    "- If the provided information is contradictory, resolve the "
+    "contradictions and provide a single, coherent description.\n"
+    "- Avoid using stop words and overly common vocabulary.\n"
+    "\n"
+    "## Reference Example\n"
+    "```\n"
+    "[Context]:\n"
+    "Section 1:\n"
+    "Phil Jaber's eldest son is Jacob Jaber.\n"
+    "Section 2:\n"
+    "Phil Jaber's youngest son is Bill Jaber.\n"
+    "[KnowledgeGraph]:\n"
+    "Entities:\n"
+    "(Phil Jaber#Founder of Philz Coffee)\n"
+    "(Philz Coffee#Coffee brand founded in Berkeley, California)\n"
+    "(Jacob Jaber#Son of Phil Jaber)\n"
+    "(Multiple locations in the USA#Expansion regions of Philz Coffee)\n"
+    "\n"
+    "Relationships:\n"
+    "(Phil Jaber#Created#Philz Coffee"
+    "#Founded in Berkeley, California in 1978)\n"
+    "(Philz Coffee#Located in#Berkeley, California"
+    "#Founding location of Philz Coffee)\n"
+    "(Phil Jaber#Has#Jacob Jaber#Son of Phil Jaber)\n"
+    "(Jacob Jaber#Serves as#CEO#Became CEO of Philz Coffee in 2005)\n"
+    "(Philz Coffee#Expanded to#Multiple locations in the USA"
+    "#Expansion regions of Philz Coffee)\n"
+    "```\n"
+    "\n"
+    "----\n"
+    "\n"
+    "The following information from the [Context] and [KnowledgeGraph] can "
+    "help you better answer user questions.\n"
+    "\n"
+    "[Context]:\n"
+    "{context}\n"
+    "\n"
+    "[KnowledgeGraph]:\n"
+    "{graph}\n"
+    "\n"
+)