Commit fdc0068

fix bug
Jant1L committed Dec 13, 2024
1 parent 24443c2 commit fdc0068
Showing 4 changed files with 40 additions and 26 deletions.
10 changes: 3 additions & 7 deletions dbgpt/rag/transformer/graph_embedder.py
@@ -1,13 +1,9 @@
 """GraphEmbedder class."""

-import asyncio
 import logging
-import re
-from typing import Dict, List, Optional
+from typing import List

-from dbgpt.core import Chunk, LLMClient
-from dbgpt.storage.graph_store.graph import Edge, Graph, MemoryGraph, Vertex, GraphElemType
-from dbgpt.storage.vector_store.base import VectorStoreBase
+from dbgpt.storage.graph_store.graph import Graph, GraphElemType
 from dbgpt.rag.transformer.text2vector import Text2Vector

 logger = logging.getLogger(__name__)
@@ -23,7 +19,7 @@ def __init__(self):
     async def embed(
         self,
         text: str,
-    ) -> List[List[Graph]]:
+    ) -> List[float]:
         """Embed"""
         return await super()._embed(text)

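The corrected annotation reflects that embed() produces one embedding vector per input string, not a nested list of graphs. A minimal usage sketch under that assumption (the constructor takes no arguments per the hunk header above; the vector length depends on the underlying embedding model):

import asyncio

from dbgpt.rag.transformer.graph_embedder import GraphEmbedder

async def main() -> None:
    embedder = GraphEmbedder()
    # With the fixed annotation, embed() returns a single vector of floats.
    vector = await embedder.embed("a knowledge graph entity")
    print(len(vector))  # dimensionality of the embedding model's output

asyncio.run(main())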
1 change: 0 additions & 1 deletion dbgpt/rag/transformer/text2vector.py
@@ -2,7 +2,6 @@

 import logging
 import dashscope
-import json
 from http import HTTPStatus
 from abc import ABC
 from typing import List
38 changes: 23 additions & 15 deletions dbgpt/storage/knowledge_graph/community/tugraph_store_adapter.py
@@ -74,6 +74,7 @@ async def get_community(self, community_id: str) -> Community:
         all_edge_graph = self.query(edge_query)
         all_graph = MemoryGraph()
         for vertex in all_vertex_graph.vertices():
+            vertex.del_prop("embedding")
             all_graph.upsert_vertex(vertex)
         for edge in all_edge_graph.edges():
             all_graph.append_edge(edge)
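The added del_prop("embedding") call strips the stored embedding vector from each vertex before it is copied into the in-memory community graph, presumably so the bulky float arrays stay out of the community summaries. A stand-in sketch of the idea (dbgpt's real Vertex API is assumed, not shown in this diff):

# Stand-in for dbgpt's Vertex: properties held in a plain dict.
vertex_props = {
    "id": "entity_1",
    "description": "an extracted entity",
    "embedding": [0.12, -0.56, 0.33],  # large in practice (hundreds of floats)
}
vertex_props.pop("embedding", None)  # mirrors vertex.del_prop("embedding")
print(vertex_props)  # {'id': 'entity_1', 'description': 'an extracted entity'}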
@@ -587,20 +588,22 @@ def explore(
         else:
             rel = f"-[r:{GraphElemType.RELATION.value}*{depth_string}]-"

-        if isinstance(subs, List[str]):
+        if all(isinstance(item, str) for item in subs):
             header = f"WHERE n.id IN {[self._escape_quotes(sub) for sub in subs]} "
         else:
+            final_list = []
             for sub in subs:
                 vector = str(sub);
                 similarity_search = (
                     f"CALL db.vertexVectorKnnSearch("
                     f"'{GraphElemType.ENTITY.value}','embedding', {vector}, "
                     "{top_k:2, hnsw_ef_search:10})"
-                    "YIELD node RETURN node.id AS id"
+                    "YIELD node RETURN node.id AS id;"
                 )
                 result_list = self.graph_store.conn.run(query=similarity_search)
-                final_list = final_list + result_list
-                header = f"WHERE n.id IN {[(record["id"]) for record in final_list]} "
+                final_list.extend(result_list)
+            id_list = [(record["id"]) for record in final_list]
+            header = f"WHERE n.id IN {id_list} "

         query = (
             f"MATCH p=(n:{GraphElemType.ENTITY.value})"
@@ -626,9 +629,10 @@ def explore(

             # Check if the entities exist in the graph

-            if isinstance(subs, List[str]):
+            if all(isinstance(item, str) for item in subs):
                 header = f"WHERE n.id IN {[self._escape_quotes(sub) for sub in subs]} "
             else:
+                final_list = []
                 for sub in subs:
                     vector = str(sub);
                     similarity_search = (
@@ -638,19 +642,20 @@ def explore(
                         "YIELD node RETURN node.id AS id"
                     )
                     result_list = self.graph_store.conn.run(query=similarity_search)
-                    final_list = final_list + result_list
-                    header = f"WHERE n.id IN {[(record["id"]) for record in final_list]} "
-
+                    final_list.extend(result_list)
+                id_list = [(record["id"]) for record in final_list]
+                header = f"WHERE n.id IN {id_list} "
             check_entity_query = (
                 f"MATCH (n:{GraphElemType.ENTITY.value}) "
                 f"{header}"
                 "RETURN n"
             )
             if self.query(check_entity_query):
                 # Query the leaf chunks in the chain from documents to chunks
-                if isinstance(subs, List[str]):
+                if all(isinstance(item, str) for item in subs):
                     header = f"WHERE m.name IN {[self._escape_quotes(sub) for sub in subs]} "
                 else:
+                    final_list = []
                     for sub in subs:
                         vector = str(sub);
                         similarity_search = (
@@ -660,8 +665,9 @@ def explore(
                             "YIELD node RETURN node.name AS name"
                         )
                         result_list = self.graph_store.conn.run(query=similarity_search)
-                        final_list = final_list + result_list
-                        header = f"WHERE m.name IN {[(record["name"]) for record in final_list]} "
+                        final_list.extend(result_list)
+                    name_list = [(record["name"]) for record in final_list]
+                    header = f"WHERE n.name IN {name_list} "

                 leaf_chunk_query = (
                     f"MATCH p=(n:{GraphElemType.CHUNK.value})-"
@@ -701,11 +707,12 @@ def explore(
                     )
                 )
             else:
-                if isinstance(subs, List[str]):
+                if all(isinstance(item, str) for item in subs):
                     _subs_condition = " OR ".join(
                         [f"m.content CONTAINS '{self._escape_quotes(sub)}'" for sub in subs]
                     )
                 else:
+                    final_list = []
                     for sub in subs:
                         vector = str(sub);
                         similarity_search = (
@@ -715,9 +722,10 @@ def explore(
                             "YIELD node RETURN node.name AS name"
                         )
                         result_list = self.graph_store.conn.run(query=similarity_search)
-                        final_list = final_list + result_list
-                        _subs_condition = f"m.name IN {[(record["name"]) for record in final_list]} "
+                        final_list.extend(result_list)
+                    name_list = [(record["name"]) for record in final_list]
+                    _subs_condition = f"n.name IN {name_list} "

                 # Query the chain from documents to chunks,
                 # document -> chunk -> chunk -> chunk -> ... -> chunk
                 chain_query = (
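The removed header lines had a second problem besides running inside the loop: before Python 3.12, an f-string expression may not reuse the enclosing quote character, so nesting record["id"] inside a double-quoted f-string is a SyntaxError. Hoisting the comprehension into a named list, as the new code does, avoids both issues. A sketch with stand-in query results:

# Stand-in for the rows returned by graph_store.conn.run(...).
final_list = [{"id": "entity_1"}, {"id": "entity_2"}]

# Building the list first keeps the f-string free of nested double quotes
# and renders the ids as a Cypher-compatible list literal.
id_list = [record["id"] for record in final_list]
header = f"WHERE n.id IN {id_list} "
print(header)  # WHERE n.id IN ['entity_1', 'entity_2']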
17 changes: 14 additions & 3 deletions dbgpt/storage/knowledge_graph/community_summary.py
@@ -351,7 +351,9 @@ async def asimilar_search_with_scores(
         similar_search_enabled = self._similar_search_enabled

         if similar_search_enabled:
-            keywords: List[List[float]] = await self._garph_embedder.embed(text)
+            keywords: List[List[float]] = []
+            vector = await self._garph_embedder.embed(text)
+            keywords.append(vector)
         else:
             keywords: List[str] = await self._keyword_extractor.extract(text)

@@ -369,8 +371,13 @@

         if document_graph_enabled:
             keywords_for_document_graph = keywords
-            for vertex in subgraph.vertices():
-                keywords_for_document_graph.append(vertex.name)
+            if similar_search_enabled:
+                for vertex in subgraph.vertices():
+                    vector = await self._garph_embedder.embed(vertex.name)
+                    keywords_for_document_graph.append(vector)
+            else:
+                for vertex in subgraph.vertices():
+                    keywords_for_document_graph.append(vertex.name)

             subgraph_for_doc = self._graph_store_apdater.explore(
                 subs=keywords_for_document_graph,
@@ -384,6 +391,10 @@
                 limit=self._knowledge_graph_chunk_search_top_size,
                 search_scope="document_graph",
             )
+
+        for vertex in subgraph.vertices():
+            vertex.del_prop("embedding")
+
         knowledge_graph_str = subgraph.format() if subgraph else ""
         knowledge_graph_for_doc_str = (
             subgraph_for_doc.format() if subgraph_for_doc else ""
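After this change, keywords holds embedding vectors (one for the query text, plus one per subgraph vertex) when similar search is enabled, and plain strings otherwise, so explore() receives a uniformly typed list either way. A sketch of the collection logic with a stand-in embedder (the real one is self._garph_embedder.embed):

import asyncio
from typing import List

async def fake_embed(text: str) -> List[float]:
    """Stand-in embedder; the real one returns a model-sized vector."""
    return [float(len(text)), 0.0]

async def collect_keywords(text: str, vertex_names: List[str]) -> List[List[float]]:
    keywords: List[List[float]] = [await fake_embed(text)]
    # The document-graph path extends the same list with one vector per vertex.
    for name in vertex_names:
        keywords.append(await fake_embed(name))
    return keywords

print(asyncio.run(collect_keywords("user query", ["entity_1", "entity_2"])))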
