Commit fdc0068

fix bug
Jant1L committed Dec 13, 2024
1 parent 24443c2 commit fdc0068
Showing 4 changed files with 40 additions and 26 deletions.
10 changes: 3 additions & 7 deletions dbgpt/rag/transformer/graph_embedder.py
@@ -1,13 +1,9 @@
 """GraphEmbedder class."""

-import asyncio
 import logging
-import re
-from typing import Dict, List, Optional
+from typing import List

-from dbgpt.core import Chunk, LLMClient
-from dbgpt.storage.graph_store.graph import Edge, Graph, MemoryGraph, Vertex, GraphElemType
-from dbgpt.storage.vector_store.base import VectorStoreBase
+from dbgpt.storage.graph_store.graph import Graph, GraphElemType
 from dbgpt.rag.transformer.text2vector import Text2Vector

 logger = logging.getLogger(__name__)
@@ -23,7 +19,7 @@ def __init__(self):
     async def embed(
         self,
         text: str,
-    ) -> List[List[Graph]]:
+    ) -> List[float]:
         """Embed"""
         return await super()._embed(text)

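The corrected annotation reflects that embed() produces one embedding vector per input string, not a nested list of graphs. A minimal usage sketch under that assumption (the constructor takes no arguments per the hunk header above; the vector length depends on the underlying embedding model):

import asyncio

from dbgpt.rag.transformer.graph_embedder import GraphEmbedder

async def main() -> None:
    embedder = GraphEmbedder()
    # With the fixed annotation, embed() returns a single vector of floats.
    vector = await embedder.embed("a knowledge graph entity")
    print(len(vector))  # dimensionality of the embedding model's output

asyncio.run(main())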
1 change: 0 additions & 1 deletion dbgpt/rag/transformer/text2vector.py
@@ -2,7 +2,6 @@

 import logging
 import dashscope
-import json
 from http import HTTPStatus
 from abc import ABC
 from typing import List
38 changes: 23 additions & 15 deletions dbgpt/storage/knowledge_graph/community/tugraph_store_adapter.py
@@ -74,6 +74,7 @@ async def get_community(self, community_id: str) -> Community:
         all_edge_graph = self.query(edge_query)
         all_graph = MemoryGraph()
         for vertex in all_vertex_graph.vertices():
+            vertex.del_prop("embedding")
             all_graph.upsert_vertex(vertex)
         for edge in all_edge_graph.edges():
             all_graph.append_edge(edge)
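The added del_prop("embedding") call strips the stored embedding vector from each vertex before it is copied into the in-memory community graph, presumably so the bulky float arrays stay out of the community summaries. A stand-in sketch of the idea (dbgpt's real Vertex API is assumed, not shown in this diff):

# Stand-in for dbgpt's Vertex: properties held in a plain dict.
vertex_props = {
    "id": "entity_1",
    "description": "an extracted entity",
    "embedding": [0.12, -0.56, 0.33],  # large in practice (hundreds of floats)
}
vertex_props.pop("embedding", None)  # mirrors vertex.del_prop("embedding")
print(vertex_props)  # {'id': 'entity_1', 'description': 'an extracted entity'}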
@@ -587,20 +588,22 @@ def explore(
         else:
             rel = f"-[r:{GraphElemType.RELATION.value}*{depth_string}]-"

-        if isinstance(subs, List[str]):
+        if all(isinstance(item, str) for item in subs):
             header = f"WHERE n.id IN {[self._escape_quotes(sub) for sub in subs]} "
         else:
+            final_list = []
             for sub in subs:
                 vector = str(sub);
                 similarity_search = (
                     f"CALL db.vertexVectorKnnSearch("
                     f"'{GraphElemType.ENTITY.value}','embedding', {vector}, "
                     "{top_k:2, hnsw_ef_search:10})"
-                    "YIELD node RETURN node.id AS id"
+                    "YIELD node RETURN node.id AS id;"
                 )
                 result_list = self.graph_store.conn.run(query=similarity_search)
-                final_list = final_list + result_list
-                header = f"WHERE n.id IN {[(record["id"]) for record in final_list]} "
+                final_list.extend(result_list)
+            id_list = [(record["id"]) for record in final_list]
+            header = f"WHERE n.id IN {id_list} "

         query = (
             f"MATCH p=(n:{GraphElemType.ENTITY.value})"
@@ -626,9 +629,10 @@ def explore(

             # Check if the entities exist in the graph

-            if isinstance(subs, List[str]):
+            if all(isinstance(item, str) for item in subs):
                 header = f"WHERE n.id IN {[self._escape_quotes(sub) for sub in subs]} "
             else:
+                final_list = []
                 for sub in subs:
                     vector = str(sub);
                     similarity_search = (
@@ -638,19 +642,20 @@ def explore(
                         "YIELD node RETURN node.id AS id"
                     )
                     result_list = self.graph_store.conn.run(query=similarity_search)
-                    final_list = final_list + result_list
-                    header = f"WHERE n.id IN {[(record["id"]) for record in final_list]} "
-
+                    final_list.extend(result_list)
+                id_list = [(record["id"]) for record in final_list]
+                header = f"WHERE n.id IN {id_list} "
             check_entity_query = (
                 f"MATCH (n:{GraphElemType.ENTITY.value}) "
                 f"{header}"
                 "RETURN n"
             )
             if self.query(check_entity_query):
                 # Query the leaf chunks in the chain from documents to chunks
-                if isinstance(subs, List[str]):
+                if all(isinstance(item, str) for item in subs):
                     header = f"WHERE m.name IN {[self._escape_quotes(sub) for sub in subs]} "
                 else:
+                    final_list = []
                     for sub in subs:
                         vector = str(sub);
                         similarity_search = (
@@ -660,8 +665,9 @@ def explore(
                             "YIELD node RETURN node.name AS name"
                         )
                         result_list = self.graph_store.conn.run(query=similarity_search)
-                        final_list = final_list + result_list
-                        header = f"WHERE m.name IN {[(record["name"]) for record in final_list]} "
+                        final_list.extend(result_list)
+                    name_list = [(record["name"]) for record in final_list]
+                    header = f"WHERE n.name IN {name_list} "

                 leaf_chunk_query = (
                     f"MATCH p=(n:{GraphElemType.CHUNK.value})-"
@@ -701,11 +707,12 @@ def explore(
                     )
                 )
             else:
-                if isinstance(subs, List[str]):
+                if all(isinstance(item, str) for item in subs):
                     _subs_condition = " OR ".join(
                         [f"m.content CONTAINS '{self._escape_quotes(sub)}'" for sub in subs]
                     )
                 else:
+                    final_list = []
                     for sub in subs:
                         vector = str(sub);
                         similarity_search = (
@@ -715,9 +722,10 @@ def explore(
                             "YIELD node RETURN node.name AS name"
                         )
                         result_list = self.graph_store.conn.run(query=similarity_search)
-                        final_list = final_list + result_list
-                        _subs_condition = f"m.name IN {[(record["name"]) for record in final_list]} "
+                        final_list.extend(result_list)
+                    name_list = [(record["name"]) for record in final_list]
+                    _subs_condition = f"n.name IN {name_list} "

                 # Query the chain from documents to chunks,
                 # document -> chunk -> chunk -> chunk -> ... -> chunk
                 chain_query = (
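The removed header lines had a second problem besides running inside the loop: before Python 3.12, an f-string expression may not reuse the enclosing quote character, so nesting record["id"] inside a double-quoted f-string is a SyntaxError. Hoisting the comprehension into a named list, as the new code does, avoids both issues. A sketch with stand-in query results:

# Stand-in for the rows returned by graph_store.conn.run(...).
final_list = [{"id": "entity_1"}, {"id": "entity_2"}]

# Building the list first keeps the f-string free of nested double quotes
# and renders the ids as a Cypher-compatible list literal.
id_list = [record["id"] for record in final_list]
header = f"WHERE n.id IN {id_list} "
print(header)  # WHERE n.id IN ['entity_1', 'entity_2']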
17 changes: 14 additions & 3 deletions dbgpt/storage/knowledge_graph/community_summary.py
@@ -351,7 +351,9 @@ async def asimilar_search_with_scores(
         similar_search_enabled = self._similar_search_enabled

         if similar_search_enabled:
-            keywords: List[List[float]] = await self._garph_embedder.embed(text)
+            keywords: List[List[float]] = []
+            vector = await self._garph_embedder.embed(text)
+            keywords.append(vector)
         else:
             keywords: List[str] = await self._keyword_extractor.extract(text)

@@ -369,8 +371,13 @@

         if document_graph_enabled:
             keywords_for_document_graph = keywords
-            for vertex in subgraph.vertices():
-                keywords_for_document_graph.append(vertex.name)
+            if similar_search_enabled:
+                for vertex in subgraph.vertices():
+                    vector = await self._garph_embedder.embed(vertex.name)
+                    keywords_for_document_graph.append(vector)
+            else:
+                for vertex in subgraph.vertices():
+                    keywords_for_document_graph.append(vertex.name)

             subgraph_for_doc = self._graph_store_apdater.explore(
                 subs=keywords_for_document_graph,
@@ -384,6 +391,10 @@
                 limit=self._knowledge_graph_chunk_search_top_size,
                 search_scope="document_graph",
             )
+
+        for vertex in subgraph.vertices():
+            vertex.del_prop("embedding")
+
         knowledge_graph_str = subgraph.format() if subgraph else ""
         knowledge_graph_for_doc_str = (
             subgraph_for_doc.format() if subgraph_for_doc else ""
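After this change, keywords holds embedding vectors (one for the query text, plus one per subgraph vertex) when similar search is enabled, and plain strings otherwise, so explore() receives a uniformly typed list either way. A sketch of the collection logic with a stand-in embedder (the real one is self._garph_embedder.embed):

import asyncio
from typing import List

async def fake_embed(text: str) -> List[float]:
    """Stand-in embedder; the real one returns a model-sized vector."""
    return [float(len(text)), 0.0]

async def collect_keywords(text: str, vertex_names: List[str]) -> List[List[float]]:
    keywords: List[List[float]] = [await fake_embed(text)]
    # The document-graph path extends the same list with one vector per vertex.
    for name in vertex_names:
        keywords.append(await fake_embed(name))
    return keywords

print(asyncio.run(collect_keywords("user query", ["entity_1", "entity_2"])))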
