@@ -54,9 +54,7 @@ async def test_query_chunk_metadata_handling(self):
54
54
result = await rag_tool .query (content = content , vector_db_ids = vector_db_ids )
55
55
56
56
assert result is not None
57
- expected_metadata_string = (
58
- "Metadata: {'chunk_id': 'chunk1', 'document_id': 'doc1', 'source': 'test_source', 'key1': 'value1'}"
59
- )
57
+ expected_metadata_string = "Metadata: {'chunk_id': 'chunk1', 'document_id': 'doc1', 'source': 'test_source', 'key1': 'value1', 'vector_db_id': 'db1'}"
60
58
assert expected_metadata_string in result .content [1 ].text
61
59
assert result .content is not None
62
60
@@ -77,3 +75,71 @@ async def test_query_accepts_valid_modes(self):
77
75
# Test that invalid mode raises an error
78
76
with pytest .raises (ValueError ):
79
77
RAGQueryConfig (mode = "wrong_mode" )
78
+
79
@pytest.mark.asyncio
async def test_query_adds_vector_db_id_to_chunk_metadata(self):
    """Query two vector DBs and verify each result is tagged with its DB id.

    ``query_chunks`` is mocked to return one chunk per call (first "db1",
    then "db2"). The test checks the chunk contents, scores and document
    ids reported in ``result.metadata`` and then parses the
    ``Metadata: {...}`` text of the returned content items to confirm the
    ``vector_db_id`` values appear in the same order.
    """
    import ast
    import re

    db_ids = ["db1", "db2"]
    tool = MemoryToolRuntimeImpl(
        config=MagicMock(),
        vector_io_api=MagicMock(),
        inference_api=MagicMock(),
    )

    # One fake chunk per vector DB, each carrying its own chunk metadata.
    db1_chunk = Chunk(
        content="chunk from db1",
        metadata={"vector_db_id": "db1", "document_id": "doc1"},
        stored_chunk_id="c1",
        chunk_metadata=ChunkMetadata(
            document_id="doc1",
            chunk_id="chunk1",
            source="test_source1",
            metadata_token_count=5,
        ),
    )
    db2_chunk = Chunk(
        content="chunk from db2",
        metadata={"vector_db_id": "db2", "document_id": "doc2"},
        stored_chunk_id="c2",
        chunk_metadata=ChunkMetadata(
            document_id="doc2",
            chunk_id="chunk2",
            source="test_source2",
            metadata_token_count=5,
        ),
    )

    # Successive awaits of query_chunks yield the db1 response, then db2.
    canned_responses = [
        QueryChunksResponse(chunks=[db1_chunk], scores=[0.9]),
        QueryChunksResponse(chunks=[db2_chunk], scores=[0.8]),
    ]
    tool.vector_io_api.query_chunks = AsyncMock(side_effect=canned_responses)

    outcome = await tool.query(content="test", vector_db_ids=db_ids)

    summary = outcome.metadata
    chunk_texts = summary["chunks"]
    chunk_scores = summary["scores"]
    doc_ids = summary["document_ids"]

    assert chunk_texts == ["chunk from db1", "chunk from db2"]
    assert chunk_scores == (0.9, 0.8)
    assert doc_ids == ["doc1", "doc2"]

    metadata_pattern = re.compile(r"Metadata:\s*(\{.*\})")

    def extract_metadata(text):
        """Return the dict literal that follows ``Metadata:`` in *text*."""
        found = metadata_pattern.search(text)
        if found is None:
            raise ValueError(f"No metadata found in string: {text} ")
        return ast.literal_eval(found.group(1))

    # Only content items that carry a metadata line are inspected.
    seen_db_ids = []
    for item in outcome.content:
        if "Metadata:" in item.text:
            seen_db_ids.append(extract_metadata(item.text)["vector_db_id"])

    assert seen_db_ids == ["db1", "db2"]
0 commit comments