Skip to content

Commit 3be119b

Browse files
author
Charli Posner
committed
KXI-28991 newclientversion
1 parent 808f96b commit 3be119b

File tree

3 files changed

+33
-59
lines changed

3 files changed

+33
-59
lines changed
Binary file not shown.

datastore/providers/kdbai_datastore.py

Lines changed: 33 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -45,49 +45,6 @@
4545
)
4646

4747

48-
SCHEMA = {
49-
{
50-
"name": "chunk_id",
51-
"dataType": ["string"],
52-
"description": "The chunk id",
53-
},
54-
{
55-
"name": "document_id",
56-
"dataType": ["string"],
57-
"description": "The document id",
58-
},
59-
{
60-
"name": "text",
61-
"dataType": ["text"],
62-
"description": "The chunk's text",
63-
},
64-
{
65-
"name": "source",
66-
"dataType": ["string"],
67-
"description": "The source of the data",
68-
},
69-
{
70-
"name": "source_id",
71-
"dataType": ["string"],
72-
"description": "The source id",
73-
},
74-
{
75-
"name": "url",
76-
"dataType": ["string"],
77-
"description": "The source url",
78-
},
79-
{
80-
"name": "created_at",
81-
"dataType": ["date"],
82-
"description": "Creation date of document",
83-
},
84-
{
85-
"name": "author",
86-
"dataType": ["string"],
87-
"description": "Document author",
88-
},
89-
}
90-
9148

9249
# DEFAULT_EMBEDDING_MODEL = 'text-embedding-ada-002'
9350

@@ -157,25 +114,42 @@ def __init__(
157114
print("Tables in current session:")
158115
print(session.list())
159116

160-
# load the table configuration for the kdbai vector database (see schema from milvus)
161-
# schema = json.loads("""
162-
# {"type":"splayed",
163-
# "columns":[
164-
# {"name": "time", "type": "timespan"},
165-
# {"name": "sym", "type": "symbol"},
166-
# {"name": "id", "type": "symbol"},
167-
# {"name": "tag", "type": "symbol", "attrMem": "grouped"},
168-
# {"name": "text", "type": "string"},
169-
# {"name": "embeddings", "type": "reals",
170-
# "vectorIndex": {"type": "flat", "metric": "L2", "dims": 1536}}]
171-
# }""")
172-
173-
## ^^ USE SCHEMA INSTEAD
117+
#schema = {"type":"splayed","columns":[{"name":"time","type":"timespan"},{"name":"sym","type":"symbol","attrMem":"grouped"},{"name":"chunk_id","type":"symbol"},{"name":"document_id","type":"symbol"},{"name":"text","type":"string"},{"name":"source","type":"string"},{"name":"source_id","type":"symbol"},{"name":"url","type":"string"},{"name":"created_at","type":"timespan"},{"name":"author","type":"string"},{"name":"embeddings","type":"reals", "vectorIndex": {"type": "flat", "metric": "L2", "dims": 1536}}]}
118+
119+
schema = dict(
120+
columns=[
121+
dict(name='chunk_id', pytype='str'),
122+
dict(name='document_id', pytype='str'),
123+
dict(name='text', pytype='bytes'),
124+
dict(name='source', pytype='bytes'),
125+
dict(name='source_id', pytype='str'),
126+
dict(name='url', pytype='bytes'),
127+
dict(name='created_at', pytype='datetime64[ns]'),
128+
dict(name='author', pytype='bytes'),
129+
dict(name='embeddings',
130+
vectorIndex=dict(type='flat', metric='L2', dims=8))]
131+
)
174132

175133
# create a vector database table using the schema
176-
print('Accessing \'documents\' table:')
177-
self._table = session.table('documents')
134+
print('Creating table:')
135+
self._table = session.create_table('testingqueryfunc', schema)
136+
137+
print('Session tables:')
138+
print(session.list())
139+
140+
print('Table schema:')
141+
print(self._table.schema())
142+
143+
print('Table query:')
178144
print(self._table.query())
145+
146+
print('Inserting data:')
147+
df = kx.q('{([] chunk_id:x?`8; document_id:x?`8; text:{rand[256]?" "} each til x; source:{rand[30]?" "} each til x; source_id:x?`8; url:{rand[100]?" "} each til x; created_at:x?1D; author:{rand[30]?" "} each til x; embeddings:(x;1536)#(x*1536)?1e)}', 10).pd()
148+
self._table.insert(df)
149+
150+
print('Table query:')
151+
print(self._table.query())
152+
179153

180154
except Exception as e:
181155
logger.error(f"Error in creating table: {e}")
0 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)