|
45 | 45 | )
|
46 | 46 |
|
47 | 47 |
|
48 |
| -SCHEMA = { |
49 |
| - { |
50 |
| - "name": "chunk_id", |
51 |
| - "dataType": ["string"], |
52 |
| - "description": "The chunk id", |
53 |
| - }, |
54 |
| - { |
55 |
| - "name": "document_id", |
56 |
| - "dataType": ["string"], |
57 |
| - "description": "The document id", |
58 |
| - }, |
59 |
| - { |
60 |
| - "name": "text", |
61 |
| - "dataType": ["text"], |
62 |
| - "description": "The chunk's text", |
63 |
| - }, |
64 |
| - { |
65 |
| - "name": "source", |
66 |
| - "dataType": ["string"], |
67 |
| - "description": "The source of the data", |
68 |
| - }, |
69 |
| - { |
70 |
| - "name": "source_id", |
71 |
| - "dataType": ["string"], |
72 |
| - "description": "The source id", |
73 |
| - }, |
74 |
| - { |
75 |
| - "name": "url", |
76 |
| - "dataType": ["string"], |
77 |
| - "description": "The source url", |
78 |
| - }, |
79 |
| - { |
80 |
| - "name": "created_at", |
81 |
| - "dataType": ["date"], |
82 |
| - "description": "Creation date of document", |
83 |
| - }, |
84 |
| - { |
85 |
| - "name": "author", |
86 |
| - "dataType": ["string"], |
87 |
| - "description": "Document author", |
88 |
| - }, |
89 |
| -} |
90 |
| - |
91 | 48 |
|
92 | 49 | # DEFAULT_EMBEDDING_MODEL = 'text-embedding-ada-002'
|
93 | 50 |
|
@@ -157,25 +114,42 @@ def __init__(
|
157 | 114 | print("Tables in current session:")
|
158 | 115 | print(session.list())
|
159 | 116 |
|
160 |
| - # load the table configuration for the kdbai vector database (see schema from milvus) |
161 |
| - # schema = json.loads(""" |
162 |
| - # {"type":"splayed", |
163 |
| - # "columns":[ |
164 |
| - # {"name": "time", "type": "timespan"}, |
165 |
| - # {"name": "sym", "type": "symbol"}, |
166 |
| - # {"name": "id", "type": "symbol"}, |
167 |
| - # {"name": "tag", "type": "symbol", "attrMem": "grouped"}, |
168 |
| - # {"name": "text", "type": "string"}, |
169 |
| - # {"name": "embeddings", "type": "reals", |
170 |
| - # "vectorIndex": {"type": "flat", "metric": "L2", "dims": 1536}}] |
171 |
| - # }""") |
172 |
| - |
173 |
| - ## ^^ USE SCHEMA INSTEAD |
| 117 | + #schema = {"type":"splayed","columns":[{"name":"time","type":"timespan"},{"name":"sym","type":"symbol","attrMem":"grouped"},{"name":"chunk_id","type":"symbol"},{"name":"document_id","type":"symbol"},{"name":"text","type":"string"},{"name":"source","type":"string"},{"name":"source_id","type":"symbol"},{"name":"url","type":"string"},{"name":"created_at","type":"timespan"},{"name":"author","type":"string"},{"name":"embeddings","type":"reals", "vectorIndex": {"type": "flat", "metric": "L2", "dims": 1536}}]} |
| 118 | + |
| 119 | + schema = dict( |
| 120 | + columns=[ |
| 121 | + dict(name='chunk_id', pytype='str'), |
| 122 | + dict(name='document_id', pytype='str'), |
| 123 | + dict(name='text', pytype='bytes'), |
| 124 | + dict(name='source', pytype='bytes'), |
| 125 | + dict(name='source_id', pytype='str'), |
| 126 | + dict(name='url', pytype='bytes'), |
| 127 | + dict(name='created_at', pytype='datetime64[ns]'), |
| 128 | + dict(name='author', pytype='bytes'), |
| 129 | + dict(name='embeddings', |
| 130 | + vectorIndex=dict(type='flat', metric='L2', dims=8))] |
| 131 | + ) |
174 | 132 |
|
175 | 133 | # create a vector database table using the schema
|
176 |
| - print('Accessing \'documents\' table:') |
177 |
| - self._table = session.table('documents') |
| 134 | + print('Creating table:') |
| 135 | + self._table = session.create_table('testingqueryfunc', schema) |
| 136 | + |
| 137 | + print('Session tables:') |
| 138 | + print(session.list()) |
| 139 | + |
| 140 | + print('Table schema:') |
| 141 | + print(self._table.schema()) |
| 142 | + |
| 143 | + print('Table query:') |
178 | 144 | print(self._table.query())
|
| 145 | + |
| 146 | + print('Inserting data:') |
| 147 | + df = kx.q('{([] chunk_id:x?`8; document_id:x?`8; text:{rand[256]?" "} each til x; source:{rand[30]?" "} each til x; source_id:x?`8; url:{rand[100]?" "} each til x; created_at:x?1D; author:{rand[30]?" "} each til x; embeddings:(x;1536)#(x*1536)?1e)}', 10).pd() |
| 148 | + self._table.insert(df) |
| 149 | + |
| 150 | + print('Table query:') |
| 151 | + print(self._table.query()) |
| 152 | + |
179 | 153 |
|
180 | 154 | except Exception as e:
|
181 | 155 | logger.error(f"Error in creating table: {e}")
|
|
0 commit comments