@@ -277,6 +277,53 @@ def pick_individual_type(jsonschema_type: dict):
277277 if "object" in jsonschema_type ["type" ]:
278278 return JSONB ()
279279 if "array" in jsonschema_type ["type" ]:
280+ # Select between different kinds of `ARRAY` data types.
281+ #
282+ # This currently leverages an unspecified definition for the Singer SCHEMA,
283+ # using the `additionalProperties` attribute to convey additional type
284+ # information, agnostic of the target database.
285+ #
286+ # In this case, it is about telling different kinds of `ARRAY` types apart:
287+ # Either it is a vanilla `ARRAY`, to be stored into a `jsonb[]` type, or,
288+ # alternatively, it can be a "vector" kind `ARRAY` of floating point numbers,
289+ # effectively what pgvector is storing in its `VECTOR` type.
290+ #
291+ # Note that `type: "vector"` is only a surrogate label here: other database
292+ # systems may use different types for implementing the same thing, and
293+ # must translate the label accordingly.
294+ """
295+ Schema override rule in `meltano.yml`:
296+
297+ type: "array"
298+ items:
299+ type: "number"
300+ additionalProperties:
301+ storage:
302+ type: "vector"
303+ dim: 4
304+
305+ Produced schema annotation in `catalog.json`:
306+
307+ {"type": "array",
308+ "items": {"type": "number"},
309+ "additionalProperties": {"storage": {"type": "vector", "dim": 4}}}
310+ """
311+ if (
312+ "additionalProperties" in jsonschema_type
313+ and "storage" in jsonschema_type ["additionalProperties" ]
314+ ):
315+ storage_properties = jsonschema_type ["additionalProperties" ]["storage" ]
316+ if (
317+ "type" in storage_properties
318+ and storage_properties ["type" ] == "vector"
319+ ):
320+ # On PostgreSQL/pgvector, use the corresponding type definition
321+ # from its SQLAlchemy dialect.
322+ from pgvector .sqlalchemy import (
323+ Vector , # type: ignore[import-untyped]
324+ )
325+
326+ return Vector (storage_properties ["dim" ])
280327 return ARRAY (JSONB ())
281328 if jsonschema_type .get ("format" ) == "date-time" :
282329 return TIMESTAMP ()
@@ -310,6 +357,13 @@ def pick_best_sql_type(sql_type_array: list):
310357 NOTYPE ,
311358 ]
312359
360+ try :
361+ from pgvector .sqlalchemy import Vector
362+
363+ precedence_order .append (Vector )
364+ except ImportError :
365+ pass
366+
313367 for sql_type in precedence_order :
314368 for obj in sql_type_array :
315369 if isinstance (obj , sql_type ):
0 commit comments