8
8
EmbeddingFunction ,
9
9
QueryConfig ,
10
10
)
11
+ from chromadb .base_types import CollectionSchema , ValueType
11
12
from chromadb .utils .embedding_functions import (
12
13
known_embedding_functions ,
13
14
register_embedding_function ,
@@ -44,6 +45,7 @@ class CollectionConfiguration(TypedDict, total=True):
44
45
spann : Optional [SpannConfiguration ]
45
46
embedding_function : Optional [EmbeddingFunction ] # type: ignore
46
47
query_embedding_function : Optional [EmbeddingFunction ] # type: ignore
48
+ schema : Optional [Dict [str , Dict [ValueType , CollectionSchema ]]]
47
49
48
50
49
51
def load_collection_configuration_from_json_str (
@@ -126,6 +128,7 @@ def load_collection_configuration_from_json(
126
128
spann = spann_config ,
127
129
embedding_function = ef , # type: ignore
128
130
query_embedding_function = query_ef , # type: ignore
131
+ schema = config_json_map .get ("schema" ),
129
132
)
130
133
131
134
@@ -139,6 +142,7 @@ def collection_configuration_to_json(config: CollectionConfiguration) -> Dict[st
139
142
spann_config = config .get ("spann" )
140
143
ef = config .get ("embedding_function" )
141
144
query_ef = config .get ("query_embedding_function" )
145
+ schema = config .get ("schema" )
142
146
else :
143
147
try :
144
148
hnsw_config = config .get_parameter ("hnsw" ).value
@@ -211,6 +215,7 @@ def collection_configuration_to_json(config: CollectionConfiguration) -> Dict[st
211
215
"spann" : spann_config ,
212
216
"embedding_function" : ef_config ,
213
217
"query_embedding_function" : query_ef_config ,
218
+ "schema" : schema ,
214
219
}
215
220
216
221
@@ -292,6 +297,7 @@ class CreateCollectionConfiguration(TypedDict, total=False):
292
297
spann : Optional [CreateSpannConfiguration ]
293
298
embedding_function : Optional [EmbeddingFunction ] # type: ignore
294
299
query_config : Optional [QueryConfig ]
300
+ schema : Optional [Dict [str , Dict [ValueType , CollectionSchema ]]]
295
301
296
302
297
303
def create_collection_configuration_from_legacy_collection_metadata (
@@ -430,6 +436,7 @@ def create_collection_configuration_to_json(
430
436
"spann" : spann_config ,
431
437
"embedding_function" : ef_config ,
432
438
"query_config" : query_config ,
439
+ "schema" : config .get ("schema" ),
433
440
}
434
441
435
442
@@ -502,6 +509,7 @@ class UpdateCollectionConfiguration(TypedDict, total=False):
502
509
spann : Optional [UpdateSpannConfiguration ]
503
510
embedding_function : Optional [EmbeddingFunction ] # type: ignore
504
511
query_config : Optional [QueryConfig ]
512
+ schema : Optional [Dict [str , Dict [ValueType , CollectionSchema ]]]
505
513
506
514
507
515
def update_collection_configuration_from_legacy_collection_metadata (
@@ -556,10 +564,17 @@ def update_collection_configuration_to_json(
556
564
"""Convert an UpdateCollectionConfiguration to a JSON-serializable dict"""
557
565
hnsw_config = config .get ("hnsw" )
558
566
spann_config = config .get ("spann" )
567
+ schema = config .get ("schema" )
559
568
ef = config .get ("embedding_function" )
560
569
q = config .get ("query_config" )
561
570
query_config : Dict [str , Any ] | None = None
562
- if hnsw_config is None and spann_config is None and ef is None and q is None :
571
+ if (
572
+ hnsw_config is None
573
+ and spann_config is None
574
+ and ef is None
575
+ and q is None
576
+ and schema is None
577
+ ):
563
578
return {}
564
579
565
580
if hnsw_config is not None :
@@ -601,6 +616,7 @@ def update_collection_configuration_to_json(
601
616
"spann" : spann_config ,
602
617
"embedding_function" : ef_config ,
603
618
"query_config" : query_config ,
619
+ "schema" : schema ,
604
620
}
605
621
606
622
@@ -764,14 +780,40 @@ def overwrite_collection_configuration(
764
780
ef_config [k ] = v
765
781
query_ef = updated_embedding_function .build_from_config (ef_config )
766
782
783
+ existing_schema = existing_config .get ("schema" )
784
+ new_diff_schema = update_config .get ("schema" )
785
+ updated_schema : Optional [Dict [str , Dict [ValueType , CollectionSchema ]]] = None
786
+ if existing_schema is not None :
787
+ if new_diff_schema is not None :
788
+ updated_schema = overwrite_schema (existing_schema , new_diff_schema )
789
+ else :
790
+ updated_schema = existing_schema
791
+ else :
792
+ updated_schema = new_diff_schema
793
+
767
794
return CollectionConfiguration (
768
795
hnsw = updated_hnsw_config ,
769
796
spann = updated_spann_config ,
770
797
embedding_function = updated_embedding_function ,
771
798
query_embedding_function = query_ef ,
799
+ schema = updated_schema ,
772
800
)
773
801
774
802
803
def overwrite_schema(
    existing_schema: Dict[str, Dict[ValueType, CollectionSchema]],
    new_diff_schema: Dict[str, Dict[ValueType, CollectionSchema]],
) -> Dict[str, Dict[ValueType, CollectionSchema]]:
    """Merge a schema diff on top of an existing schema.

    For every key in ``new_diff_schema``, each value-type entry replaces the
    entry with the same value type under that key in ``existing_schema``.
    Keys and value types that the diff does not mention are carried over
    unchanged, and keys that only appear in the diff are added.

    Neither argument is mutated. The result is a new outer dict holding new
    per-key dicts, so the caller's existing configuration and the update
    payload keep their original contents. The schema values stored inside
    the per-key dicts are shared with the inputs, not copied.

    Args:
        existing_schema: The schema currently in effect.
        new_diff_schema: The entries to add or replace.

    Returns:
        A new schema mapping containing the merged entries.
    """
    # Copy one level down so later writes never reach the caller's dicts.
    merged: Dict[str, Dict[ValueType, CollectionSchema]] = {
        key: dict(per_type) for key, per_type in existing_schema.items()
    }
    for key, per_type_diff in new_diff_schema.items():
        # setdefault + update covers both the "key already present" and the
        # "new key" cases, and copies the diff's entries instead of aliasing
        # the diff's inner dict.
        merged.setdefault(key, {}).update(per_type_diff)
    return merged
815
+
816
+
775
817
def validate_embedding_function_conflict_on_create (
776
818
embedding_function : Optional [EmbeddingFunction ], # type: ignore
777
819
configuration_ef : Optional [EmbeddingFunction ], # type: ignore
0 commit comments