-
Notifications
You must be signed in to change notification settings - Fork 1.9k
[CLN] schema: build default with config ef & default_knn_index, remove #document population in defaults #5775
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,8 +4,7 @@ use chroma_config::Configurable; | |
| use chroma_error::{ChromaError, ErrorCodes}; | ||
| use chroma_sysdb::SysDb; | ||
| use chroma_types::{ | ||
| CollectionAndSegments, CollectionUuid, GetCollectionWithSegmentsError, KnnIndex, Schema, | ||
| SchemaError, | ||
| CollectionAndSegments, CollectionUuid, GetCollectionWithSegmentsError, Schema, SchemaError, | ||
| }; | ||
| use serde::{Deserialize, Serialize}; | ||
| use std::{ | ||
|
|
@@ -143,7 +142,6 @@ impl CollectionsWithSegmentsProvider { | |
| pub(crate) async fn get_collection_with_segments( | ||
| &mut self, | ||
| collection_id: CollectionUuid, | ||
| knn_index: KnnIndex, | ||
| ) -> Result<CollectionAndSegments, CollectionsWithSegmentsProviderError> { | ||
| if let Some(collection_and_segments_with_ttl) = self | ||
| .collections_with_segments_cache | ||
|
|
@@ -185,14 +183,13 @@ impl CollectionsWithSegmentsProvider { | |
| .await? | ||
| }; | ||
|
|
||
| // reconcile schema and config | ||
| let reconciled_schema = Schema::reconcile_schema_and_config( | ||
| collection_and_segments_sysdb.collection.schema.as_ref(), | ||
| Some(&collection_and_segments_sysdb.collection.config), | ||
| knn_index, | ||
| ) | ||
| .map_err(CollectionsWithSegmentsProviderError::InvalidSchema)?; | ||
| collection_and_segments_sysdb.collection.schema = Some(reconciled_schema); | ||
| if collection_and_segments_sysdb.collection.schema.is_none() { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. have we tested that schema is None and not {} for older collections?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes |
||
| collection_and_segments_sysdb.collection.schema = Some( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we are always passing schema down to the reader then should we update the reader in |
||
| Schema::try_from(&collection_and_segments_sysdb.collection.config) | ||
| .map_err(CollectionsWithSegmentsProviderError::InvalidSchema)?, | ||
| ); | ||
| } | ||
|
|
||
| self.set_collection_with_segments(collection_and_segments_sysdb.clone()) | ||
| .await; | ||
| Ok(collection_and_segments_sysdb) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,7 +15,7 @@ use chroma_sysdb::SysDb; | |
| use chroma_system::Handler; | ||
| use chroma_system::{Component, ComponentContext}; | ||
| use chroma_types::{ | ||
| Chunk, CollectionUuid, GetCollectionWithSegmentsError, KnnIndex, LogRecord, SchemaError, | ||
| Chunk, CollectionUuid, GetCollectionWithSegmentsError, LogRecord, Schema, SchemaError, | ||
| }; | ||
| use serde::{Deserialize, Serialize}; | ||
| use thiserror::Error; | ||
|
|
@@ -141,9 +141,12 @@ impl Handler<BackfillMessage> for LocalCompactionManager { | |
| .get_collection_with_segments(message.collection_id) | ||
| .await?; | ||
| let schema_previously_persisted = collection_and_segments.collection.schema.is_some(); | ||
| collection_and_segments | ||
| .collection | ||
| .reconcile_schema_with_config(KnnIndex::Hnsw)?; | ||
| if !schema_previously_persisted { | ||
| collection_and_segments.collection.schema = Some( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: Minor change - for local we init the schema here if None but for distributed we init in the writer? Would be good to make the behavior uniform across the two |
||
| Schema::try_from(&collection_and_segments.collection.config) | ||
| .map_err(CompactionManagerError::SchemaReconcileError)?, | ||
| ); | ||
| } | ||
| // If collection is uninitialized, that means nothing has been written yet. | ||
| let dim = match collection_and_segments.collection.dimension { | ||
| Some(dim) => dim, | ||
|
|
@@ -267,7 +270,12 @@ impl Handler<PurgeLogsMessage> for LocalCompactionManager { | |
| .get_collection_with_segments(message.collection_id) | ||
| .await?; | ||
| let mut collection = collection_segments.collection.clone(); | ||
| collection.reconcile_schema_with_config(KnnIndex::Hnsw)?; | ||
| if collection.schema.is_none() { | ||
| collection.schema = Some( | ||
| Schema::try_from(&collection.config) | ||
| .map_err(CompactionManagerError::SchemaReconcileError)?, | ||
| ); | ||
| } | ||
| // If dimension is None, that means nothing has been written yet. | ||
| let dim = match collection.dimension { | ||
| Some(dim) => dim, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[CriticalError]
The JSON tag has been changed from
"boolean"to"bool"in the Go struct and corresponding test JSON strings. However, there's a critical compatibility issue to consider:Breaking Change Impact: This JSON field name change will break compatibility with existing stored data or API clients that expect the
"boolean"field name.Migration Strategy: If this change is intentional to match Rust's naming convention, you'll need to ensure:
booleanfields toboolIf this is purely for Go-Rust JSON compatibility (as mentioned in the PR description), consider:
Could you clarify the migration plan for this breaking change?
Context for Agents