Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go/pkg/sysdb/coordinator/model/collection_configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ type ValueTypes struct {
SparseVector *SparseVectorValueType `json:"sparse_vector,omitempty"`
Int *IntValueType `json:"int,omitempty"`
Float *FloatValueType `json:"float,omitempty"`
Boolean *BoolValueType `json:"boolean,omitempty"`
Boolean *BoolValueType `json:"bool,omitempty"`
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[CriticalError]

The JSON tag has been changed from "boolean" to "bool" in the Go struct and corresponding test JSON strings. However, there's a critical compatibility issue to consider:

  1. Breaking Change Impact: This JSON field name change will break compatibility with existing stored data or API clients that expect the "boolean" field name.

  2. Migration Strategy: If this change is intentional to match Rust's naming convention, you'll need to ensure:

    • Database migration to rename existing boolean fields to bool
    • API versioning to handle both field names during transition
    • Client SDK updates across all languages

If this is purely for Go-Rust JSON compatibility (as mentioned in the PR description), consider:

// Option 1: Support both during transition
Boolean *BoolValueType `json:"bool,omitempty" legacy:"boolean,omitempty"`
// (Caveat: encoding/json does not recognize a `legacy` tag, so as written this
// would not actually read the old "boolean" field — it still requires custom
// unmarshaling, effectively reducing to Option 2.)

// Option 2: Custom marshaling (UnmarshalJSON) to handle both names

Could you clarify the migration plan for this breaking change?

Context for Agents
[**CriticalError**]

The JSON tag has been changed from `"boolean"` to `"bool"` in the Go struct and corresponding test JSON strings. However, there's a critical compatibility issue to consider:

1. **Breaking Change Impact**: This JSON field name change will break compatibility with existing stored data or API clients that expect the `"boolean"` field name.

2. **Migration Strategy**: If this change is intentional to match Rust's naming convention, you'll need to ensure:
   - Database migration to rename existing `boolean` fields to `bool`
   - API versioning to handle both field names during transition
   - Client SDK updates across all languages

If this is purely for Go-Rust JSON compatibility (as mentioned in the PR description), consider:
```go
// Option 1: Support both during transition
Boolean *BoolValueType `json:"bool,omitempty" legacy:"boolean,omitempty"`
// (Caveat: encoding/json does not recognize a `legacy` tag, so as written this
// would not actually read the old "boolean" field — it still requires custom
// unmarshaling, effectively reducing to Option 2.)

// Option 2: Custom marshaling (UnmarshalJSON) to handle both names
```

Could you clarify the migration plan for this breaking change?

File: go/pkg/sysdb/coordinator/model/collection_configuration.go
Line: 252

}

type Schema struct {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ func TestUpdateSchemaFromConfig_HnswSuccess(t *testing.T) {
"config": {}
}
},
"boolean": {
"bool": {
"bool_inverted_index": {
"enabled": true,
"config": {}
Expand Down Expand Up @@ -312,7 +312,7 @@ func TestUpdateSchemaFromConfig_SpannSuccess(t *testing.T) {
"config": {}
}
},
"boolean": {
"bool": {
"bool_inverted_index": {
"enabled": true,
"config": {}
Expand Down Expand Up @@ -481,7 +481,7 @@ func TestUpdateSchemaFromConfig_EmbeddingFunction(t *testing.T) {
"config": {}
}
},
"boolean": {
"bool": {
"bool_inverted_index": {
"enabled": true,
"config": {}
Expand Down
2 changes: 1 addition & 1 deletion go/pkg/sysdb/coordinator/table_catalog_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1555,7 +1555,7 @@ func TestUpdateCollection_WithSchema(t *testing.T) {
"config": {}
}
},
"boolean": {
"bool": {
"bool_inverted_index": {
"enabled": true,
"config": {}
Expand Down
21 changes: 8 additions & 13 deletions rust/cli/src/commands/vacuum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use chroma_segment::local_segment_manager::LocalSegmentManager;
use chroma_sqlite::db::SqliteDb;
use chroma_sysdb::SysDb;
use chroma_system::System;
use chroma_types::{CollectionUuid, KnnIndex, ListCollectionsRequest};
use chroma_types::{CollectionUuid, ListCollectionsRequest, Schema};
use clap::Parser;
use colored::Colorize;
use dialoguer::Confirm;
Expand Down Expand Up @@ -101,17 +101,18 @@ async fn trigger_vector_segments_max_seq_id_migration(
sqlite: &SqliteDb,
sysdb: &mut SysDb,
segment_manager: &LocalSegmentManager,
default_knn_index: KnnIndex,
) -> Result<(), Box<dyn Error>> {
let collection_ids = get_collection_ids_to_migrate(sqlite).await?;

for collection_id in collection_ids {
let mut collection = sysdb.get_collection_with_segments(collection_id).await?;

collection
.collection
.reconcile_schema_with_config(default_knn_index)
.map_err(|e| Box::new(e) as Box<dyn Error>)?;
if collection.collection.schema.is_none() {
collection.collection.schema = Some(
Schema::try_from(&collection.collection.config)
.map_err(|e| Box::new(e) as Box<dyn Error>)?,
);
}

// If collection is uninitialized, that means nothing has been written yet.
let dim = match collection.collection.dimension {
Expand Down Expand Up @@ -153,13 +154,7 @@ pub async fn vacuum_chroma(config: FrontendConfig) -> Result<(), Box<dyn Error>>

println!("Purging the log...\n");

trigger_vector_segments_max_seq_id_migration(
&sqlite,
&mut sysdb,
&segment_manager,
config.default_knn_index,
)
.await?;
trigger_vector_segments_max_seq_id_migration(&sqlite, &mut sysdb, &segment_manager).await?;

let tenant = String::from("default_tenant");
let database = String::from("default_database");
Expand Down
19 changes: 8 additions & 11 deletions rust/frontend/src/get_collection_with_segments_provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@ use chroma_config::Configurable;
use chroma_error::{ChromaError, ErrorCodes};
use chroma_sysdb::SysDb;
use chroma_types::{
CollectionAndSegments, CollectionUuid, GetCollectionWithSegmentsError, KnnIndex, Schema,
SchemaError,
CollectionAndSegments, CollectionUuid, GetCollectionWithSegmentsError, Schema, SchemaError,
};
use serde::{Deserialize, Serialize};
use std::{
Expand Down Expand Up @@ -143,7 +142,6 @@ impl CollectionsWithSegmentsProvider {
pub(crate) async fn get_collection_with_segments(
&mut self,
collection_id: CollectionUuid,
knn_index: KnnIndex,
) -> Result<CollectionAndSegments, CollectionsWithSegmentsProviderError> {
if let Some(collection_and_segments_with_ttl) = self
.collections_with_segments_cache
Expand Down Expand Up @@ -185,14 +183,13 @@ impl CollectionsWithSegmentsProvider {
.await?
};

// reconcile schema and config
let reconciled_schema = Schema::reconcile_schema_and_config(
collection_and_segments_sysdb.collection.schema.as_ref(),
Some(&collection_and_segments_sysdb.collection.config),
knn_index,
)
.map_err(CollectionsWithSegmentsProviderError::InvalidSchema)?;
collection_and_segments_sysdb.collection.schema = Some(reconciled_schema);
if collection_and_segments_sysdb.collection.schema.is_none() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have we tested that `schema` comes back as `None` — and not as an empty `{}` — for older collections?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes

collection_and_segments_sysdb.collection.schema = Some(
Copy link
Contributor

@sanketkedia sanketkedia Nov 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we are always passing the schema down to the reader, should we update the reader in distributed_hnsw.rs to use the schema instead of the collection config (with a fallback to legacy metadata), similar to local_hnsw.rs? That would make the two paths uniform and easier to understand. (I understand the current code is also correct; this is just a code-design nit.)

Schema::try_from(&collection_and_segments_sysdb.collection.config)
.map_err(CollectionsWithSegmentsProviderError::InvalidSchema)?,
);
}

self.set_collection_with_segments(collection_and_segments_sysdb.clone())
.await;
Ok(collection_and_segments_sysdb)
Expand Down
26 changes: 16 additions & 10 deletions rust/frontend/src/impls/in_memory_frontend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ use chroma_types::operator::{Filter, KnnBatch, KnnProjection, Limit, Projection,
use chroma_types::plan::{Count, Get, Knn};
use chroma_types::{
test_segment, Collection, CollectionAndSegments, CreateCollectionError, Database, Include,
IncludeList, InternalCollectionConfiguration, KnnIndex, Segment, VectorIndexConfiguration,
IncludeList, InternalCollectionConfiguration, KnnIndex, Schema, SchemaError, Segment,
VectorIndexConfiguration,
};
use std::collections::HashSet;

Expand Down Expand Up @@ -221,22 +222,27 @@ impl InMemoryFrontend {
));
}

let mut collection = Collection {
let schema = Schema::reconcile_schema_and_config(
request.schema.as_ref(),
request.configuration.as_ref(),
KnnIndex::Hnsw,
)
.map_err(CreateCollectionError::InvalidSchema)?;

let config = InternalCollectionConfiguration::try_from(&schema).map_err(|e| {
CreateCollectionError::InvalidSchema(SchemaError::InvalidSchema { reason: e })
})?;

let collection = Collection {
name: request.name.clone(),
tenant: request.tenant_id.clone(),
database: request.database_name.clone(),
metadata: request.metadata,
config: request
.configuration
.unwrap_or(InternalCollectionConfiguration::default_hnsw()),
schema: request.schema,
config,
schema: Some(schema),
..Default::default()
};

collection
.reconcile_schema_with_config(KnnIndex::Hnsw)
.map_err(CreateCollectionError::InvalidSchema)?;

// Prevent SPANN usage in InMemoryFrontend
if matches!(
collection.config.vector_index,
Expand Down
25 changes: 13 additions & 12 deletions rust/frontend/src/impls/service_based_frontend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ impl ServiceBasedFrontend {
) -> Result<Collection, GetCollectionError> {
Ok(self
.collections_with_segments_provider
.get_collection_with_segments(collection_id, self.default_knn_index)
.get_collection_with_segments(collection_id)
.await
.map_err(|err| Box::new(err) as Box<dyn ChromaError>)?
.collection)
Expand All @@ -188,7 +188,7 @@ impl ServiceBasedFrontend {
) -> Result<Option<u32>, GetCollectionError> {
Ok(self
.collections_with_segments_provider
.get_collection_with_segments(collection_id, self.default_knn_index)
.get_collection_with_segments(collection_id)
.await
.map_err(|err| Box::new(err) as Box<dyn ChromaError>)?
.collection
Expand Down Expand Up @@ -381,7 +381,7 @@ impl ServiceBasedFrontend {
if self.enable_schema {
for collection in collections.iter_mut() {
collection
.reconcile_schema_with_config(self.default_knn_index)
.reconcile_schema_for_read()
.map_err(GetCollectionsError::InvalidSchema)?;
}
}
Expand Down Expand Up @@ -425,7 +425,7 @@ impl ServiceBasedFrontend {
if self.enable_schema {
for collection in &mut collections {
collection
.reconcile_schema_with_config(self.default_knn_index)
.reconcile_schema_for_read()
.map_err(GetCollectionError::InvalidSchema)?;
}
}
Expand All @@ -450,7 +450,7 @@ impl ServiceBasedFrontend {

if self.enable_schema {
collection
.reconcile_schema_with_config(self.default_knn_index)
.reconcile_schema_for_read()
.map_err(GetCollectionByCrnError::InvalidSchema)?;
}
Ok(collection)
Expand Down Expand Up @@ -630,9 +630,10 @@ impl ServiceBasedFrontend {
// that was retrieved from sysdb, rather than the one that was passed in
if self.enable_schema {
collection
.reconcile_schema_with_config(self.default_knn_index)
.reconcile_schema_for_read()
.map_err(CreateCollectionError::InvalidSchema)?;
}

Ok(collection)
}

Expand Down Expand Up @@ -735,7 +736,7 @@ impl ServiceBasedFrontend {
.await?;
collection_and_segments
.collection
.reconcile_schema_with_config(self.default_knn_index)
.reconcile_schema_for_read()
.map_err(ForkCollectionError::InvalidSchema)?;
let collection = collection_and_segments.collection.clone();
let latest_collection_logical_size_bytes = collection_and_segments
Expand Down Expand Up @@ -1099,7 +1100,7 @@ impl ServiceBasedFrontend {
let read_event = if let Some(where_clause) = r#where {
let collection_and_segments = self
.collections_with_segments_provider
.get_collection_with_segments(collection_id, self.default_knn_index)
.get_collection_with_segments(collection_id)
.await
.map_err(|err| Box::new(err) as Box<dyn ChromaError>)?;
if self.enable_schema {
Expand Down Expand Up @@ -1309,7 +1310,7 @@ impl ServiceBasedFrontend {
) -> Result<CountResponse, QueryError> {
let collection_and_segments = self
.collections_with_segments_provider
.get_collection_with_segments(collection_id, self.default_knn_index)
.get_collection_with_segments(collection_id)
.await
.map_err(|err| Box::new(err) as Box<dyn ChromaError>)?;
let latest_collection_logical_size_bytes = collection_and_segments
Expand Down Expand Up @@ -1424,7 +1425,7 @@ impl ServiceBasedFrontend {
) -> Result<GetResponse, QueryError> {
let collection_and_segments = self
.collections_with_segments_provider
.get_collection_with_segments(collection_id, self.default_knn_index)
.get_collection_with_segments(collection_id)
.await
.map_err(|err| Box::new(err) as Box<dyn ChromaError>)?;
if self.enable_schema {
Expand Down Expand Up @@ -1569,7 +1570,7 @@ impl ServiceBasedFrontend {
) -> Result<QueryResponse, QueryError> {
let collection_and_segments = self
.collections_with_segments_provider
.get_collection_with_segments(collection_id, self.default_knn_index)
.get_collection_with_segments(collection_id)
.await
.map_err(|err| Box::new(err) as Box<dyn ChromaError>)?;
if self.enable_schema {
Expand Down Expand Up @@ -1726,7 +1727,7 @@ impl ServiceBasedFrontend {
// Get collection and segments once for all queries
let collection_and_segments = self
.collections_with_segments_provider
.get_collection_with_segments(request.collection_id, self.default_knn_index)
.get_collection_with_segments(request.collection_id)
.await
.map_err(|err| QueryError::Other(Box::new(err) as Box<dyn ChromaError>))?;
if self.enable_schema {
Expand Down
18 changes: 13 additions & 5 deletions rust/log/src/local_compaction_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use chroma_sysdb::SysDb;
use chroma_system::Handler;
use chroma_system::{Component, ComponentContext};
use chroma_types::{
Chunk, CollectionUuid, GetCollectionWithSegmentsError, KnnIndex, LogRecord, SchemaError,
Chunk, CollectionUuid, GetCollectionWithSegmentsError, LogRecord, Schema, SchemaError,
};
use serde::{Deserialize, Serialize};
use thiserror::Error;
Expand Down Expand Up @@ -141,9 +141,12 @@ impl Handler<BackfillMessage> for LocalCompactionManager {
.get_collection_with_segments(message.collection_id)
.await?;
let schema_previously_persisted = collection_and_segments.collection.schema.is_some();
collection_and_segments
.collection
.reconcile_schema_with_config(KnnIndex::Hnsw)?;
if !schema_previously_persisted {
collection_and_segments.collection.schema = Some(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Minor inconsistency — in local mode we initialize the schema here when it is `None`, but in distributed mode we initialize it in the writer. It would be good to make the behavior uniform across the two.

Schema::try_from(&collection_and_segments.collection.config)
.map_err(CompactionManagerError::SchemaReconcileError)?,
);
}
// If collection is uninitialized, that means nothing has been written yet.
let dim = match collection_and_segments.collection.dimension {
Some(dim) => dim,
Expand Down Expand Up @@ -267,7 +270,12 @@ impl Handler<PurgeLogsMessage> for LocalCompactionManager {
.get_collection_with_segments(message.collection_id)
.await?;
let mut collection = collection_segments.collection.clone();
collection.reconcile_schema_with_config(KnnIndex::Hnsw)?;
if collection.schema.is_none() {
collection.schema = Some(
Schema::try_from(&collection.config)
.map_err(CompactionManagerError::SchemaReconcileError)?,
);
}
// If dimension is None, that means nothing has been written yet.
let dim = match collection.dimension {
Some(dim) => dim,
Expand Down
Loading
Loading