26 changes: 22 additions & 4 deletions llama_stack/core/routing_tables/vector_dbs.py
@@ -52,7 +52,6 @@ async def register_vector_db(
provider_vector_db_id: str | None = None,
vector_db_name: str | None = None,
) -> VectorDB:
provider_vector_db_id = provider_vector_db_id or vector_db_id
if provider_id is None:
if len(self.impls_by_provider_id) > 0:
provider_id = list(self.impls_by_provider_id.keys())[0]
@@ -69,14 +68,33 @@ async def register_vector_db(
raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding)
if "embedding_dimension" not in model.metadata:
raise ValueError(f"Model {embedding_model} does not have an embedding dimension")

provider = self.impls_by_provider_id[provider_id]
logger.warning(
    "VectorDB is being deprecated in future releases in favor of VectorStore. Please migrate your usage accordingly."
)

Collaborator commented:

I don't understand: if it's deprecated (or we want to deprecate it soon), why convert the call to a vector store under the hood? Can't we just fail the call and tell users to use VectorStore?

franciscojavierarceo (Collaborator, Author) replied on Aug 29, 2025:

I wanted to provide a lower-friction migration.

The path would be:

  1. Create Vector Stores (this PR)
  2. Create Files and Vector Store Files for the user (would be the next PR)

Doing this would make the migration easier for users, IMO. Under the hood, Vector Stores uses most of the VectorDB and VectorIO logic.

Given we have so many Red Hat users of RAG, I thought this was an easier path for them. For some, just ripping off the bandaid would definitely be preferred, but I'm happy to try to reduce the friction as much as possible.

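To make the migration path above concrete, here is a minimal client-side sketch. It is illustrative only: the vector_dbs.register call mirrors the integration tests in this PR, while the client package, vector_stores.create signature, and the model id are assumptions based on the OpenAI-compatible API and are not taken from this diff.

# Hypothetical migration sketch; vector_stores.create and the model id below
# are assumptions, not part of this diff.
from llama_stack_client import LlamaStackClient  # assumed client package

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed endpoint

# Legacy path: still accepted after this PR, but it now creates a vector store
# under the hood and returns a "vs_"-prefixed identifier that can differ from
# the requested vector_db_id, so callers must use the returned identifier.
legacy = client.vector_dbs.register(
    vector_db_id="my-rag-db",
    embedding_model="all-MiniLM-L6-v2",  # assumed embedding model id
    embedding_dimension=384,
)
print(legacy.identifier)  # e.g. "vs_..."; pass this id to vector_io calls

# Preferred path going forward: create the vector store directly.
store = client.vector_stores.create(name="my-rag-db")
print(store.id)
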
vector_store = await provider.openai_create_vector_store(
name=vector_db_name or vector_db_id,
embedding_model=embedding_model,
embedding_dimension=model.metadata["embedding_dimension"],
provider_id=provider_id,
provider_vector_db_id=provider_vector_db_id,
)

vector_store_id = vector_store.id
actual_provider_vector_db_id = provider_vector_db_id or vector_store_id
logger.warning(
    f"Ignoring vector_db_id {vector_db_id} and using vector_store_id {vector_store_id} instead. Setting VectorDB {vector_db_id} to VectorDB.vector_db_name"
)

Collaborator (Author) commented:

I think this is a reasonable way to highlight this to users.

vector_db_data = {
"identifier": vector_db_id,
"identifier": vector_store_id,
"type": ResourceType.vector_db.value,
"provider_id": provider_id,
"provider_resource_id": provider_vector_db_id,
"provider_resource_id": actual_provider_vector_db_id,
"embedding_model": embedding_model,
"embedding_dimension": model.metadata["embedding_dimension"],
"vector_db_name": vector_db_name,
"vector_db_name": vector_store.name,
}
vector_db = TypeAdapter(VectorDBWithOwner).validate_python(vector_db_data)
await self.register_object(vector_db)
73 changes: 45 additions & 28 deletions tests/integration/vector_io/test_vector_io.py
@@ -47,34 +47,45 @@ def clear_registry():


def test_vector_db_retrieve(client_with_empty_registry, embedding_model_id, embedding_dimension):
# Register a memory bank first
vector_db_id = "test_vector_db"
client_with_empty_registry.vector_dbs.register(
vector_db_id=vector_db_id,
vector_db_name = "test_vector_db"
register_response = client_with_empty_registry.vector_dbs.register(
vector_db_id=vector_db_name,
embedding_model=embedding_model_id,
embedding_dimension=embedding_dimension,
)

actual_vector_db_id = register_response.identifier

# Retrieve the memory bank and validate its properties
response = client_with_empty_registry.vector_dbs.retrieve(vector_db_id=vector_db_id)
response = client_with_empty_registry.vector_dbs.retrieve(vector_db_id=actual_vector_db_id)
assert response is not None
assert response.identifier == vector_db_id
assert response.identifier == actual_vector_db_id
assert response.embedding_model == embedding_model_id
assert response.provider_resource_id == vector_db_id
assert response.identifier.startswith("vs_")


def test_vector_db_register(client_with_empty_registry, embedding_model_id, embedding_dimension):
vector_db_id = "test_vector_db"
client_with_empty_registry.vector_dbs.register(
vector_db_id=vector_db_id,
vector_db_name = "test_vector_db"
response = client_with_empty_registry.vector_dbs.register(
vector_db_id=vector_db_name,
embedding_model=embedding_model_id,
embedding_dimension=embedding_dimension,
)

actual_vector_db_id = response.identifier
assert actual_vector_db_id.startswith("vs_")
assert actual_vector_db_id != vector_db_name

vector_dbs_after_register = [vector_db.identifier for vector_db in client_with_empty_registry.vector_dbs.list()]
assert vector_dbs_after_register == [vector_db_id]
assert vector_dbs_after_register == [actual_vector_db_id]

vector_stores = client_with_empty_registry.vector_stores.list()
assert len(vector_stores.data) == 1
vector_store = vector_stores.data[0]
assert vector_store.id == actual_vector_db_id
assert vector_store.name == vector_db_name

client_with_empty_registry.vector_dbs.unregister(vector_db_id=vector_db_id)
client_with_empty_registry.vector_dbs.unregister(vector_db_id=actual_vector_db_id)

vector_dbs = [vector_db.identifier for vector_db in client_with_empty_registry.vector_dbs.list()]
assert len(vector_dbs) == 0
@@ -91,20 +91,22 @@ def test_vector_db_register(client_with_empty_registry, embedding_model_id, embe
],
)
def test_insert_chunks(client_with_empty_registry, embedding_model_id, embedding_dimension, sample_chunks, test_case):
vector_db_id = "test_vector_db"
client_with_empty_registry.vector_dbs.register(
vector_db_id=vector_db_id,
vector_db_name = "test_vector_db"
register_response = client_with_empty_registry.vector_dbs.register(
vector_db_id=vector_db_name,
embedding_model=embedding_model_id,
embedding_dimension=embedding_dimension,
)

actual_vector_db_id = register_response.identifier

client_with_empty_registry.vector_io.insert(
vector_db_id=vector_db_id,
vector_db_id=actual_vector_db_id,
chunks=sample_chunks,
)

response = client_with_empty_registry.vector_io.query(
vector_db_id=vector_db_id,
vector_db_id=actual_vector_db_id,
query="What is the capital of France?",
)
assert response is not None
@@ -113,7 +126,7 @@ def test_insert_chunks(client_with_empty_registry, embedding_model_id, embedding

query, expected_doc_id = test_case
response = client_with_empty_registry.vector_io.query(
vector_db_id=vector_db_id,
vector_db_id=actual_vector_db_id,
query=query,
)
assert response is not None
@@ -128,13 +141,15 @@ def test_insert_chunks_with_precomputed_embeddings(client_with_empty_registry, e
"remote::qdrant": {"score_threshold": -1.0},
"inline::qdrant": {"score_threshold": -1.0},
}
vector_db_id = "test_precomputed_embeddings_db"
client_with_empty_registry.vector_dbs.register(
vector_db_id=vector_db_id,
vector_db_name = "test_precomputed_embeddings_db"
register_response = client_with_empty_registry.vector_dbs.register(
vector_db_id=vector_db_name,
embedding_model=embedding_model_id,
embedding_dimension=embedding_dimension,
)

actual_vector_db_id = register_response.identifier

chunks_with_embeddings = [
Chunk(
content="This is a test chunk with precomputed embedding.",
Expand All @@ -144,13 +159,13 @@ def test_insert_chunks_with_precomputed_embeddings(client_with_empty_registry, e
]

client_with_empty_registry.vector_io.insert(
vector_db_id=vector_db_id,
vector_db_id=actual_vector_db_id,
chunks=chunks_with_embeddings,
)

provider = [p.provider_id for p in client_with_empty_registry.providers.list() if p.api == "vector_io"][0]
response = client_with_empty_registry.vector_io.query(
vector_db_id=vector_db_id,
vector_db_id=actual_vector_db_id,
query="precomputed embedding test",
params=vector_io_provider_params_dict.get(provider, None),
)
@@ -173,13 +188,15 @@ def test_query_returns_valid_object_when_identical_to_embedding_in_vdb(
"remote::qdrant": {"score_threshold": 0.0},
"inline::qdrant": {"score_threshold": 0.0},
}
vector_db_id = "test_precomputed_embeddings_db"
client_with_empty_registry.vector_dbs.register(
vector_db_id=vector_db_id,
vector_db_name = "test_precomputed_embeddings_db"
register_response = client_with_empty_registry.vector_dbs.register(
vector_db_id=vector_db_name,
embedding_model=embedding_model_id,
embedding_dimension=embedding_dimension,
)

actual_vector_db_id = register_response.identifier

chunks_with_embeddings = [
Chunk(
content="duplicate",
Expand All @@ -189,13 +206,13 @@ def test_query_returns_valid_object_when_identical_to_embedding_in_vdb(
]

client_with_empty_registry.vector_io.insert(
vector_db_id=vector_db_id,
vector_db_id=actual_vector_db_id,
chunks=chunks_with_embeddings,
)

provider = [p.provider_id for p in client_with_empty_registry.providers.list() if p.api == "vector_io"][0]
response = client_with_empty_registry.vector_io.query(
vector_db_id=vector_db_id,
vector_db_id=actual_vector_db_id,
query="duplicate",
params=vector_io_provider_params_dict.get(provider, None),
)
30 changes: 24 additions & 6 deletions tests/unit/distribution/routers/test_routing_tables.py
@@ -146,6 +146,20 @@ async def register_vector_db(self, vector_db: VectorDB):
async def unregister_vector_db(self, vector_db_id: str):
return vector_db_id

async def openai_create_vector_store(self, **kwargs):
import time
import uuid

from llama_stack.apis.vector_io.vector_io import VectorStoreFileCounts, VectorStoreObject

vector_store_id = kwargs.get("provider_vector_db_id") or f"vs_{uuid.uuid4()}"
return VectorStoreObject(
id=vector_store_id,
name=kwargs.get("name", vector_store_id),
created_at=int(time.time()),
file_counts=VectorStoreFileCounts(completed=0, cancelled=0, failed=0, in_progress=0, total=0),
)


async def test_models_routing_table(cached_disk_dist_registry):
table = ModelsRoutingTable({"test_provider": InferenceImpl()}, cached_disk_dist_registry, {})
@@ -247,17 +261,21 @@ async def test_vectordbs_routing_table(cached_disk_dist_registry):
)

# Register multiple vector databases and verify listing
await table.register_vector_db(vector_db_id="test-vectordb", embedding_model="test_provider/test-model")
await table.register_vector_db(vector_db_id="test-vectordb-2", embedding_model="test_provider/test-model")
vdb1 = await table.register_vector_db(vector_db_id="test-vectordb", embedding_model="test_provider/test-model")
vdb2 = await table.register_vector_db(vector_db_id="test-vectordb-2", embedding_model="test_provider/test-model")
vector_dbs = await table.list_vector_dbs()

assert len(vector_dbs.data) == 2
vector_db_ids = {v.identifier for v in vector_dbs.data}
assert "test-vectordb" in vector_db_ids
assert "test-vectordb-2" in vector_db_ids
assert vdb1.identifier in vector_db_ids
assert vdb2.identifier in vector_db_ids

# Verify they have UUID-based identifiers
assert vdb1.identifier.startswith("vs_")
assert vdb2.identifier.startswith("vs_")

await table.unregister_vector_db(vector_db_id="test-vectordb")
await table.unregister_vector_db(vector_db_id="test-vectordb-2")
await table.unregister_vector_db(vector_db_id=vdb1.identifier)
await table.unregister_vector_db(vector_db_id=vdb2.identifier)

vector_dbs = await table.list_vector_dbs()
assert len(vector_dbs.data) == 0