diff --git a/README.md b/README.md index ef147d4b..3a677976 100644 --- a/README.md +++ b/README.md @@ -428,22 +428,48 @@ podman run --rm \ ``` Once the command is done, you can find the vector database (embedded with the registry metadata) at -`./vector_db/custom_docs/0.1` with the name `faiss_store.db` as well as a -barebones llama-stack configuration file named `llama-stack.yaml` for -reference, since it's not necessary for the final deployment. +`./vector_db/custom_docs/0.1` with the name `faiss_store.db`, a +`lightspeed-stack.yaml` configuration file for use with Lightspeed Core Stack, +and a `llama-stack.yaml` for reference. -The vector-io will be named `custom-docs-0_1`: +The generated `lightspeed-stack.yaml` will look like: ```yaml -providers: - vector_io: - - provider_id: custom-docs-0_1 - provider_type: inline::faiss - config: - kvstore: - type: sqlite - namespace: null - db_path: /home//rag-content/vector_db/custom_docs/0.1/faiss_store.db +name: Lightspeed Core Stack (LCS) +service: + host: 0.0.0.0 + port: 8080 + base_url: http://localhost:8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true +llama_stack: + use_as_library_client: false + url: http://localhost:8321 +user_data_collection: + feedback_enabled: true + feedback_storage: "/tmp/data/feedback" + transcripts_enabled: true + transcripts_storage: "/tmp/data/transcripts" +conversation_cache: + type: "sqlite" + sqlite: + db_path: "/tmp/data/conversation-cache.db" +authentication: + module: "noop" + +byok_rag: + - rag_id: custom-docs-0_1 + rag_type: inline::faiss + embedding_model: sentence-transformers/all-mpnet-base-v2 + embedding_dimension: 768 + vector_db_id: + db_path: /home//rag-content/vector_db/custom_docs/0.1/faiss_store.db + +rag: + inline: + - custom-docs-0_1 ``` Once we have a database we can use script `query_rag.py` to check some results: diff --git a/src/lightspeed_rag_content/config_templates.py b/src/lightspeed_rag_content/config_templates.py new file mode 100644 index 00000000..de986093 --- /dev/null +++ b/src/lightspeed_rag_content/config_templates.py @@ -0,0 +1,196 @@ +# Copyright 2025 Red Hat, Inc. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Configuration templates for llama-stack and Lightspeed Core Stack output.""" + +# llama-stack configuration templates + +LLAMA_STACK_TEMPLATE = """version: 2 +image_name: starter + +apis: +- agents +- files +- inference +- safety +- tool_runtime +- vector_io + +server: + port: 8321 + +providers: + inference: + - config: {{}} + provider_id: sentence-transformers + provider_type: inline::sentence-transformers + files: + - config: + metadata_store: + table_name: files_metadata + backend: sql_default + storage_dir: /tmp/files + provider_id: meta-reference-files + provider_type: inline::localfs + agents: + - config: + persistence: + agent_state: + namespace: agents_state + backend: kv_default + responses: + table_name: agents_responses + backend: sql_default + provider_id: meta-reference + provider_type: inline::meta-reference + tool_runtime: + - config: {{}} + provider_id: rag-runtime + provider_type: inline::rag-runtime + vector_io: + - config: + {vector_io_cfg} + provider_id: {index_id} + provider_type: {provider_type_prefix}::{provider_type} +storage: + backends: + kv_rag: + type: kv_sqlite + db_path: {kv_db_path} + kv_default: + type: kv_sqlite + db_path: /tmp/kv_store.db + sql_default: + type: sql_sqlite + db_path: /tmp/sql_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + conversations: + table_name: openai_conversations + backend: sql_default +registered_resources: + models: + - metadata: + embedding_dimension: {dimension} + model_id: {model_name} + provider_id: sentence-transformers + provider_model_id: {model_name_or_dir} + model_type: embedding + vector_stores: [] + shields: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::rag + provider_id: rag-runtime +""" + +LLAMA_STACK_VECTOR_STORES_TEMPLATE = """vector_stores: + - embedding_dimension: {dimension} + embedding_model: sentence-transformers/{model_name_or_dir} + provider_id: {vector_io_provider_id} + vector_store_id: {vector_store_id}""" + +LLAMA_STACK_VECTOR_IO_CONFIG_SQLITE = """persistence: + namespace: vector_io::{provider_type} + backend: kv_rag""" + +LLAMA_STACK_VECTOR_IO_CONFIG_PGVECTOR = """persistence: + namespace: vector_io::{provider_type} + backend: kv_default + host: ${{env.POSTGRES_HOST}} + port: ${{env.POSTGRES_PORT}} + db: ${{env.POSTGRES_DATABASE}} + user: ${{env.POSTGRES_USER}} + password: ${{env.POSTGRES_PASSWORD}}""" + +LLAMA_STACK_CFG_FILENAME = "llama-stack.yaml" + +# Lightspeed Core Stack configuration templates + +LCS_CFG_FILENAME = "lightspeed-stack.yaml" + +LCS_BASE_TEMPLATE = """\ +name: Lightspeed Core Stack (LCS) +service: + host: 0.0.0.0 + port: 8080 + base_url: http://localhost:8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true +llama_stack: + use_as_library_client: false + url: http://localhost:8321 + # api_key: custom-key # Uncomment if your llama-stack requires authentication + # To run llama-stack in-process (no external container needed), set: + # use_as_library_client: true + # library_client_config_path: /path/to/llama-stack.yaml +user_data_collection: + feedback_enabled: true + feedback_storage: "/tmp/data/feedback" + transcripts_enabled: true + transcripts_storage: "/tmp/data/transcripts" +conversation_cache: + type: "sqlite" + sqlite: + db_path: "/tmp/data/conversation-cache.db" +authentication: + module: "noop" + +""" + +LCS_FAISS_BYOK_TEMPLATE = """\ +byok_rag: + - rag_id: {index_id} + rag_type: inline::faiss + embedding_model: {model_name} + embedding_dimension: {dimension} + vector_db_id: {vector_store_id} + db_path: {db_path} + +rag: + inline: + - {index_id} + # tool: + # - {index_id} +""" + +LCS_PGVECTOR_BYOK_TEMPLATE = """\ +byok_rag: + - rag_id: {index_id} + rag_type: remote::pgvector + embedding_model: {model_name} + embedding_dimension: {dimension} + vector_db_id: {vector_store_id} + host: ${{env.POSTGRES_HOST}} + port: ${{env.POSTGRES_PORT}} + db: ${{env.POSTGRES_DATABASE}} + user: ${{env.POSTGRES_USER}} + password: ${{env.POSTGRES_PASSWORD}} + +rag: + inline: + - {index_id} + # tool: + # - {index_id} +""" diff --git a/src/lightspeed_rag_content/document_processor.py b/src/lightspeed_rag_content/document_processor.py index 3eb8011a..b0191e92 100644 --- a/src/lightspeed_rag_content/document_processor.py +++ b/src/lightspeed_rag_content/document_processor.py @@ -36,6 +36,7 @@ from llama_index.vector_stores.postgres import PGVectorStore from sentence_transformers import SentenceTransformer +from lightspeed_rag_content import config_templates from lightspeed_rag_content.metadata_processor import MetadataProcessor if TYPE_CHECKING: @@ -301,99 +302,15 @@ def _save_metadata( class _LlamaStackDB(_BaseDB): # Lllama-stack faiss vector-db uses IndexFlatL2 (it's hardcoded for now) - TEMPLATE = """version: 2 -image_name: starter - -apis: -- files -- tool_runtime -- vector_io -- inference - -server: - port: 8321 - -providers: - inference: - - config: {{}} - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - files: - - config: - metadata_store: - table_name: files_metadata - backend: sql_default - storage_dir: /tmp/files - provider_id: meta-reference-files - provider_type: inline::localfs - tool_runtime: - - config: {{}} - provider_id: rag-runtime - provider_type: inline::rag-runtime - vector_io: - - config: - {vector_io_cfg} - provider_id: {index_id} - provider_type: {provider_type_prefix}::{provider_type} -storage: - backends: - kv_rag: - type: kv_sqlite - db_path: {kv_db_path} - kv_default: - type: kv_sqlite - db_path: /tmp/kv_store.db - sql_default: - type: sql_sqlite - db_path: /tmp/sql_store.db - stores: - metadata: - namespace: registry - backend: kv_default - inference: - table_name: inference_store - backend: sql_default - conversations: - table_name: openai_conversations - backend: sql_default -registered_resources: - models: - - metadata: - embedding_dimension: {dimension} - model_id: {model_name} - provider_id: sentence-transformers - provider_model_id: {model_name_or_dir} - model_type: embedding - vector_stores: [] - shields: [] - datasets: [] - scoring_fns: [] - benchmarks: [] - tool_groups: - - toolgroup_id: builtin::rag - provider_id: rag-runtime -""" - # Template for vector_stores section, added after vector store is created - VECTOR_STORES_TEMPLATE = """vector_stores: - - embedding_dimension: {dimension} - embedding_model: sentence-transformers/{model_name_or_dir} - provider_id: {vector_io_provider_id} - vector_store_id: {vector_store_id}""" - - # Template for vector_io/config section - VECTOR_IO_CONFIG_TEMPLATE_FOR_SQLITE = """persistence: - namespace: vector_io::{provider_type} - backend: kv_rag""" - VECTOR_IO_CONFIG_TEMPLATE_FOR_PGVECTOR = """persistence: - namespace: vector_io::{provider_type} - backend: kv_default - host: ${{env.POSTGRES_HOST}} - port: ${{env.POSTGRES_PORT}} - db: ${{env.POSTGRES_DATABASE}} - user: ${{env.POSTGRES_USER}} - password: ${{env.POSTGRES_PASSWORD}}""" - - CFG_FILENAME = "llama-stack.yaml" + TEMPLATE = config_templates.LLAMA_STACK_TEMPLATE + VECTOR_STORES_TEMPLATE = config_templates.LLAMA_STACK_VECTOR_STORES_TEMPLATE + VECTOR_IO_CONFIG_TEMPLATE_FOR_SQLITE = config_templates.LLAMA_STACK_VECTOR_IO_CONFIG_SQLITE + VECTOR_IO_CONFIG_TEMPLATE_FOR_PGVECTOR = config_templates.LLAMA_STACK_VECTOR_IO_CONFIG_PGVECTOR + CFG_FILENAME = config_templates.LLAMA_STACK_CFG_FILENAME + LCS_CFG_FILENAME = config_templates.LCS_CFG_FILENAME + LCS_BASE_TEMPLATE = config_templates.LCS_BASE_TEMPLATE + LCS_FAISS_BYOK_TEMPLATE = config_templates.LCS_FAISS_BYOK_TEMPLATE + LCS_PGVECTOR_BYOK_TEMPLATE = config_templates.LCS_PGVECTOR_BYOK_TEMPLATE def __init__(self, config: _Config): """Initialize the llama-stack Vector IO database. @@ -783,6 +700,25 @@ async def _upload_and_process_files( # noqa: C901 # pylint: disable=R0912,R091 LOG.info("All files processed successfully") return str(vector_store.id) + def write_lcs_config( + self, index: str, filename: str, vector_store_id: str, db_file: str + ) -> None: + """Write a lightspeed-stack.yaml configuration file.""" + if self.provider_type == "pgvector": + byok_template = self.LCS_PGVECTOR_BYOK_TEMPLATE + else: + byok_template = self.LCS_FAISS_BYOK_TEMPLATE + + data = self.LCS_BASE_TEMPLATE + byok_template.format( + index_id=index, + model_name=self.config.model_name, + dimension=self.config.embedding_dimension, + vector_store_id=vector_store_id, + db_path=db_file, + ) + with open(filename, "w", encoding="utf-8") as fd: + fd.write(data) + def _update_yaml_config(self, cfg_file: str, index: str, vector_store_id: str) -> None: """Update the config file with the created vector_store_id.""" vector_stores_section = self.VECTOR_STORES_TEMPLATE.format( @@ -817,6 +753,8 @@ def save( except Exception as exc: LOG.error("Failed to insert document: %s", exc) raise + lcs_file = os.path.join(output_dir, self.LCS_CFG_FILENAME) + self.write_lcs_config(index, lcs_file, vector_store_id, db_file) return vector_store_id diff --git a/tests/test_document_processor_llama_stack.py b/tests/test_document_processor_llama_stack.py index aa9a6beb..5e82db50 100644 --- a/tests/test_document_processor_llama_stack.py +++ b/tests/test_document_processor_llama_stack.py @@ -27,10 +27,12 @@ image_name: starter apis: +- agents - files +- inference +- safety - tool_runtime - vector_io -- inference server: port: 8321 @@ -48,6 +50,17 @@ storage_dir: /tmp/files provider_id: meta-reference-files provider_type: inline::localfs + agents: + - config: + persistence: + agent_state: + namespace: agents_state + backend: kv_default + responses: + table_name: agents_responses + backend: sql_default + provider_id: meta-reference + provider_type: inline::meta-reference tool_runtime: - config: {{}} provider_id: rag-runtime @@ -201,6 +214,61 @@ def test_write_yaml_config_faiss(self, mocker, llama_stack_processor): model_name_or_dir=llama_stack_processor["model_name"], ) + def test_write_lcs_config_faiss(self, mocker, llama_stack_processor): + """Test lightspeed-stack.yaml generation for FAISS backend.""" + mock_open = mocker.patch("builtins.open", new_callable=mocker.mock_open) + doc = document_processor._LlamaStackDB(llama_stack_processor["config"]) + + doc.write_lcs_config("my-index", "lcs.yaml", "vs_abc123", "/data/faiss_store.db") + + mock_open.assert_called_once_with("lcs.yaml", "w", encoding="utf-8") + data = mock_open.return_value.write.mock_calls[0].args[0] + assert "name: Lightspeed Core Stack (LCS)" in data + assert "service:" in data + assert "llama_stack:" in data + assert "authentication:" in data + assert "byok_rag:" in data + assert "rag_type: inline::faiss" in data + assert "rag_id: my-index" in data + assert "vector_db_id: vs_abc123" in data + assert "db_path: /data/faiss_store.db" in data + assert "embedding_dimension: 768" in data + assert f"embedding_model: {llama_stack_processor['model_name']}" in data + assert "rag:" in data + assert "inline:" in data + assert "- my-index" in data + + def test_write_lcs_config_pgvector(self, mocker, llama_stack_processor): + """Test lightspeed-stack.yaml generation for pgvector backend.""" + mock_open = mocker.patch("builtins.open", new_callable=mocker.mock_open) + config = llama_stack_processor["config"] + config.vector_store_type = "llamastack-pgvector" + doc = document_processor._LlamaStackDB(config) + + doc.write_lcs_config("pg-index", "lcs.yaml", "vs_pg123", "/data/pg_store.db") + + mock_open.assert_called_once_with("lcs.yaml", "w", encoding="utf-8") + data = mock_open.return_value.write.mock_calls[0].args[0] + assert "name: Lightspeed Core Stack (LCS)" in data + assert "service:" in data + assert "llama_stack:" in data + assert "authentication:" in data + assert "byok_rag:" in data + assert "rag_type: remote::pgvector" in data + assert "rag_id: pg-index" in data + assert "vector_db_id: vs_pg123" in data + byok_section = data[data.index("byok_rag:") :] + assert "db_path" not in byok_section + assert f"embedding_model: {llama_stack_processor['model_name']}" in data + assert "${env.POSTGRES_HOST}" in data + assert "${env.POSTGRES_PORT}" in data + assert "${env.POSTGRES_DATABASE}" in data + assert "${env.POSTGRES_USER}" in data + assert "${env.POSTGRES_PASSWORD}" in data + assert "rag:" in data + assert "inline:" in data + assert "- pg-index" in data + def test_run_llama_stack(self, mocker, llama_stack_processor): """Test running with llama-stack client lifecycle management.""" import asyncio @@ -372,6 +440,7 @@ def _test_save(self, mocker, config): write_cfg = mocker.patch.object(doc, "write_yaml_config") update_yaml = mocker.patch.object(doc, "_update_yaml_config") + write_lcs = mocker.patch.object(doc, "write_lcs_config") # Mock client_class to support async context manager client_instance = mocker.Mock() @@ -427,6 +496,12 @@ def _test_save(self, mocker, config): mock.sentinel.index, "vs_123", ) + write_lcs.assert_called_once_with( + mock.sentinel.index, + "out_dir/lightspeed-stack.yaml", + "vs_123", + "/cwd/out_dir/faiss_store.db", + ) # Verify client lifecycle (async context manager) client_class_mock.return_value.__aenter__.assert_awaited_once() client_class_mock.return_value.__aexit__.assert_awaited_once()