diff --git a/.env-example b/.env-example
index 8945822..d63eadc 100644
--- a/.env-example
+++ b/.env-example
@@ -1,8 +1,6 @@
 # Database Configuration
-# REQUIRED: Set these for production (test defaults exist for CI)
-# SECURITY: Change these credentials in production!
-POSTGRES_USER=your_postgres_user
-POSTGRES_PASSWORD=your_secure_postgres_password
+POSTGRES_USER=rag_user
+POSTGRES_PASSWORD=rag_password
 POSTGRES_DB=rag_database
 POSTGRES_HOST=postgres
 POSTGRES_PORT=5432
@@ -13,10 +11,7 @@ PGVECTOR_PORT=5432
 PGVECTOR_DB=vector_db
 
 # Neo4j Configuration
-# REQUIRED: Set this for production (test default exists for CI)
-# Format: username/password
-# SECURITY: Change the password in production!
-NEO4J_AUTH=neo4j/your_secure_neo4j_password
+NEO4J_AUTH=neo4j/neo4j_password
 NEO4J_HOST=neo4j
 NEO4J_PORT=7687
 NEO4J_HTTP_PORT=7474
@@ -29,36 +24,53 @@ BACKEND_PORT=8000
 FRONTEND_PORT=3000
 
 # OpenAI Configuration
-# REQUIRED: Get your API key from https://platform.openai.com/api-keys
-OPENAI_API_KEY=your_openai_api_key_here
+# OPENAI_API_KEY=
+# OPENAI_MODEL=gpt-4o
+# OPENAI_EMBEDDING_MODEL=text-embedding-3-large
 OPENAI_MODEL=gpt-4o-mini
 OPENAI_EMBEDDING_MODEL=text-embedding-3-small
+
 
 # Document Processing Configuration
-# Optimized for >95% RAGAS F1 Score (Updated Oct 13, 2025)
 CHUNK_SIZE=1200
-CHUNK_OVERLAP=500 # Increased from 400 for better context continuity
-TOP_K_RESULTS=20 # Increased from 15 for improved coverage
+CHUNK_OVERLAP=400
+TOP_K_RESULTS=5
 
-# GraphRAG Parallel Processing Configuration
-# Controls concurrent chunk processing for faster graph building
-GRAPHRAG_CONCURRENCY=25 # Number of concurrent chunk processing tasks (recommended: 10-50)
-GRAPHRAG_MAX_RETRIES=3 # Maximum retries for failed chunks (recommended: 2-5)
-GRAPHRAG_BASE_BACKOFF=0.5 # Base backoff time in seconds for retries (recommended: 0.5-2.0)
+# Storage
+STORAGE_PATH=/app/storage
 
-# Multi-Pass Graph Enrichment Configuration
-# Enables intelligent multi-pass graph building for richer knowledge graphs
-# Pass 1: Initial broad extraction
-# Pass 2: Find missing entities referenced but not extracted
-# Pass 3: Discover indirect relationships between entities
-GRAPHRAG_ENABLE_MULTIPASS=true # Enable multi-pass enrichment (true/false)
-GRAPHRAG_NUM_PASSES=3 # Number of passes through document (1-3 recommended)
+# GraphRAG Settings
+GRAPHRAG_ENABLED=true
+GRAPHRAG_LLM_MODEL=gpt-4o
+GRAPHRAG_EMBEDDING_MODEL=text-embedding-3-large
 
-# Max Performance Mode
-# When enabled, all search tools (vector, graph, filter) run in parallel
-# Results are collected and synthesized for comprehensive answers
-# When disabled, the agent intelligently selects the best single tool for each query
-MAX_PERFORMANCE=false # Enable max performance mode (true/false)
+# Entity Resolution
+ENTITY_SIMILARITY_THRESHOLD=0.85
+ENABLE_ENTITY_DEDUPLICATION=true
 
-# Storage
-STORAGE_PATH=/app/storage
+# Elasticsearch Configuration
+ELASTICSEARCH_HOST=elasticsearch
+ELASTICSEARCH_PORT=9200
+ELASTICSEARCH_INDEX_NAME=rag_documents
+
+# Search Configuration
+ENABLE_VECTOR_SEARCH=true
+ENABLE_GRAPH_SEARCH=true
+ENABLE_FILTER_SEARCH=true
+DEFAULT_SEARCH_TOOLS=auto
+
+# Performance Settings
+MAX_PERFORMANCE=false
+# MAX_PERFORMANCE=true
+
+# Memory Management Settings
+MEMORY_ENABLED=true
+MEMORY_DB_HOST=postgres
+MEMORY_DB_PORT=5432
+MEMORY_DB_NAME=rag_database
+MEMORY_DB_USER=rag_user
+MEMORY_DB_PASSWORD=rag_password
+MEMORY_APPROACH=external_llm
+MEMORY_MODEL=gpt-4o-mini
+REDIS_HOST=redis
+REDIS_PORT=6379
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..4e4b6e0
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "memory"]
+ path = memory
+ url = https://github.com/dev-pratap-singh/memory
diff --git a/memory b/memory
index c6076ae..1acdf42 160000
--- a/memory
+++ b/memory
@@ -1 +1 @@
-Subproject commit c6076aeff028a5d738533f7343cf51264aaa03c6
+Subproject commit 1acdf4280ce500cdaf4e587a8c9078c1316f946d