Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,14 @@ mcp:
chat:
welcome_message: "Welcome to Invopop Expert! Ask questions about GOBL, Invopop and the invopop/gobl library (type 'exit' to quit)"
input_prompt: "Enter your multi-line question. Press Enter on an empty line to send."
max_history: 50
max_history: 50

vector_store:
id: "" # OpenAI vector store ID; must be set for the official docs search tool to work
max_results: 10
score_threshold: 0.1
default_filters:
country: "es" # Spain
subject: "verifactu" # VeriFactu-specific docs

checkpointer: "memory"
13 changes: 13 additions & 0 deletions src/expert/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from opik.integrations.langchain import OpikTracer

from .config import Config
from .official_docs_tool import OfficialDocsTool

AVAILABLE_REPOS = [
"invopop/gobl",
Expand All @@ -31,6 +32,10 @@ def __init__(self, config: Config):
self._load_prompts()
self.opik_config = None
self.mcp_client = None
self.official_docs_tool = OfficialDocsTool(
vector_store_id=self.config.vector_store_config["id"],
max_results=self.config.vector_store_config.get("max_results", 10),
)

def _load_prompts(self):
"""Load prompt templates from files."""
Expand Down Expand Up @@ -88,6 +93,9 @@ async def setup(self):
)
renamed_tools.append(renamed_tool)

# Add the official docs tool
renamed_tools.append(self.official_docs_tool.get_tool())

# Create the agent
llm_config = self.config.llm_config
provider = llm_config.get("provider", "openai")
Expand Down Expand Up @@ -170,6 +178,11 @@ async def get_response_with_context(self, messages: list[dict], thread_id: str)
f"🔍 Searching {repo_name} repo:",
tool_call["function"]["arguments"],
)
elif func_name == "search_official":
print(
"🔍 Searching official docs:",
tool_call["function"]["arguments"],
)
# Update the final message content
final_message_content = message.content

Expand Down
5 changes: 5 additions & 0 deletions src/expert/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,11 @@ def chat_config(self) -> dict[str, Any]:
"""Get chat configuration."""
return self.config.get("chat", {})

@property
def vector_store_config(self) -> dict[str, Any]:
    """Return the ``vector_store`` section of the loaded config (empty dict if absent)."""
    section_name = "vector_store"
    return self.config.get(section_name, {})

@property
def checkpointer(self) -> BaseCheckpointSaver | None:
"""Get checkpointer configuration."""
Expand Down
117 changes: 117 additions & 0 deletions src/expert/official_docs_tool.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
"""RAG search tool using OpenAI Vector Store."""

from pathlib import Path

from langchain_core.tools import StructuredTool
from openai import OpenAI
from pydantic import BaseModel, Field


class OfficialDocsInput(BaseModel):
    """Input schema for RAG search tool."""

    # Free-text question that is matched semantically against the vector store.
    query: str = Field(description="The search query to find relevant documentation")

    # Optional metadata filter on the document's country attribute.
    country: str | None = Field(
        description="Country filter for the search (for now only 'es' for Spain)",
        default="es",
    )

    # Optional metadata filter on the document's subject attribute.
    subject: str | None = Field(
        description=(
            "Subject filter for the search\n"
            "('regulation' for general regulations or 'verifactu' for VeriFactu-specific docs)"
        ),
        default="verifactu",
    )


class OfficialDocsTool:
    """RAG search tool using OpenAI Vector Store.

    Wraps ``client.vector_stores.search`` with optional ``country`` and
    ``subject`` attribute filters and renders the hits as Markdown text that
    can be handed back to the agent. Exposed to LangChain via ``get_tool``.
    """

    # (attribute key, display label) pairs, in the order they are rendered.
    _ATTRIBUTE_LABELS = (
        ("subject", "Subject"),
        ("type", "Type"),
        ("country", "Country"),
        ("format", "Format"),
        ("url", "Source"),
    )

    # Range the vector store search endpoint documents for ``max_num_results``.
    _MIN_API_RESULTS = 1
    _MAX_API_RESULTS = 50

    def __init__(self, vector_store_id: str, max_results: int = 10):
        """Initialize the RAG search tool.

        Args:
            vector_store_id: ID of the OpenAI vector store to query.
            max_results: Maximum number of results to request and render.
        """
        self.client = OpenAI()
        self.vector_store_id = vector_store_id
        self.max_results = max_results

        prompts_dir = Path(__file__).parent / "prompts"

        # Explicit encoding: the prompt is Markdown and must not depend on the
        # platform's default locale encoding.
        description_path = prompts_dir / "official_docs_description.md"
        self.official_docs_description = description_path.read_text(encoding="utf-8").strip()

    def search(
        self,
        query: str,
        country: str | None = "es",
        subject: str | None = "verifactu",
    ) -> str:
        """Search the vector store for relevant documents with filters.

        Args:
            query: The search query.
            country: Country filter (default: "es"); a falsy value disables it.
            subject: Subject filter (default: "verifactu"); a falsy value disables it.

        Returns:
            Formatted string with search results, a "no documents" notice, or
            an error message if the search failed.
        """
        try:
            # One equality condition per active (truthy) filter value.
            conditions = [
                {"type": "eq", "key": key, "value": value}
                for key, value in (("country", country), ("subject", subject))
                if value
            ]

            # Ask the API for as many results as we intend to show; otherwise
            # the endpoint's own default caps the response and a larger
            # configured max_results silently has no effect.
            requested = max(self._MIN_API_RESULTS, min(self._MAX_API_RESULTS, self.max_results))
            request = {
                "vector_store_id": self.vector_store_id,
                "query": query,
                "max_num_results": requested,
            }
            # Leave ``filters`` out entirely when none is active rather than
            # sending an explicit null.
            if conditions:
                request["filters"] = {"type": "and", "filters": conditions}

            results = self.client.vector_stores.search(**request)

            # Convert to dict for easier handling
            hits = results.model_dump().get("data")

            if not hits:
                return (
                    "No relevant documents found for your query with filters\n"
                    f"(country: {country}, subject: {subject})."
                )

            formatted_results = [
                self._format_result(index, hit)
                for index, hit in enumerate(hits[: self.max_results], 1)
            ]

            # Add filter info to the response
            filter_info = f"**Filtered by:** Country='{country}', Subject='{subject}'\n\n"

            return filter_info + "\n\n---\n\n".join(formatted_results)

        except Exception as e:
            # Deliberately broad: this runs as an agent tool, so any failure is
            # reported back as text instead of aborting the conversation.
            return f"Error performing search: {str(e)}"

    @classmethod
    def _format_result(cls, index: int, result: dict) -> str:
        """Render one search hit as a Markdown section with its attributes."""
        content_texts = [
            part["text"] for part in (result.get("content") or []) if part.get("text")
        ]

        # ``attributes`` may be present but null in the dumped response, so a
        # ``.get`` default alone is not enough.
        attributes = result.get("attributes") or {}
        attr_info = [
            f"{label}: {attributes[key]}"
            for key, label in cls._ATTRIBUTE_LABELS
            if attributes.get(key)
        ]
        attr_str = f" ({', '.join(attr_info)})" if attr_info else ""

        result_text = "\n".join(content_texts) if content_texts else "No content available"
        return f"## Result {index}{attr_str}\n\n{result_text}"

    def get_tool(self) -> StructuredTool:
        """Get the LangChain tool for this RAG search.

        Returns:
            A ``StructuredTool`` named ``search_official`` that calls
            ``search`` and validates its arguments with ``OfficialDocsInput``.
        """
        return StructuredTool.from_function(
            func=self.search,
            name="search_official",
            description=self.official_docs_description,
            args_schema=OfficialDocsInput,
        )
31 changes: 31 additions & 0 deletions src/expert/prompts/official_docs_description.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
Search through official VeriFactu documentation using semantic search with country and subject filters.

This tool provides modular access to official Spanish government documentation for VeriFactu.

**Filters Available:**
- Country: Currently supports 'es' (Spain)
- Subject: 'verifactu' for VeriFactu-specific docs or 'regulation' for general regulations

**Documentation Content:**
- Official VeriFactu regulations and legal requirements
- VeriFactu XML schema specifications and field definitions
- Validation rules and business requirements
- Error codes and troubleshooting guides
- Technical implementation requirements
- Spanish tax compliance procedures

**Use this tool to find relevant information about:**
- VeriFactu XML format and structure
- Required vs optional fields and elements
- Validation rules and constraints
- Error codes and their meanings
- Spanish tax compliance requirements
- Official government regulations and updates
- Troubleshooting common VeriFactu issues

**Parameters:**
- query: Your search question
- country: Filter by country (default: 'es' for Spain)
- subject: Filter by subject (default: 'verifactu' for VeriFactu docs; use 'regulation' for general regulations)

The search returns filtered, relevant excerpts from official documentation to help answer compliance questions.
Loading