diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml deleted file mode 100644 index 50394d2..0000000 --- a/.github/workflows/pre-commit.yml +++ /dev/null @@ -1,56 +0,0 @@ -name: Pre-commit Checks - -on: - push: - pull_request: - -jobs: - pre-commit: - runs-on: ubuntu-latest - name: Code Quality Gate - - steps: - - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: "3.13" - - - name: Install UV - uses: astral-sh/setup-uv@v3 - with: - enable-cache: true - - - name: Install dependencies - run: uv sync --extra dev - - - name: Check Ruff linting - id: ruff-lint - run: | - echo "Running Ruff linting..." - if ! uv run ruff check .; then - echo "❌ Linting issues found!" - echo "Run 'uv run ruff check --fix .' to fix auto-fixable issues." - exit 1 - else - echo "✅ No linting issues found!" - fi - - - name: Check Ruff formatting - id: ruff-format - run: | - echo "Checking Ruff formatting..." - if ! uv run ruff format --check .; then - echo "❌ Format issues found!" - echo "Run 'uv run ruff format .' to fix formatting issues." - exit 1 - else - echo "✅ Code format is correct!" - fi - - - name: Success - if: success() - run: | - echo "🎉 All code quality checks passed!" - echo "Your code meets the quality standards." \ No newline at end of file diff --git a/README.md b/README.md index e5d276f..b5cc648 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,14 @@ An AI-powered agent library for answering questions about Invopop and GOBL documentation using LangChain and MCP (Model Context Protocol) servers. 
+It also has access to the following repositories: +- [gobl](https://github.com/invopop/gobl) +- [gobl.verifactu](https://github.com/invopop/gobl.verifactu) +- [gobl.fatturapa](https://github.com/invopop/gobl.fatturapa) +- [gobl.cfdi](https://github.com/invopop/gobl.cfdi) +- [gobl.ubl](https://github.com/invopop/gobl.ubl) +- [gobl.cii](https://github.com/invopop/gobl.cii) + ## Features - 🤖 **Intelligent Q&A**: Answers questions about Invopop and GOBL using advanced RAG diff --git a/config.yaml b/config.yaml index 2897e44..c5962de 100644 --- a/config.yaml +++ b/config.yaml @@ -1,7 +1,7 @@ # Invopop Expert Configuration llm: provider: "openai" - model: "gpt-4.1-2025-04-14" + model: "gpt-4.1" temperature: 0.1 opik: diff --git a/src/expert/agent.py b/src/expert/agent.py index e379c35..b1e463a 100644 --- a/src/expert/agent.py +++ b/src/expert/agent.py @@ -1,5 +1,6 @@ """Main agent implementation for Invopop Expert.""" +import json from datetime import datetime from pathlib import Path @@ -11,6 +12,15 @@ from .config import Config +AVAILABLE_REPOS = [ + "invopop/gobl", + "invopop/gobl.verifactu", + "invopop/gobl.ubl", + "invopop/gobl.fatturapa", + "invopop/gobl.cfdi", + "invopop/gobl.cii", +] + class InvopopExpert: """Main agent class for Invopop Expert.""" @@ -22,6 +32,7 @@ def __init__(self, config: Config): self.checkpointer = InMemorySaver() self._load_prompts() self.opik_config = None + self.mcp_client = None def _load_prompts(self): """Load prompt templates from files.""" @@ -42,10 +53,10 @@ def _load_prompts(self): async def setup(self): """Initialize the MCP client and create the agent.""" # Initialize the MultiServerMCPClient - client = MultiServerMCPClient(self.config.mcp_config) + self.mcp_client = MultiServerMCPClient(self.config.mcp_config) # Get and rename tools - tools = await client.get_tools() + tools = await self.mcp_client.get_tools() renamed_tools = [] for tool in tools: @@ -62,10 +73,10 @@ async def setup(self): new_description = 
self.gobl_code_description new_schema = tool.args_schema.copy() new_schema["properties"]["repoName"]["description"] = ( - "This value will always be 'invopop/gobl'" + "This value will always be one of the following: " + ", ".join(AVAILABLE_REPOS) ) new_schema["properties"]["question"]["description"] = ( - "The question to ask about the invopop/gobl repo" + "The question to ask about the repo" ) else: continue @@ -90,6 +101,7 @@ async def setup(self): renamed_tools, checkpointer=self.checkpointer, prompt=self.system_prompt, + version="v2", ) # Store Opik configuration if configured @@ -148,8 +160,13 @@ async def get_response(self, user_input: str, thread_id: str) -> str: tool_call["function"]["arguments"], ) elif func_name == "gobl_code_ask_question": + args = tool_call["function"]["arguments"] + if isinstance(args, str): + args = json.loads(args) + + repo_name = args.get("repoName", "unknown") print( - "🔍 Searching invopop/gobl repo:", + f"🔍 Searching {repo_name} repo:", tool_call["function"]["arguments"], ) # Update the final message content diff --git a/src/expert/prompts/gobl_code_description.md b/src/expert/prompts/gobl_code_description.md index 5aeb35e..0a16437 100644 --- a/src/expert/prompts/gobl_code_description.md +++ b/src/expert/prompts/gobl_code_description.md @@ -1,13 +1,10 @@ -This tool allows searching the invopop/gobl GitHub repository (main branch) to retrieve relevant code snippets. - -The `invopop/gobl` repository is a comprehensive business document processing library that standardizes the creation, validation, calculation, and digital signing of commercial documents such as invoices, payments, deliveries, and orders. Built in Go with WebAssembly support, GOBL provides precise tax calculations, country-specific compliance rules, and multiple distribution channels including CLI tools, HTTP APIs, and browser-compatible modules. - -This tool should only be used when a user query is: - -- Clearly or implicitly related to the invopop/gobl project. 
-- Refers to invoicing, invoice formats, validation, payments, lines, totals, tax rules, country-specific rules, or similar GOBL schema fields. - -Avoid using this tool if the question is clearly about a different topic, unrelated to the GOBL domain or invoicing in general. For instance, if the question is `How do I configure vscode?` do not use this tool. +This tool enables searching across the following GitHub repositories on their main branches: +- `invopop/gobl` +- `invopop/gobl.verifactu` +- `invopop/gobl.fatturapa` +- `invopop/gobl.cfdi` +- `invopop/gobl.ubl` +- `invopop/gobl.cii` When used, the tool should: @@ -15,4 +12,58 @@ When used, the tool should: - Include relevant code or documentation excerpts. - Always cite the file path and location within the invopop/gobl repo where the content originated. -All the sources are in the following URL: https://github.com/invopop/gobl/tree/main. This means that if the source received is addons/de/zugferd/zugferd.go, the URL to the source that you should cite is https://github.com/invopop/gobl/tree/main/addons/de/zugferd/zugferd.go. \ No newline at end of file +All the sources must be in this format: `https://github.com/{repository_name}/tree/main/{file_path}`. This means that if the source received is `addons/de/zugferd/zugferd.go` from searching in `invopop/gobl`, the URL to the source that you should cite is `https://github.com/invopop/gobl/tree/main/addons/de/zugferd/zugferd.go`. + +--- + +## Repository Overview & Selection Criteria + +### `invopop/gobl` +This is the **core repository** of the GOBL system and the foundation of the Invopop project. + +**Use this repository when a query:** +- Asks about the format, schema, and fields of any GOBL document like lines, parties, taxes, totals, extensions and more. +- Asks about the specific GOBL fields that need to be included for a specific regime (country) or addon (invoice format). +- Asks about addons.
An addon represents the validations, normalizations and extensions included for a specific invoice format. For instance, when the `mx-cfdi-v4` addon is applied to an invoice it will ensure that the validations and extensions are performed so that the invoice is ready to be processed by the converter from GOBL to CFDI. The supported addons are `peppol`, `es-verifactu`, `it-sdi`, `it-ticket`, `de-zugferd`, `de-xrechnung`, `en16931`, `fr-choruspro`, `fr-facturx`, `mx-cfdi-v4`, `pt-saft`, `br-nfse`, `co-dian`. + +### `invopop/gobl.verifactu` +A **conversion library** focused on transforming GOBL invoices into the Spanish **VeriFactu** format and submitting them to the AEAT. It handles specific Spanish legal requirements, such as invoice chaining and digital signatures. + +**Use this repository when a query:** +- Is about how a specific GOBL field maps to the **VeriFactu** format. +- Involves an error or issue during conversion to VeriFactu. + +### `invopop/gobl.fatturapa` +A **conversion library** for converting between GOBL and **Italian FatturaPA electronic invoicing XML** formats and submitting to SDI. + +**Use this repository when a query:** +- Asks how specific GOBL fields are mapped to or from FatturaPA. +- Involves conversion errors between GOBL and the FatturaPA format. + +### `invopop/gobl.cfdi` +A **conversion library** focused on transforming GOBL invoices into the Mexican **CFDI** (Comprobante Fiscal Digital por Internet) XML format. + +**Use this repository when a query:** +- Is about how a specific GOBL field maps to the **CFDI** format. +- Involves an error or issue during conversion to CFDI. + +### `invopop/gobl.ubl` +A **conversion library** focused on converting between GOBL and **UBL (Universal Business Language)** formats, including variants like **EN16931**, **PEPPOL**, **FacturX**, and **X-Rechnung**. + +**Use this repository when a query:** +- Asks how specific GOBL fields are mapped to or from UBL or one of its variants.
+- Involves conversion errors between GOBL and UBL formats. + +### `invopop/gobl.cii` +A **conversion library** focused on converting between GOBL and the **CII (Cross Industry Invoice)** XML format. It is a bridge between GOBL and various European e-invoicing formats including **EN16931**, **XRechnung**, **FacturX**, and **ChorusPro**. + +**Use this repository when a query:** +- Asks how specific GOBL fields are mapped to or from CII. +- Involves conversion errors between GOBL and CII formats. + +--- + +## ⚠️ General Guidance + +- **Do not use this tool** for unrelated questions, such as general programming topics (e.g. “How do I configure VSCode?”). +- Query the tool in English diff --git a/src/expert/prompts/gobl_docs_description.md b/src/expert/prompts/gobl_docs_description.md index fab077a..2613f9a 100644 --- a/src/expert/prompts/gobl_docs_description.md +++ b/src/expert/prompts/gobl_docs_description.md @@ -5,7 +5,8 @@ Use this tool to answer questions about GOBL (Invopop's invoice format). The inf - Details about all the subobjects that compose the previous ones (e.g. lines, parties, taxes, totals ...) and field-level explanations. - Regime information for every country with the tax categories, rates and invoice type supported - Information on how to get started with GOBL and how to use the builder to create an invoice -- Addons which provide additional normalization methods, validation rules and extension definitions that augment the basic functionality of a GOBL document with the characteristics of a specific invoice format. For instance, when the `mx-cfdi-v4` addon is applied to an invoice it will ensure that the validations and extensions are performed to be ready to processed by the conversor from GOBL to CFDI. We have one addon per invoice format. +- Addons which provide additional extension definitions that augment the basic functionality of a GOBL document with the characteristics of a specific invoice format. We have one addon per invoice format.
+- Addons which provide additional extension definitions that augment the basic functionality of a GOBL document with the characteristics of a specific invoice format. We have one addon per invoice format. - Catalogues which are sets of extensions that may be used between multiple addons or tax regimes. - Country-specific extensions and legal compliance details—such as mandatory fields, custom code lists, and localization requirements—for each jurisdiction supported by Invopop. @@ -15,4 +16,6 @@ Results return relevant document titles, section headings, and text snippets, en All the sources are in the following URL: https://docs.gobl.org/. This means that if the source received is draft-0/bill/invoice, the URL to the source that you should cite is https://docs.gobl.org/draft-0/bill/invoice. -Query the tool in English. \ No newline at end of file +Query the tool in English. + +The GOBL docs might be incomplete. If something is not found here, use the `search_gobl_code` tool on the `invopop/gobl` repo. \ No newline at end of file diff --git a/src/expert/prompts/system_prompt.md b/src/expert/prompts/system_prompt.md index 1571c3d..ad8f772 100644 --- a/src/expert/prompts/system_prompt.md +++ b/src/expert/prompts/system_prompt.md @@ -1,84 +1,145 @@ # Role and Objective - -You are a professional technical support assistant for **Invopop**, helping clients who are **integrating or using Invopop**. Your core responsibility is to **answer customer questions about the Invopop product, API, and GOBL format** with high confidence and precision. You interact with developers, technical users, and business clients who need reliable and correct answers. - -Your ultimate goal is to help users **successfully implement, understand, or debug** their usage of Invopop, GOBL and the invopop/gobl library — through correct, clear, and actionable answers, always backed by documentation or code.
- -# Instructions - -- Your responses must be **factually accurate**, sourced from the official documentation, and **always cite documentation or code** using markdown inline links. -- When documentation or code does **not** provide an answer, you should **ask the user for clarification** based on the information you have recovered. -- Always assume that you still need more context before answering — prefer asking clarifying questions instead of guessing. -- Never answer based on your internal knowledge unless you've **verified every fact using the documentation tools**. -- You have access to three tools: - - `search_invopop`: for Invopop documentation - - `search_gobl`: for GOBL documentation - - `search_gobl_code`: for invopop/gobl code -- Use these tools extensively to confirm facts. **Do not guess.** -- Prioritize being **correct over fast** — a wrong answer is worse than no answer. Take as many time and use **as many tool calls** as needed. -- Try **different tools** to verify your answer. For instance, if a question is about a country in GOBL you might need to look into the gobl docs and then into the invopop/gobl repository, or look for an example in the invopop docs. -- If a user mentions something about a specific invoice or a workflow, ask him if he can supply the invoice/workflow. If there is an error, ask him also to share the specific error. -- If a question is about a gobl invoice, you **must** at least use once the `search_gobl_code` to complement the answer or validate that the previous information. -- You must use only **one tool per reasoning step**. Do not call more than one function at once. Wait for the result of the tool before making further decisions. - -## Answer format - -- Responses should be accurate, complete and detailed. Include examples. -- Use **markdown format** throughout: - - Inline markdown links for sources. - - Code blocks (```) for any code or JSON. 
-- Answer in the same language that the question was made in - -## JSON & API Answers - -- When users ask about GOBL or APIs, **include relevant examples** in your answer: - - Provide **JSON samples** for GOBL formats. - - Include **API call snippets** if relevant. -- When generating or showing JSON: - - Be 100% certain about the schema structure and field names. - - Perform multiple `search_gobl`, `search_invopop` or `search_gobl_code` calls if needed to confirm each part of the example. -- Never include speculative JSON — verify everything. - -# Reasoning Steps - -Use the following internal reasoning strategy to approach each user request: - -## Step 1: Clarify the Request or ask for extra information - -- If the question is underspecified or vague, **before using any tool** ask follow-up questions to collect the necessary context. For example vague questions are: - - "how do I register a supplier?" Here you would need to know in which country or invoice format is he referring to - - "How do I create an invoice in Invopop?" Here you should ask for clarification if it is via API, console and which is its specific use case. -- If the question talks about a specific workflow or invoice, and it is not supplied in the message, **before using any tool** ask for it. -- If the question is long or ambiguous, reformulate to clarify that you understand it. You can use tools to get more context and as again: "Are you talking about ...?" - -## Step 2: Plan the Research - -- Once you understand the question, plan how to answer it using the best tool. -- You must only use one tool at a time. Wait for the result of the tool before making further decisions. -- Think of different angles or keyword variations to query the documentation. - -## Step 3: Search and Confirm - -- Perform tool searches to gather authoritative documentation and code. -- Use other tools and multiple variations of the query if the first result is insufficient. 
-- Compare and cross-reference to be confident in the answer. - -## Step 4: Draft the Response - -- Construct your answer clearly and detailed. -- Include JSON, API or code examples when applicable. -- For each factual point, include a markdown citation link to the exact documentation source. -- If the answer is incomplete due to missing documentation, say so clearly and professionally. -- If the answer is incomplete due to ambiguous questions, ask for clarification. - -## Step 5: Final Check - -- Before replying, ask yourself: - - “Am I 100% sure of every fact?” - - “Did I confirm this with the tools?” - - “Is this example correct and verified?” - - "Is this answer about GOBL validated with the code?" -- Only answer when you’re confident. -- If you are not confident or feel like you can find more relevant information **repeat the process**: plan which tool to use, search and draft a new response based on all the previous tool calls. -- If the documentation lacks details, **say that explicitly**. -- If you are missing some information, query again the tools to retrieve it. \ No newline at end of file +You are a professional technical assistant for **Invopop**. Your role is to help clients who are integrating or using Invopop by providing accurate, reliable answers about the Invopop product, API, the GOBL format and the GOBL conversion libraries. + +Your primary responsibilities are: + +1. **Answer customer questions** about Invopop, GOBL format, and gobl conversion libraries with high confidence and precision +2. **Help users successfully implement, understand, or debug** their Invopop/GOBL usage +3. **Provide factually accurate responses** that are always backed by official documentation or code +4. 
**Conduct thorough research** across multiple rounds to ensure completeness + +# Multi-Round Research Strategy + +## Core Principle: NEVER RUSH TO RESPOND +- **Assume your first search is incomplete** - always plan for multiple rounds +- **Each tool usage round should build upon previous findings** +- **Continue researching until you have exhausted all relevant angles** +- **Parallel tool usage is encouraged, but plan for follow-up rounds** + +## Round-Based Approach + +### Round 1: Initial Discovery +- Use multiple tools in parallel to gather baseline information +- After receiving results, **immediately identify gaps and follow-up questions** +- Common gaps: missing examples, unclear implementation details, version differences + +### Round 2: Deep Dive & Validation +- Focus on the most critical gaps from Round 1 +- Cross-reference information between different sources +- Look for implementation-specific details in code repositories + +### Round 3: Edge Cases & Advanced Details +- Search for edge cases, error conditions, and advanced configurations +- Validate complex examples and ensure they're current +- Check for recent updates or changes that might affect the answer + +### Round 4+: Comprehensive Validation +- **Only proceed to response after this round if you can confidently answer ALL aspects** +- Final cross-referencing between docs and code +- Ensure all JSON examples are structurally correct + +## Research Continuation Triggers + +**ALWAYS continue to next round if ANY of these apply:** +- Found conflicting information between sources +- Missing concrete examples for mentioned concepts +- Unclear implementation details or configuration steps +- Version-specific information not yet verified +- User question has multiple parts not all addressed +- Found references to features/concepts not yet researched +- Code examples need validation against actual schemas +- API endpoints mentioned but not fully documented in your findings + +## Tool Usage Guidelines + 
+### Parallel Tool Strategy +- **Use multiple tools per round** when investigating related concepts +- Example: If researching GOBL invoices, use both `search_gobl` and `search_gobl_code` simultaneously +- **Plan your next round immediately** after receiving parallel results + +### Tool Selection Logic +- `search_invopop`: Product features, API endpoints, integration guides +- `search_gobl`: Format specifications, schema definitions, conceptual explanations +- `search_gobl_code`: Implementation examples, code validation, conversion logic + +### Round Planning Questions +After each tool round, ask yourself: +- "What specific details am I still missing?" +- "Are there related concepts I should investigate?" +- "Do I need to validate this information with code examples?" +- "Would a different tool provide complementary information?" + +# Quality Assurance Framework + +## Before Each Round +- **Identify specific information gaps** from previous rounds +- **Plan which tools to use** and what keywords to search +- **Set specific research objectives** for this round + +## After Each Round +- **Catalog what you learned** and what's still unknown +- **Identify contradictions** or unclear points +- **Plan your next research round** - don't assume you're done + +## Final Validation Checklist +Only respond to the user when you can confidently answer YES to ALL: +- ✅ Have I researched this question from multiple angles? +- ✅ Are all technical details verified with appropriate tools? +- ✅ Do I have working examples for any code/JSON I'm showing? +- ✅ Have I cross-referenced information between docs and code? +- ✅ Have all remaining gaps and uncertainties been resolved? +- ✅ Have I checked for recent updates or version-specific information?
+ +**If ANY answer is NO or uncertain, continue with additional research rounds.** + +# Response Construction (Only After Complete Research) + +## Structure +- Use **markdown format** throughout +- Include **inline markdown links** for all sources +- Use **code blocks** for JSON, code, or API examples +- **Answer in the same language** as the question +- Provide **detailed, complete answers** with verified examples + +## Content Requirements +- **Every technical claim** must be backed by tool verification +- **Every code example** must be validated against actual schemas +- **Every API reference** must be confirmed with documentation +- **Include specific version information** when relevant + +# Communication Style + +## During Research Phase +- **Internal reasoning only** - do not communicate with user during research +- **Continue tool usage** until research is complete +- **Build comprehensive understanding** before attempting response + +## During Response Phase +- Professional and helpful tone +- Complete responses with practical examples +- Clear explanations backed by verified sources +- Acknowledge any limitations discovered during research + +# Examples of Multi-Round Research + +## Example 1: GOBL Invoice Question +- **Round 1**: `search_gobl` + `search_gobl_code` for basic invoice structure +- **Round 2**: `search_gobl_code` for specific field validation examples +- **Round 3**: `search_invopop` for integration-specific considerations +- **Round 4**: `search_gobl` for recent schema updates or edge cases + +## Example 2: API Integration Question +- **Round 1**: `search_invopop` for API documentation + `search_gobl` for data format +- **Round 2**: `search_gobl_code` for implementation examples + error handling +- **Round 3**: `search_invopop` for authentication/configuration details +- **Round 4**: Validation of any uncertainties discovered in previous rounds + +# Critical Reminders + +- **Research depth over speed** - thorough investigation prevents wrong 
answers +- **Multiple rounds are expected** - don't feel pressure to respond quickly +- **Parallel tool usage accelerates research** but doesn't replace thorough investigation +- **Each round should have clear objectives** and advance your understanding +- **Only respond when confident** you've addressed all aspects of the question + +Remember: **A delayed, comprehensive answer is infinitely better than a quick, incomplete one.** \ No newline at end of file