From 1a04f2bbed7dd1bfd35fea1b65bc8d6f1f868707 Mon Sep 17 00:00:00 2001 From: Tavily PR Agent Date: Mon, 20 Apr 2026 19:53:21 +0000 Subject: [PATCH] feat: add Tavily as configurable search provider alongside SearXNG --- .env | 6 ++- docker-compose.yml | 3 ++ model_config.py | 4 ++ requirements.txt | 1 + utils/websearch_utils.py | 82 ++++++++++++++++++++++++++++++---------- 5 files changed, 74 insertions(+), 22 deletions(-) diff --git a/.env b/.env index 3274d32..0a05cd9 100644 --- a/.env +++ b/.env @@ -5,4 +5,8 @@ PORT_NUM_SEARXNG=8085 HOST_SEARXNG=0.0.0.0 # Example API key environment variable (optional). Set in your .env, NOT in model_config.py GOOGLE_API_KEY=REPLACE_YOUR_API_KEY -ADMIN_TOKEN=123456 \ No newline at end of file +ADMIN_TOKEN=123456 +# Search provider: 'searxng' (default) or 'tavily' +# SEARCH_PROVIDER=searxng +# Required when SEARCH_PROVIDER=tavily +# TAVILY_API_KEY=tvly-YOUR_API_KEY \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index e1a82cc..721fedf 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -17,6 +17,9 @@ services: # Ensure app binds to all interfaces inside the container (host access via localhost:8000) - HOST_APP=0.0.0.0 - PORT_NUM_APP=8000 + # Search provider selection: 'searxng' (default) or 'tavily' + - SEARCH_PROVIDER=${SEARCH_PROVIDER:-searxng} + - TAVILY_API_KEY=${TAVILY_API_KEY:-} depends_on: - searxng diff --git a/model_config.py b/model_config.py index 3417303..2f9c1c8 100644 --- a/model_config.py +++ b/model_config.py @@ -35,6 +35,10 @@ HOST_APP = os.environ.get('HOST_APP', DEFAULT_HOST_APP) HOST_SEARXNG = os.environ.get('HOST_SEARXNG', DEFAULT_HOST_SEARXNG) +# Search provider: 'searxng' (default) or 'tavily' +SEARCH_PROVIDER = os.environ.get('SEARCH_PROVIDER', 'searxng') +TAVILY_API_KEY = os.environ.get('TAVILY_API_KEY', '') + ############### # API keys: prefer provider-specific vars, fall back to the old GOOGLE_API_KEY for compatibility diff --git a/requirements.txt 
b/requirements.txt index f41263c..16b0e46 100644 --- a/requirements.txt +++ b/requirements.txt @@ -38,6 +38,7 @@ langchain-groq youtube_transcript_api>=1.1.0 slowapi charset-normalizer +tavily-python kokoro-onnx soundfile langchain-chroma \ No newline at end of file diff --git a/utils/websearch_utils.py b/utils/websearch_utils.py index cc1157a..2976a18 100644 --- a/utils/websearch_utils.py +++ b/utils/websearch_utils.py @@ -63,51 +63,70 @@ class SearchWeb: """ - A class for performing web searches using Searx and scraping content from web pages. + A class for performing web searches using Searx or Tavily and scraping content from web pages. Attributes: port (int): The port number for the Searx search wrapper. searcher (SearxSearchWrapper): An instance of SearxSearchWrapper for querying search engines. + search_provider (str): The active search provider ('searxng' or 'tavily'); when 'tavily', searcher is None and a tavily_client attribute is used instead. """ def __init__(self, port, host="localhost", type='http'): """ Initializes the SearchWeb class with the given Searx server port. + When SEARCH_PROVIDER is set to 'tavily', a TavilyClient is used instead. Args: port (int): The port number for Searx search service. host (str): The host address for Searx search service. type (str): The protocol type ('http' or 'https'). Defaults to 'http'. """ - # Build explicit base URL and pass to SearxSearchWrapper. Add debug logging - # so we can trace which host/port the application is actually using. 
- base_url = f"{type}://{host}:{port}" - self.searcher = SearxSearchWrapper(searx_host=base_url) - self.base_url = base_url - try: - logger.info(f"SearchWeb initialized with searx base_url={self.base_url}") - # Some wrappers may expose their configured host attribute; ensure it's set for visibility - if not hasattr(self.searcher, 'searx_host'): - try: - setattr(self.searcher, 'searx_host', self.base_url) - except Exception: - pass - except Exception: - # logging should not crash initialization - pass + from model_config import SEARCH_PROVIDER, TAVILY_API_KEY + + self.search_provider = SEARCH_PROVIDER.lower() + + if self.search_provider == 'tavily': + from tavily import TavilyClient + self.tavily_client = TavilyClient(api_key=TAVILY_API_KEY) + self.searcher = None + self.base_url = None + logger.info("SearchWeb initialized with Tavily provider") + else: + # Build explicit base URL and pass to SearxSearchWrapper. Add debug logging + # so we can trace which host/port the application is actually using. + base_url = f"{type}://{host}:{port}" + self.searcher = SearxSearchWrapper(searx_host=base_url) + self.base_url = base_url + self.tavily_client = None + try: + logger.info(f"SearchWeb initialized with searx base_url={self.base_url}") + # Some wrappers may expose their configured host attribute; ensure it's set for visibility + if not hasattr(self.searcher, 'searx_host'): + try: + setattr(self.searcher, 'searx_host', self.base_url) + except Exception: + pass + except Exception: + # logging should not crash initialization + pass def query_search(self, query, engines=['google','brave','duckduckgo','startpage','yahoo'], num_results=3): """ - Performs a search using the Searx engine and retrieves search results. + Performs a search using the configured provider and retrieves search results. Args: query (str): The search query. - engines (list, optional): The search engines to use. Defaults to ['google']. 
- num_results (int, optional): The number of search results to retrieve. Defaults to 5. + engines (list, optional): The search engines to use (SearXNG only). Defaults to ['google', 'brave', 'duckduckgo', 'startpage', 'yahoo']. + num_results (int, optional): The number of search results to retrieve. Defaults to 3. Returns: - list: The search results from Searx. + list: The search results as dicts with 'link', 'title' and 'snippet' keys. """ + if self.search_provider == 'tavily': + return self._tavily_search(query, num_results=num_results) + return self._searxng_search(query, engines=engines, num_results=num_results) + + def _searxng_search(self, query, engines, num_results): try: search_results = self.searcher.results( query, @@ -120,6 +139,27 @@ def query_search(self, query, engines=['google','brave','duckduckgo','startpage' logger.error(f"Error during search for query '{query}': {e}") return [] + def _tavily_search(self, query, num_results): + try: + response = self.tavily_client.search( + query=query, + max_results=num_results, + search_depth="basic", + ) + # Normalize Tavily results to match SearXNG result format (dict with 'link', 'title', 'snippet') + results = [] + for item in response.get("results", []): + results.append({ + "link": item.get("url", ""), + "title": item.get("title", ""), + "snippet": item.get("content", ""), + }) + logger.info(f"Tavily search results for query '{query}': {results}") + return results + except Exception as e: + logger.error(f"Error during Tavily search for query '{query}': {e}") + return [] + def scrape_text(self, url): """ Scrapes the plain text content from the specified URL, removing HTML tags and unwanted elements.