Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .env
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,8 @@ PORT_NUM_SEARXNG=8085
HOST_SEARXNG=0.0.0.0
# Example API key environment variable (optional). Set in your .env, NOT in model_config.py
GOOGLE_API_KEY=REPLACE_YOUR_API_KEY
ADMIN_TOKEN=123456
ADMIN_TOKEN=123456
# Search provider: 'searxng' (default) or 'tavily'
# SEARCH_PROVIDER=searxng
# Required when SEARCH_PROVIDER=tavily
# TAVILY_API_KEY=tvly-YOUR_API_KEY
3 changes: 3 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ services:
# Ensure app binds to all interfaces inside the container (host access via localhost:8000)
- HOST_APP=0.0.0.0
- PORT_NUM_APP=8000
# Search provider selection: 'searxng' (default) or 'tavily'
- SEARCH_PROVIDER=${SEARCH_PROVIDER:-searxng}
- TAVILY_API_KEY=${TAVILY_API_KEY:-}
depends_on:
- searxng

Expand Down
4 changes: 4 additions & 0 deletions model_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@
HOST_APP = os.environ.get('HOST_APP', DEFAULT_HOST_APP)
HOST_SEARXNG = os.environ.get('HOST_SEARXNG', DEFAULT_HOST_SEARXNG)

# Search provider: 'searxng' (default) or 'tavily'
SEARCH_PROVIDER = os.environ.get('SEARCH_PROVIDER', 'searxng')
TAVILY_API_KEY = os.environ.get('TAVILY_API_KEY', '')

###############

# API keys: prefer provider-specific vars, fall back to the old GOOGLE_API_KEY for compatibility
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ langchain-groq
youtube_transcript_api>=1.1.0
slowapi
charset-normalizer
tavily-python
kokoro-onnx
soundfile
langchain-chroma
82 changes: 61 additions & 21 deletions utils/websearch_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,51 +63,70 @@

class SearchWeb:
"""
A class for performing web searches using Searx and scraping content from web pages.
A class for performing web searches using Searx or Tavily and scraping content from web pages.

Attributes:
port (int): The port number for the Searx search wrapper.
searcher (SearxSearchWrapper): An instance of SearxSearchWrapper for querying search engines.
search_provider (str): The active search provider ('searxng' or 'tavily').
"""

def __init__(self, port, host="localhost", type='http'):
    """
    Initialize the web-search client for the configured provider.

    When SEARCH_PROVIDER is 'tavily', a TavilyClient is created and the
    SearXNG wrapper is left unset; otherwise a SearxSearchWrapper is built
    against the given host/port.

    Args:
        port (int): The port number for the SearXNG search service.
        host (str): The host address for the SearXNG search service.
        type (str): The protocol type ('http' or 'https'). Defaults to 'http'.
    """
    # Imported lazily so the env-driven config is read at construction time.
    from model_config import SEARCH_PROVIDER, TAVILY_API_KEY

    self.search_provider = SEARCH_PROVIDER.lower()

    if self.search_provider == 'tavily':
        from tavily import TavilyClient
        # Surface misconfiguration early instead of failing on the first query.
        if not TAVILY_API_KEY:
            logger.warning("SEARCH_PROVIDER=tavily but TAVILY_API_KEY is empty; Tavily requests will fail")
        self.tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
        self.searcher = None
        self.base_url = None
        logger.info("SearchWeb initialized with Tavily provider")
    else:
        # Build explicit base URL and pass to SearxSearchWrapper. Add debug logging
        # so we can trace which host/port the application is actually using.
        base_url = f"{type}://{host}:{port}"
        self.searcher = SearxSearchWrapper(searx_host=base_url)
        self.base_url = base_url
        self.tavily_client = None
        try:
            logger.info(f"SearchWeb initialized with searx base_url={self.base_url}")
            # Some wrappers may expose their configured host attribute; ensure it's set for visibility
            if not hasattr(self.searcher, 'searx_host'):
                try:
                    setattr(self.searcher, 'searx_host', self.base_url)
                except Exception:
                    pass
        except Exception:
            # logging should not crash initialization
            pass

def query_search(self, query, engines=['google','brave','duckduckgo','startpage','yahoo'], num_results=3):
"""
Performs a search using the Searx engine and retrieves search results.
Performs a search using the configured provider and retrieves search results.

Args:
query (str): The search query.
engines (list, optional): The search engines to use. Defaults to ['google'].
num_results (int, optional): The number of search results to retrieve. Defaults to 5.
engines (list, optional): The search engines to use (SearXNG only). Defaults to ['google'].
num_results (int, optional): The number of search results to retrieve. Defaults to 3.

Returns:
list: The search results from Searx.
list: The search results.
"""
if self.search_provider == 'tavily':
return self._tavily_search(query, num_results=num_results)
return self._searxng_search(query, engines=engines, num_results=num_results)

def _searxng_search(self, query, engines, num_results):
try:
search_results = self.searcher.results(
query,
Expand All @@ -120,6 +139,27 @@ def query_search(self, query, engines=['google','brave','duckduckgo','startpage'
logger.error(f"Error during search for query '{query}': {e}")
return []

def _tavily_search(self, query, num_results):
    """
    Run a Tavily search and adapt the response to the SearXNG result shape.

    Args:
        query (str): The search query.
        num_results (int): Maximum number of results to request from Tavily.

    Returns:
        list: Dicts with 'link', 'title' and 'snippet' keys; empty list on any error.
    """
    try:
        response = self.tavily_client.search(
            query=query,
            max_results=num_results,
            search_depth="basic",
        )
        # Normalize Tavily results to match SearXNG result format (dict with 'link', 'title', 'snippet')
        results = [
            {
                "link": entry.get("url", ""),
                "title": entry.get("title", ""),
                "snippet": entry.get("content", ""),
            }
            for entry in response.get("results", [])
        ]
        logger.info(f"Tavily search results for query '{query}': {results}")
        return results
    except Exception as e:
        logger.error(f"Error during Tavily search for query '{query}': {e}")
        return []

def scrape_text(self, url):
"""
Scrapes the plain text content from the specified URL, removing HTML tags and unwanted elements.
Expand Down