
import requests
from bs4 import BeautifulSoup
-from googlesearch import search as google_search
from langchain_community.tools import DuckDuckGoSearchResults


def search_on_web(
    query: str,
-    search_engine: str = "Google",
+    search_engine: str = "duckduckgo",
    max_results: int = 10,
    port: int = 8080,
    timeout: int = 10,
@@ -41,7 +40,7 @@ def search_on_web(
        raise ValueError("Query must be a non-empty string")

    search_engine = search_engine.lower()
-    valid_engines = {"google", "duckduckgo", "bing", "searxng", "serper"}
+    valid_engines = {"duckduckgo", "bing", "searxng", "serper"}
    if search_engine not in valid_engines:
        raise ValueError(f"Search engine must be one of: {', '.join(valid_engines)}")

@@ -52,20 +51,12 @@ def search_on_web(

    try:
        results = []
-        if search_engine == "google":
-            kwargs = {
-                "num_results": max_results,
-                "proxy": formatted_proxy,
-                "lang": language,
-            }
-            if region:
-                kwargs["region"] = region
-
-            results = list(google_search(query, **kwargs))
-
-        elif search_engine == "duckduckgo":
+        if search_engine == "duckduckgo":
+            # Create a DuckDuckGo search object with max_results
            research = DuckDuckGoSearchResults(max_results=max_results)
+            # Run the search
            res = research.run(query)
+            # Extract URLs using regex
            results = re.findall(r"https?://[^\s,\]]+", res)

        elif search_engine == "bing":
@@ -74,7 +65,7 @@ def search_on_web(
        elif search_engine == "searxng":
            results = _search_searxng(query, max_results, port, timeout)

-        elif search_engine.lower() == "serper":
+        elif search_engine == "serper":
            results = _search_serper(query, max_results, serper_api_key, timeout)

        return filter_pdf_links(results)
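
With this change DuckDuckGo becomes the default engine and the googlesearch dependency is dropped entirely, so passing "google" now fails validation. A minimal usage sketch of the updated function follows; the import path is an assumption for illustration, since the diff does not show where the module lives.

# Usage sketch (hypothetical import path; adjust to the actual module location).
from research_web import search_on_web

# DuckDuckGo is now the default, so no search_engine argument is needed.
urls = search_on_web("open source web scraping frameworks", max_results=5)
print(urls)  # list of result URLs, with PDF links filtered out

# Other engines are still selected explicitly; "google" now raises ValueError.
bing_urls = search_on_web("web scraping", search_engine="bing", max_results=5)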
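
The DuckDuckGo branch extracts URLs from the tool's plain-string output with a regex rather than consuming structured results. A small standalone sketch of that extraction step, using a made-up sample string:

import re

# Made-up stand-in for the string returned by DuckDuckGoSearchResults.run().
sample = "snippet: Example page, title: Example, link: https://example.com/page], snippet: ..."
urls = re.findall(r"https?://[^\s,\]]+", sample)
print(urls)  # ['https://example.com/page']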