LangChain Scrapeless: an all-in-one, highly scalable web scraping toolkit for enterprises and developers that also integrates with LangChain’s AI tools. Maintained by Scrapeless.
langchain-scrapeless is designed for seamless integration with LangChain, enabling you to:
- Run custom scraping tasks using your own crawlers or scraping logic.
- Automate data extraction and processing workflows in Python.
- Manage and interact with datasets produced by your scraping jobs.
- Access scraping and data handling capabilities as LangChain tools, making them easy to compose with LLM-powered chains and agents.
pip install langchain-scrapeless
You should configure the credentials for the Scrapeless API in your environment variables.
SCRAPELESS_API_KEY: Your Scrapeless API key.
If you don't have an API key, you can register here and learn how to get your API key in the Scrapeless documentation.
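For example, here is a minimal sketch of setting the key programmatically before creating any tools (the key value below is a placeholder; exporting the variable from your shell or a .env file works just as well):

import os

# Placeholder value; replace it with the key from your Scrapeless dashboard.
os.environ["SCRAPELESS_API_KEY"] = "your-scrapeless-api-key"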
Perform Google search queries and get the results.
from langchain_scrapeless import ScrapelessDeepSerpGoogleSearchTool
tool = ScrapelessDeepSerpGoogleSearchTool()
# Basic usage
# result = tool.invoke("I want to know Scrapeless")
# print(result)
# Advanced usage
result = tool.invoke({
    "q": "Scrapeless",
    "hl": "en",
    "google_domain": "google.com"
})
print(result)
# With LangChain
from langchain_openai import ChatOpenAI
from langchain_scrapeless import ScrapelessDeepSerpGoogleSearchTool
from langgraph.prebuilt import create_react_agent
llm = ChatOpenAI()
tool = ScrapelessDeepSerpGoogleSearchTool()
# Use the tool with an agent
tools = [tool]
agent = create_react_agent(llm, tools)
for chunk in agent.stream(
    {"messages": [("human", "I want to know what Scrapeless is")]},
    stream_mode="values"
):
    chunk["messages"][-1].pretty_print()
You can visit here to learn about more customization options.
Perform Google Trends queries and get the results.
from langchain_scrapeless import ScrapelessDeepSerpGoogleTrendsTool
tool = ScrapelessDeepSerpGoogleTrendsTool()
# Basic usage
# result = tool.invoke("Funny 2048,negamon monster trainer")
# print(result)
# Advanced usage
result = tool.invoke({
    "q": "Scrapeless",
    "data_type": "related_topics",
    "hl": "en"
})
print(result)
# With LangChain
from langchain_openai import ChatOpenAI
from langchain_scrapeless import ScrapelessDeepSerpGoogleTrendsTool
from langgraph.prebuilt import create_react_agent
llm = ChatOpenAI()
tool = ScrapelessDeepSerpGoogleTrendsTool()
# Use the tool with an agent
tools = [tool]
agent = create_react_agent(llm, tools)
for chunk in agent.stream(
    {"messages": [("human", "I want to know the iPhone keyword trends")]},
    stream_mode="values"
):
    chunk["messages"][-1].pretty_print()
You can visit here to learn about more customization options.
Access any website at scale and say goodbye to blocks.
from langchain_scrapeless import ScrapelessUniversalScrapingTool
tool = ScrapelessUniversalScrapingTool()
# Basic usage
# result = tool.invoke("https://example.com")
# print(result)
# Advanced usage
result = tool.invoke({
    "url": "https://example.com",
    "response_type": "markdown"
})
print(result)
# With LangChain
from langchain_openai import ChatOpenAI
from langchain_scrapeless import ScrapelessUniversalScrapingTool
from langgraph.prebuilt import create_react_agent
llm = ChatOpenAI()
tool = ScrapelessUniversalScrapingTool()
# Use the tool with an agent
tools = [tool]
agent = create_react_agent(llm, tools)
for chunk in agent.stream(
    {"messages": [("human", "Use the scrapeless scraping tool to fetch https://www.scrapeless.com/en and extract the h1 tag.")]},
    stream_mode="values"
):
    chunk["messages"][-1].pretty_print()
You can visit here to learn about more customization options.
Crawl a website and its linked pages to extract comprehensive data.
from langchain_scrapeless import ScrapelessCrawlerCrawlTool
tool = ScrapelessCrawlerCrawlTool()
# Basic usage
# result = tool.invoke("https://example.com")
# print(result)
# Advanced usage
result = tool.invoke({
    "url": "https://example.com",
    "limit": 4
})
print(result)
# With LangChain
from langchain_openai import ChatOpenAI
from langchain_scrapeless import ScrapelessCrawlerCrawlTool
from langgraph.prebuilt import create_react_agent
llm = ChatOpenAI()
tool = ScrapelessCrawlerCrawlTool()
# Use the tool with an agent
tools = [tool]
agent = create_react_agent(llm, tools)
for chunk in agent.stream(
    {"messages": [("human", "Use the scrapeless crawler crawl tool to crawl the website https://example.com and output the markdown content as a string.")]},
    stream_mode="values"
):
    chunk["messages"][-1].pretty_print()
You can visit here to learn about more customization options.
Extract data from a single webpage or multiple webpages.
from langchain_scrapeless import ScrapelessCrawlerScrapeTool
tool = ScrapelessCrawlerScrapeTool()
result = tool.invoke({
    "urls": ["https://example.com", "https://www.scrapeless.com/en"],
    "formats": ["markdown"]
})
print(result)
# With LangChain
from langchain_openai import ChatOpenAI
from langchain_scrapeless import ScrapelessCrawlerScrapeTool
from langgraph.prebuilt import create_react_agent
llm = ChatOpenAI()
tool = ScrapelessCrawlerScrapeTool()
# Use the tool with an agent
tools = [tool]
agent = create_react_agent(llm, tools)
for chunk in agent.stream(
    {"messages": [("human", "Use the scrapeless crawler scrape tool to get the website content of https://example.com and output the html content as a string.")]},
    stream_mode="values"
):
    chunk["messages"][-1].pretty_print()