Skip to content

Commit 38693c7

Browse files
committed
feat: add files
1 parent 188f63f commit 38693c7

File tree

8 files changed

+2192
-0
lines changed

8 files changed

+2192
-0
lines changed

examples/01_basic_smartscraper.rb

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

# Basic SmartScraper Example
#
# This example demonstrates the most common use case for ScrapeGraphAI:
# extracting specific data from a webpage using natural language prompts.
#
# Usage:
#   export SCRAPEGRAPHAI_API_KEY="your-api-key"
#   ruby examples/01_basic_smartscraper.rb

require "bundler/setup"
require "json" # JSON.pretty_generate is used below; do not rely on the SDK loading it
require "scrapegraphai"

# Prints the outcome of one SmartScraper request.
#
# @param result [#request_id, #status, #result] response object returned by
#   `client.smartscraper.create`
# @param label [String] heading printed above the extracted payload
# @return [void]
def print_result(result, label = "Extracted Data:")
  puts "Request ID: #{result.request_id}"
  puts "Status: #{result.status}"
  puts label
  # The payload may be absent; only pretty-print it when there is one.
  puts JSON.pretty_generate(result.result) if result.result
end

# Fail fast with a readable message when the key is missing.
# NOTE(review): assumes the client cannot authenticate without this key - confirm against the SDK.
api_key = ENV.fetch("SCRAPEGRAPHAI_API_KEY") do
  abort "❌ SCRAPEGRAPHAI_API_KEY is not set. Run: export SCRAPEGRAPHAI_API_KEY=\"your-api-key\""
end

# Initialize the client with your API key
client = Scrapegraphai::Client.new(api_key: api_key)

puts "🚀 Basic SmartScraper Example"
puts "=" * 50

begin
  # Example 1: Extract product information from an e-commerce page
  puts "\n📦 Extracting product information..."

  print_result(
    client.smartscraper.create(
      user_prompt: "Extract the product name, price, description, and availability status",
      website_url: "https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html"
    )
  )

  # Example 2: Extract contact information from a company website
  puts "\n📞 Extracting contact information..."

  print_result(
    client.smartscraper.create(
      user_prompt: "Find all contact information including email addresses, phone numbers, and physical addresses",
      website_url: "https://scrapethissite.com/pages/simple/"
    )
  )

  # Example 3: Extract news article information
  puts "\n📰 Extracting news article information..."

  print_result(
    client.smartscraper.create(
      user_prompt: "Extract the headline, author, publication date, and main content summary",
      website_url: "https://quotes.toscrape.com/"
    )
  )
rescue Scrapegraphai::Errors::AuthenticationError => e
  puts "❌ Authentication failed. Please check your API key."
  puts "Error: #{e.message}"
rescue Scrapegraphai::Errors::RateLimitError => e
  puts "⏳ Rate limit exceeded. Please wait before making more requests."
  puts "Error: #{e.message}"
rescue Scrapegraphai::Errors::APIError => e
  puts "❌ API Error occurred:"
  puts "Status: #{e.status}" if e.respond_to?(:status)
  puts "Error: #{e.message}"
rescue => e
  puts "❌ Unexpected error occurred:"
  puts "Error: #{e.message}"
else
  # Only report success when every example ran without raising.
  puts "\n✅ Basic SmartScraper example completed!"
end

examples/02_advanced_smartscraper.rb

Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

# Advanced SmartScraper Example
#
# This example demonstrates advanced features of SmartScraper including:
# - Custom JSON schema for structured output
# - Pagination handling
# - Custom headers and cookies
# - Infinite scrolling
# - Heavy JavaScript rendering
# - Website interaction steps
# - Processing raw HTML content instead of a URL
#
# Usage:
#   export SCRAPEGRAPHAI_API_KEY="your-api-key"
#   ruby examples/02_advanced_smartscraper.rb

require "bundler/setup"
require "json" # JSON.pretty_generate is used below; do not rely on the SDK loading it
require "scrapegraphai"

# Prints the outcome of one SmartScraper request.
#
# @param result [#request_id, #status, #result] response object returned by
#   `client.smartscraper.create`
# @param label [String] heading printed above the extracted payload
# @return [void]
def print_result(result, label)
  puts "Request ID: #{result.request_id}"
  puts "Status: #{result.status}"
  puts label
  # The payload may be absent; only pretty-print it when there is one.
  puts JSON.pretty_generate(result.result) if result.result
end

# Fail fast with a readable message when the key is missing.
# NOTE(review): assumes the client cannot authenticate without this key - confirm against the SDK.
api_key = ENV.fetch("SCRAPEGRAPHAI_API_KEY") do
  abort "❌ SCRAPEGRAPHAI_API_KEY is not set. Run: export SCRAPEGRAPHAI_API_KEY=\"your-api-key\""
end

# Initialize the client
client = Scrapegraphai::Client.new(api_key: api_key)

puts "🚀 Advanced SmartScraper Example"
puts "=" * 50

begin
  # Example 1: Using custom JSON schema for structured output
  puts "\n📋 Using custom JSON schema..."

  custom_schema = {
    type: "object",
    properties: {
      products: {
        type: "array",
        items: {
          type: "object",
          properties: {
            name: { type: "string" },
            price: { type: "string" },
            availability: { type: "string" },
            rating: { type: "number" },
            description: { type: "string" }
          },
          required: ["name", "price"]
        }
      },
      total_products: { type: "integer" },
      page_info: {
        type: "object",
        properties: {
          current_page: { type: "integer" },
          has_next_page: { type: "boolean" }
        }
      }
    }
  }

  result = client.smartscraper.create(
    user_prompt: "Extract all products with their details",
    website_url: "https://books.toscrape.com/",
    output_schema: custom_schema
  )
  print_result(result, "Structured Data:")

  # Example 2: Handling pagination
  puts "\n📄 Handling pagination..."

  result = client.smartscraper.create(
    user_prompt: "Extract all book titles and prices from multiple pages",
    website_url: "https://books.toscrape.com/",
    total_pages: 3 # Process first 3 pages
  )
  print_result(result, "Paginated Data:")

  # Example 3: Using custom headers and cookies
  puts "\n🍪 Using custom headers and cookies..."

  # Header names contain hyphens, so these symbol keys must stay quoted.
  custom_headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
    "Accept-Language": "en-US,en;q=0.9"
  }

  custom_cookies = {
    session_id: "abc123",
    preferences: "theme=dark"
  }

  result = client.smartscraper.create(
    user_prompt: "Extract main content and any personalization elements",
    website_url: "https://httpbin.org/headers",
    headers: custom_headers,
    cookies: custom_cookies
  )
  print_result(result, "Data with Custom Headers/Cookies:")

  # Example 4: Infinite scrolling
  puts "\n🔄 Handling infinite scroll..."

  result = client.smartscraper.create(
    user_prompt: "Extract all visible items after scrolling",
    website_url: "https://quotes.toscrape.com/scroll",
    number_of_scrolls: 3 # Perform 3 scroll operations
  )
  print_result(result, "Scrolled Data:")

  # Example 5: Heavy JavaScript rendering
  puts "\n⚡ Heavy JavaScript rendering..."

  result = client.smartscraper.create(
    user_prompt: "Extract dynamically loaded content",
    website_url: "https://quotes.toscrape.com/js/",
    render_heavy_js: true # Enable heavy JS rendering
  )
  print_result(result, "JS-Rendered Data:")

  # Example 6: Website interaction steps
  puts "\n🖱️ Website interactions..."

  interaction_steps = [
    "click on button with text 'Load More'",
    "wait for 2 seconds",
    "click on tab 'Popular Quotes'"
  ]

  result = client.smartscraper.create(
    user_prompt: "Extract content after performing interactions",
    website_url: "https://quotes.toscrape.com/",
    steps: interaction_steps
  )
  print_result(result, "Interactive Data:")

  # Example 7: Processing HTML content directly
  puts "\n📄 Processing HTML content directly..."

  html_content = <<~HTML
    <!DOCTYPE html>
    <html>
    <head><title>Sample Page</title></head>
    <body>
      <div class="product">
        <h1>Sample Product</h1>
        <p class="price">$29.99</p>
        <p class="description">This is a sample product description.</p>
        <span class="stock">In Stock</span>
      </div>
      <div class="product">
        <h1>Another Product</h1>
        <p class="price">$19.99</p>
        <p class="description">Another sample product.</p>
        <span class="stock">Out of Stock</span>
      </div>
    </body>
    </html>
  HTML

  result = client.smartscraper.create(
    user_prompt: "Extract all products with their name, price, description, and stock status",
    website_html: html_content # Process HTML directly instead of URL
  )
  print_result(result, "HTML-Processed Data:")
rescue Scrapegraphai::Errors::AuthenticationError => e
  puts "❌ Authentication failed. Please check your API key."
  puts "Error: #{e.message}"
rescue Scrapegraphai::Errors::RateLimitError => e
  puts "⏳ Rate limit exceeded. Please wait before making more requests."
  puts "Error: #{e.message}"
rescue Scrapegraphai::Errors::UnprocessableEntityError => e
  puts "❌ Invalid request parameters. Please check your input."
  puts "Error: #{e.message}"
rescue Scrapegraphai::Errors::APIError => e
  puts "❌ API Error occurred:"
  puts "Status: #{e.status}" if e.respond_to?(:status)
  puts "Error: #{e.message}"
rescue => e
  puts "❌ Unexpected error occurred:"
  puts "Error: #{e.message}"
else
  # Only report success when every example ran without raising.
  puts "\n✅ Advanced SmartScraper example completed!"
end

0 commit comments

Comments
 (0)