Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
302 changes: 171 additions & 131 deletions allrecipes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,138 +5,178 @@
import urllib.parse
import urllib.request

import re
import ssl


class AllRecipes(object):
    """Minimal scraper for allrecipes.com: search recipes and fetch details.

    All methods are static/class methods; no instance state is kept.
    """

    @staticmethod
    def _fetch_soup(url):
        """Download *url* and return it parsed with BeautifulSoup.

        The 'euConsent' cookie skips the EU consent interstitial.
        NOTE(review): certificate verification is disabled, matching the
        original behaviour — consider a verified SSL context if possible.
        """
        req = urllib.request.Request(url)
        req.add_header('Cookie', 'euConsent=true')
        handler = urllib.request.HTTPSHandler(
            context=ssl._create_unverified_context())
        opener = urllib.request.build_opener(handler)
        response = opener.open(req)
        return BeautifulSoup(response.read(), 'html.parser')

    @staticmethod
    def search(search_string):
        """Search recipes by parsing the returned HTML data.

        Args:
            search_string (str): Free-text query passed to the site search.

        Returns:
            list[dict]: One dict per recipe card with keys
                'name', 'url', 'rate' and 'image'.
        """
        base_url = "https://allrecipes.com/search?"
        query_url = urllib.parse.urlencode({"q": search_string})
        soup = AllRecipes._fetch_soup(base_url + query_url)

        search_data = []
        articles = soup.find_all("a", {"class": "mntl-card-list-items"})
        # Keep only links that point at actual recipe pages (the search
        # results also contain article/gallery cards).
        articles = [a for a in articles if a["href"].startswith(
            "https://www.allrecipes.com/recipe/")]

        for article in articles:
            data = {}
            try:
                data["name"] = article.find(
                    "span", {"class": "card__title"}).get_text().strip(' \t\n\r')
                data["url"] = article['href']
                try:
                    # One full-star icon per rating point, plus an
                    # optional half star.
                    data["rate"] = len(
                        article.find_all("svg", {"class": "icon-star"}))
                    if article.find_all("svg", {"class": "icon-star-half"}):
                        data["rate"] += 0.5
                except Exception:
                    data["rate"] = None
                try:
                    data["image"] = article.find('img')['data-src']
                except Exception:
                    try:
                        data["image"] = article.find('img')['src']
                    except Exception:
                        # No usable image attribute on this card.
                        data["image"] = None
            except Exception:
                # Card without the expected markup: skip it (best effort).
                pass
            if data:
                search_data.append(data)

        return search_data

    @staticmethod
    def _get_name(soup):
        """Return the recipe title from the page's main heading."""
        return soup.find("h1", {"id": "article-heading_2-0"}).get_text().strip(' \t\n\r')

    @staticmethod
    def _get_rating(soup):
        """Return the recipe's average rating as a float."""
        return float(soup.find("div", {"id": "mntl-recipe-review-bar__rating_2-0"}).get_text().strip(' \t\n\r'))

    @staticmethod
    def _get_ingredients(soup):
        """Return the list of ingredient strings."""
        return [li.get_text().strip(' \t\n\r')
                for li in soup.find("div", {"id": "mntl-structured-ingredients_1-0"}).find_all("li")]

    @staticmethod
    def _get_steps(soup):
        """Return the list of cooking-step strings."""
        return [li.get_text().strip(' \t\n\r')
                for li in soup.find("div", {"id": "recipe__steps_1-0"}).find_all("li")]

    @staticmethod
    def _get_times_data(soup, text):
        """Return the value displayed next to label *text* (e.g. "Prep Time:")."""
        return soup.find("div", {"id": "recipe-details_1-0"}).find("div", text=text).parent.find("div", {"class": "mntl-recipe-details__value"}).get_text().strip(' \t\n\r')

    @classmethod
    def _get_prep_time(cls, soup):
        """Return the displayed preparation time string."""
        return cls._get_times_data(soup, "Prep Time:")

    @classmethod
    def _get_cook_time(cls, soup):
        """Return the displayed cooking time string."""
        return cls._get_times_data(soup, "Cook Time:")

    @classmethod
    def _get_total_time(cls, soup):
        """Return the displayed total time string."""
        return cls._get_times_data(soup, "Total Time:")

    @classmethod
    def _get_nb_servings(cls, soup):
        """Return the displayed number of servings string."""
        return cls._get_times_data(soup, "Servings:")

    @classmethod
    def get(cls, url):
        """Fetch and parse one recipe page.

        'url' from 'search' method.
        ex. "https://www.allrecipes.com/recipe/106349/beef-and-spinach-curry/"

        Returns:
            dict: Parsed recipe fields plus the source 'url'; any field
                whose parser raises falls back to the default below.
        """
        soup = cls._fetch_soup(url)

        # Field name -> default value used when its parser raises.
        defaults = {
            "name": "",
            "ingredients": [],
            "steps": [],
            "rating": None,
            "prep_time": "",
            "cook_time": "",
            "total_time": "",
            "nb_servings": "",
        }

        data = {"url": url}
        for field, default in defaults.items():
            try:
                data[field] = getattr(cls, "_get_" + field)(soup)
            except Exception:  # narrowed from a bare 'except:'; parsing is best effort
                data[field] = default

        return data
@staticmethod
def fetch_categories(url="https://www.allrecipes.com/recipes/"):
    """
    Fetch categories available on an Allrecipes.com recipes page.

    Args:
        url (str, optional): The URL of an AllRecipes recipes page.
            Defaults to "https://www.allrecipes.com/recipes/".

    Returns:
        dict: A dictionary mapping category names to their URLs.
    """
    req = urllib.request.Request(url)
    req.add_header('Cookie', 'euConsent=true')

    handler = urllib.request.HTTPSHandler(
        context=ssl._create_unverified_context())
    opener = urllib.request.build_opener(handler)
    response = opener.open(req)
    html_content = response.read()

    soup = BeautifulSoup(html_content, 'html.parser')

    # The A-Z index page uses different link markup than the regular
    # category pages; pick the right selector up front instead of
    # scraping the document twice and discarding the first result.
    if "a-z" in url:
        link_class = "link-list__link type--dog-bold type--dog-link"
    else:
        link_class = "taxonomy-nodes__link mntl-text-link type--squirrel-link"

    headers = soup.find_all("a", {"class": link_class})

    return {header.get_text(): header["href"] for header in headers}

@staticmethod
def search(search_string):
    """
    Search recipes by parsing the returned HTML data.

    Args:
        search_string (str): Free-text query passed to the site search.

    Returns:
        list[dict]: One dict per recipe card with keys
            'name', 'url', 'rate' and 'image'.
    """
    base_url = "https://allrecipes.com/search?"
    query_url = urllib.parse.urlencode({"q": search_string})

    url = base_url + query_url

    req = urllib.request.Request(url)
    req.add_header('Cookie', 'euConsent=true')

    handler = urllib.request.HTTPSHandler(
        context=ssl._create_unverified_context())
    opener = urllib.request.build_opener(handler)
    response = opener.open(req)
    html_content = response.read()

    soup = BeautifulSoup(html_content, 'html.parser')

    search_data = []
    articles = soup.find_all("a", {"class": "mntl-card-list-items"})

    # Keep only links that point at actual recipe pages (the results
    # also contain article/gallery cards).
    articles = [a for a in articles if a["href"].startswith(
        "https://www.allrecipes.com/recipe/")]

    for article in articles:
        data = {}
        try:
            data["name"] = article.find(
                "span", {"class": "card__title"}).get_text().strip(' \t\n\r')
            data["url"] = article['href']
            try:
                # One full-star icon per rating point, plus an optional
                # half star.
                data["rate"] = len(article.find_all(
                    "svg", {"class": "icon-star"}))
                if article.find_all("svg", {"class": "icon-star-half"}):
                    data["rate"] += 0.5
            except Exception:
                data["rate"] = None
            try:
                data["image"] = article.find('img')['data-src']
            except Exception:
                try:
                    data["image"] = article.find('img')['src']
                except Exception:
                    # No usable image attribute on this card.
                    data["image"] = None
        except Exception:
            # Card without the expected markup: skip it (best effort).
            pass
        if data:
            search_data.append(data)

    return search_data

@staticmethod
def _get_name(soup):
return soup.find("h1", {"id": "article-heading_2-0"}).get_text().strip(' \t\n\r')

@staticmethod
def _get_rating(soup):
return float(soup.find("div", {"id": "mntl-recipe-review-bar__rating_2-0"}).get_text().strip(' \t\n\r'))

@staticmethod
def _get_ingredients(soup):
return [li.get_text().strip(' \t\n\r') for li in soup.find("div", {"id": "mntl-structured-ingredients_1-0"}).find_all("li")]

@staticmethod
def _get_steps(soup):
return [li.get_text().strip(' \t\n\r') for li in soup.find("div", {"id": "recipe__steps_1-0"}).find_all("li")]

@staticmethod
def _get_times_data(soup, text):
return soup.find("div", {"id": "recipe-details_1-0"}).find("div", text=text).parent.find("div", {"class": "mntl-recipe-details__value"}).get_text().strip(' \t\n\r')

@classmethod
def _get_prep_time(cls, soup):
return cls._get_times_data(soup, "Prep Time:")

@classmethod
def _get_cook_time(cls, soup):
return cls._get_times_data(soup, "Cook Time:")

@classmethod
def _get_total_time(cls, soup):
return cls._get_times_data(soup, "Total Time:")

@classmethod
def _get_nb_servings(cls, soup):
return cls._get_times_data(soup, "Servings:")

@classmethod
def get(cls, url):
    """
    Fetch and parse a single recipe page.

    'url' from 'search' method.
    ex. "https://www.allrecipes.com/recipe/106349/beef-and-spinach-curry/"

    Returns:
        dict: Parsed recipe fields plus the source 'url'; any field whose
            parser raises falls back to its default below.
    """
    req = urllib.request.Request(url)
    req.add_header('Cookie', 'euConsent=true')

    handler = urllib.request.HTTPSHandler(
        context=ssl._create_unverified_context())
    opener = urllib.request.build_opener(handler)
    response = opener.open(req)
    html_content = response.read()

    soup = BeautifulSoup(html_content, 'html.parser')

    # Field name -> default value used when its parser raises.
    defaults = {
        "name": "",
        "ingredients": [],
        "steps": [],
        "rating": None,
        "prep_time": "",
        "cook_time": "",
        "total_time": "",
        "nb_servings": "",
    }

    data = {"url": url}
    for field, default in defaults.items():
        try:
            data[field] = getattr(cls, "_get_" + field)(soup)
        except Exception:  # narrowed from a bare 'except:'; parsing is best effort
            data[field] = default

    return data
32 changes: 32 additions & 0 deletions try.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from allrecipes import AllRecipes


# Demo: browse categories, run a search, then print one recipe in full.

# Categories listed on the main recipes page.
categories = AllRecipes.fetch_categories()

# Show every category found at the default URL.
print(categories)

# Each stored value is a complete URL, so it can be fed straight back in
# to list the sub-categories of "Salad Recipes".
print(AllRecipes.fetch_categories(categories["Salad Recipes"]))

# Run a search query.
search_string = "pork curry"
query_result = AllRecipes.search(search_string)
print(query_result)

# Fetch the first (most relevant) result in detail.
main_recipe_url = query_result[0]['url']
detailed_recipe = AllRecipes.get(main_recipe_url)

# Pretty-print the recipe.
print("## %s:" % detailed_recipe['name'])

print("### For %s servings:" % detailed_recipe['nb_servings'])
for ingredient in detailed_recipe['ingredients']:
    print("- %s" % ingredient)

for step in detailed_recipe['steps']:
    print("# %s" % step)