Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
302 changes: 171 additions & 131 deletions allrecipes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,138 +5,178 @@
import urllib.parse
import urllib.request

import re
import ssl


class AllRecipes(object):
    """Minimal scraper for allrecipes.com: search recipes and fetch details.

    All methods are static/class methods; no instance state is kept.
    """

    @staticmethod
    def _fetch_soup(url):
        """Download *url* and return it parsed with BeautifulSoup.

        The 'euConsent' cookie skips the EU consent interstitial.
        NOTE(review): certificate verification is disabled, matching the
        original behaviour — consider a verified SSL context if possible.
        """
        req = urllib.request.Request(url)
        req.add_header('Cookie', 'euConsent=true')
        handler = urllib.request.HTTPSHandler(
            context=ssl._create_unverified_context())
        opener = urllib.request.build_opener(handler)
        response = opener.open(req)
        return BeautifulSoup(response.read(), 'html.parser')

    @staticmethod
    def search(search_string):
        """Search recipes by parsing the returned HTML data.

        Args:
            search_string (str): Free-text query passed to the site search.

        Returns:
            list[dict]: One dict per recipe card with keys
                'name', 'url', 'rate' and 'image'.
        """
        base_url = "https://allrecipes.com/search?"
        query_url = urllib.parse.urlencode({"q": search_string})
        soup = AllRecipes._fetch_soup(base_url + query_url)

        search_data = []
        articles = soup.find_all("a", {"class": "mntl-card-list-items"})
        # Keep only links that point at actual recipe pages (the search
        # results also contain article/gallery cards).
        articles = [a for a in articles if a["href"].startswith(
            "https://www.allrecipes.com/recipe/")]

        for article in articles:
            data = {}
            try:
                data["name"] = article.find(
                    "span", {"class": "card__title"}).get_text().strip(' \t\n\r')
                data["url"] = article['href']
                try:
                    # One full-star icon per rating point, plus an
                    # optional half star.
                    data["rate"] = len(
                        article.find_all("svg", {"class": "icon-star"}))
                    if article.find_all("svg", {"class": "icon-star-half"}):
                        data["rate"] += 0.5
                except Exception:
                    data["rate"] = None
                try:
                    data["image"] = article.find('img')['data-src']
                except Exception:
                    try:
                        data["image"] = article.find('img')['src']
                    except Exception:
                        # No usable image attribute on this card.
                        data["image"] = None
            except Exception:
                # Card without the expected markup: skip it (best effort).
                pass
            if data:
                search_data.append(data)

        return search_data

    @staticmethod
    def _get_name(soup):
        """Return the recipe title from the page's main heading."""
        return soup.find("h1", {"id": "article-heading_2-0"}).get_text().strip(' \t\n\r')

    @staticmethod
    def _get_rating(soup):
        """Return the recipe's average rating as a float."""
        return float(soup.find("div", {"id": "mntl-recipe-review-bar__rating_2-0"}).get_text().strip(' \t\n\r'))

    @staticmethod
    def _get_ingredients(soup):
        """Return the list of ingredient strings."""
        return [li.get_text().strip(' \t\n\r')
                for li in soup.find("div", {"id": "mntl-structured-ingredients_1-0"}).find_all("li")]

    @staticmethod
    def _get_steps(soup):
        """Return the list of cooking-step strings."""
        return [li.get_text().strip(' \t\n\r')
                for li in soup.find("div", {"id": "recipe__steps_1-0"}).find_all("li")]

    @staticmethod
    def _get_times_data(soup, text):
        """Return the value displayed next to label *text* (e.g. "Prep Time:")."""
        return soup.find("div", {"id": "recipe-details_1-0"}).find("div", text=text).parent.find("div", {"class": "mntl-recipe-details__value"}).get_text().strip(' \t\n\r')

    @classmethod
    def _get_prep_time(cls, soup):
        """Return the displayed preparation time string."""
        return cls._get_times_data(soup, "Prep Time:")

    @classmethod
    def _get_cook_time(cls, soup):
        """Return the displayed cooking time string."""
        return cls._get_times_data(soup, "Cook Time:")

    @classmethod
    def _get_total_time(cls, soup):
        """Return the displayed total time string."""
        return cls._get_times_data(soup, "Total Time:")

    @classmethod
    def _get_nb_servings(cls, soup):
        """Return the displayed number of servings string."""
        return cls._get_times_data(soup, "Servings:")

    @classmethod
    def get(cls, url):
        """Fetch and parse one recipe page.

        'url' from 'search' method.
        ex. "https://www.allrecipes.com/recipe/106349/beef-and-spinach-curry/"

        Returns:
            dict: Parsed recipe fields plus the source 'url'; any field
                whose parser raises falls back to the default below.
        """
        soup = cls._fetch_soup(url)

        # Field name -> default value used when its parser raises.
        defaults = {
            "name": "",
            "ingredients": [],
            "steps": [],
            "rating": None,
            "prep_time": "",
            "cook_time": "",
            "total_time": "",
            "nb_servings": "",
        }

        data = {"url": url}
        for field, default in defaults.items():
            try:
                data[field] = getattr(cls, "_get_" + field)(soup)
            except Exception:  # narrowed from a bare 'except:'; parsing is best effort
                data[field] = default

        return data
@staticmethod
def fetch_categories(url="https://www.allrecipes.com/recipes/"):
    """
    Fetch categories available on an Allrecipes.com recipes page.

    Args:
        url (str, optional): The URL of an AllRecipes recipes page.
            Defaults to "https://www.allrecipes.com/recipes/".

    Returns:
        dict: A dictionary mapping category names to their URLs.
    """
    req = urllib.request.Request(url)
    req.add_header('Cookie', 'euConsent=true')

    handler = urllib.request.HTTPSHandler(
        context=ssl._create_unverified_context())
    opener = urllib.request.build_opener(handler)
    response = opener.open(req)
    html_content = response.read()

    soup = BeautifulSoup(html_content, 'html.parser')

    # The A-Z index page uses different link markup than the regular
    # category pages; pick the right selector up front instead of
    # scraping the document twice and discarding the first result.
    if "a-z" in url:
        link_class = "link-list__link type--dog-bold type--dog-link"
    else:
        link_class = "taxonomy-nodes__link mntl-text-link type--squirrel-link"

    headers = soup.find_all("a", {"class": link_class})

    return {header.get_text(): header["href"] for header in headers}

@staticmethod
def search(search_string):
    """
    Search recipes by parsing the returned HTML data.

    Args:
        search_string (str): Free-text query passed to the site search.

    Returns:
        list[dict]: One dict per recipe card with keys
            'name', 'url', 'rate' and 'image'.
    """
    base_url = "https://allrecipes.com/search?"
    query_url = urllib.parse.urlencode({"q": search_string})

    url = base_url + query_url

    req = urllib.request.Request(url)
    req.add_header('Cookie', 'euConsent=true')

    handler = urllib.request.HTTPSHandler(
        context=ssl._create_unverified_context())
    opener = urllib.request.build_opener(handler)
    response = opener.open(req)
    html_content = response.read()

    soup = BeautifulSoup(html_content, 'html.parser')

    search_data = []
    articles = soup.find_all("a", {"class": "mntl-card-list-items"})

    # Keep only links that point at actual recipe pages (the results
    # also contain article/gallery cards).
    articles = [a for a in articles if a["href"].startswith(
        "https://www.allrecipes.com/recipe/")]

    for article in articles:
        data = {}
        try:
            data["name"] = article.find(
                "span", {"class": "card__title"}).get_text().strip(' \t\n\r')
            data["url"] = article['href']
            try:
                # One full-star icon per rating point, plus an optional
                # half star.
                data["rate"] = len(article.find_all(
                    "svg", {"class": "icon-star"}))
                if article.find_all("svg", {"class": "icon-star-half"}):
                    data["rate"] += 0.5
            except Exception:
                data["rate"] = None
            try:
                data["image"] = article.find('img')['data-src']
            except Exception:
                try:
                    data["image"] = article.find('img')['src']
                except Exception:
                    # No usable image attribute on this card.
                    data["image"] = None
        except Exception:
            # Card without the expected markup: skip it (best effort).
            pass
        if data:
            search_data.append(data)

    return search_data

@staticmethod
def _get_name(soup):
return soup.find("h1", {"id": "article-heading_2-0"}).get_text().strip(' \t\n\r')

@staticmethod
def _get_rating(soup):
return float(soup.find("div", {"id": "mntl-recipe-review-bar__rating_2-0"}).get_text().strip(' \t\n\r'))

@staticmethod
def _get_ingredients(soup):
return [li.get_text().strip(' \t\n\r') for li in soup.find("div", {"id": "mntl-structured-ingredients_1-0"}).find_all("li")]

@staticmethod
def _get_steps(soup):
return [li.get_text().strip(' \t\n\r') for li in soup.find("div", {"id": "recipe__steps_1-0"}).find_all("li")]

@staticmethod
def _get_times_data(soup, text):
return soup.find("div", {"id": "recipe-details_1-0"}).find("div", text=text).parent.find("div", {"class": "mntl-recipe-details__value"}).get_text().strip(' \t\n\r')

@classmethod
def _get_prep_time(cls, soup):
return cls._get_times_data(soup, "Prep Time:")

@classmethod
def _get_cook_time(cls, soup):
return cls._get_times_data(soup, "Cook Time:")

@classmethod
def _get_total_time(cls, soup):
return cls._get_times_data(soup, "Total Time:")

@classmethod
def _get_nb_servings(cls, soup):
return cls._get_times_data(soup, "Servings:")

@classmethod
def get(cls, url):
    """
    Fetch and parse a single recipe page.

    'url' from 'search' method.
    ex. "https://www.allrecipes.com/recipe/106349/beef-and-spinach-curry/"

    Returns:
        dict: Parsed recipe fields plus the source 'url'; any field whose
            parser raises falls back to its default below.
    """
    req = urllib.request.Request(url)
    req.add_header('Cookie', 'euConsent=true')

    handler = urllib.request.HTTPSHandler(
        context=ssl._create_unverified_context())
    opener = urllib.request.build_opener(handler)
    response = opener.open(req)
    html_content = response.read()

    soup = BeautifulSoup(html_content, 'html.parser')

    # Field name -> default value used when its parser raises.
    defaults = {
        "name": "",
        "ingredients": [],
        "steps": [],
        "rating": None,
        "prep_time": "",
        "cook_time": "",
        "total_time": "",
        "nb_servings": "",
    }

    data = {"url": url}
    for field, default in defaults.items():
        try:
            data[field] = getattr(cls, "_get_" + field)(soup)
        except Exception:  # narrowed from a bare 'except:'; parsing is best effort
            data[field] = default

    return data
32 changes: 32 additions & 0 deletions try.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from allrecipes import AllRecipes


# Demo: browse categories, run a search, then print one recipe in full.

# Categories listed on the main recipes page.
categories = AllRecipes.fetch_categories()

# Show every category found at the default URL.
print(categories)

# Each stored value is a complete URL, so it can be fed straight back in
# to list the sub-categories of "Salad Recipes".
print(AllRecipes.fetch_categories(categories["Salad Recipes"]))

# Run a search query.
search_string = "pork curry"
query_result = AllRecipes.search(search_string)
print(query_result)

# Fetch the first (most relevant) result in detail.
main_recipe_url = query_result[0]['url']
detailed_recipe = AllRecipes.get(main_recipe_url)

# Pretty-print the recipe.
print("## %s:" % detailed_recipe['name'])

print("### For %s servings:" % detailed_recipe['nb_servings'])
for ingredient in detailed_recipe['ingredients']:
    print("- %s" % ingredient)

for step in detailed_recipe['steps']:
    print("# %s" % step)