-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscrape.py
More file actions
60 lines (46 loc) · 2.1 KB
/
scrape.py
File metadata and controls
60 lines (46 loc) · 2.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import requests
from bs4 import BeautifulSoup
from game import Game
class SteamScraper:
    """Scrape game listings from a Steam search results page.

    The search page supplies the title, release date, price and store URL of
    each game; each game's store page is then fetched to add a description
    and a review summary.
    """

    # requests applies no timeout by default, so a stalled connection would
    # block the scraper forever. Bound every request instead.
    REQUEST_TIMEOUT_SECONDS = 30

    def __init__(self, steam_search_url):
        """Store the URL of the search results page to scrape.

        Args:
            steam_search_url: URL fetched by ``get_games``.
        """
        self.steam_search_url = steam_search_url

    @staticmethod
    def _text_or_none(tag):
        """Return the stripped text of ``tag``, or None if the tag is missing."""
        return tag.text.strip() if tag is not None else None

    @staticmethod
    def _collapse_whitespace(text):
        """Normalise the whitespace of a scraped text snippet.

        Newlines and tabs are removed, carriage returns become spaces, the
        result is stripped, and runs of spaces are collapsed to one space.

        Args:
            text: The raw text, or None when the element was not found.

        Returns:
            The cleaned text, or None if ``text`` is None.
        """
        if text is None:
            return None
        cleaned = text.replace('\n', '').replace('\t', '').replace('\r', ' ').strip()
        # Splitting on a single space yields empty pieces for every extra
        # space in a run; dropping them collapses the run and always ends.
        return ' '.join(piece for piece in cleaned.split(' ') if piece)

    @staticmethod
    def _add_game_details(game):
        """Fetch the store page of ``game`` and attach details to it.

        Sets ``game.description`` and ``game.reviewSummary``; either is None
        when the matching element is absent. If the page has no
        ``glance_ctn`` container the game is left untouched.

        Args:
            game: Object exposing ``storePageUrl``; mutated in place.

        Raises:
            requests.exceptions.RequestException: If the page cannot be
                fetched (including a timeout).
        """
        page = requests.get(
            game.storePageUrl,
            timeout=SteamScraper.REQUEST_TIMEOUT_SECONDS,
        )
        soup = BeautifulSoup(page.content, 'html.parser')
        glance = soup.find('div', class_='glance_ctn')
        if glance is None:
            return

        # Get description
        description_div = glance.find('div', class_='game_description_snippet')
        game.description = SteamScraper._text_or_none(description_div)

        # Get review line
        review_div = glance.find(id='userReviews')
        summary_div = None
        if review_div is not None:
            summary_div = review_div.find('div', class_='summary column')
        raw_review = summary_div.text if summary_div is not None else None
        game.reviewSummary = SteamScraper._collapse_whitespace(raw_review)

    def get_games(self, top_recs=10):
        """Scrape the search page and return the first ``top_recs`` games.

        Args:
            top_recs: Maximum number of search results to process. A value
                of zero or less yields an empty list.

        Returns:
            A list of ``Game`` objects in search-result order, each enriched
            by ``_add_game_details``. Empty if the page has no results
            container. Title, release date and price are None when their
            element is missing from a row.

        Raises:
            requests.exceptions.RequestException: If a page cannot be
                fetched (including a timeout).
        """
        page = requests.get(
            self.steam_search_url,
            timeout=self.REQUEST_TIMEOUT_SECONDS,
        )
        soup = BeautifulSoup(page.content, 'html.parser')
        search_results = soup.find(id='search_resultsRows')
        if search_results is None:
            # No results container (empty search or changed markup).
            return []

        game_rows = search_results.find_all('a', class_='search_result_row')
        games = []
        for index, game_row in enumerate(game_rows):
            if index >= top_recs:
                break

            title = self._text_or_none(game_row.find('span', class_='title'))
            release_date = self._text_or_none(
                game_row.find('div', class_='search_released')
            )

            price_div = game_row.find('div', class_='search_price')
            if price_div is not None:
                # Remove the first nested <span> so its text is not part of
                # the price string.
                nested_span = price_div.find('span')
                if nested_span is not None:
                    nested_span.decompose()
            price = self._text_or_none(price_div)

            game_url = game_row.get('href')
            game = Game(game_url, title, release_date, price)
            self._add_game_details(game)
            games.append(game)
        return games