-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathweb_mission_to_mars.py
116 lines (103 loc) · 3.4 KB
/
web_mission_to_mars.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
def mars_scrape():
    """Scrape Mars news, imagery, weather and facts into one dictionary.

    Sources visited, in order: the NASA Mars news page, the JPL space-images
    page (featured image and its detail paragraphs), the USGS astrogeology
    search results (hemisphere sample images), the ``MarsWxReport`` Twitter
    timeline, and the space-facts.com Mars table.

    Configuration is read from the environment after ``load_dotenv()``:
    ``GOOGLE_CHROME_BIN``, ``CHROMEDRIVER_PATH``, ``api_key`` and
    ``api_secret_key``.

    Returns:
        A dict with the keys ``ntitle``, ``nbody``, ``feat_img``, ``img_det``,
        ``weather``, ``facts``, ``h`` and ``date``; or the string
        ``'browser not working'`` when the Chrome driver cannot be started.

    Raises:
        Any exception raised by the scraping steps themselves (for example an
        ``AttributeError`` or ``IndexError`` when an expected element is
        missing from a page) propagates to the caller; the browser is still
        closed in that case.
    """
    # Dependencies (kept function-local, as in the original file)
    from bs4 import BeautifulSoup as bs
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    import pandas as pd
    import tweepy
    from dotenv import load_dotenv
    import os
    import datetime

    # Setting browser
    load_dotenv()
    try:
        opt = webdriver.ChromeOptions()
        opt.binary_location = os.getenv('GOOGLE_CHROME_BIN')
        opt.add_argument('--no-sandbox')
        opt.add_argument('--headless')
        opt.add_argument('--disable-dev-shm-usage')
        # The keyword is `executable_path`; the original `execution_path`
        # typo raised TypeError, which the bare except then hid.
        # NOTE(review): newer Selenium releases configure the driver path
        # through a Service object - confirm against the pinned version.
        browser = webdriver.Chrome(
            executable_path=os.getenv('CHROMEDRIVER_PATH'),
            options=opt,
        )
    except Exception:
        # Sentinel kept for callers that already check for this string.
        return 'browser not working'

    def page_soup():
        # Selenium exposes the rendered markup as `page_source`.
        return bs(browser.page_source, 'lxml')

    def click_link(text):
        browser.find_element(By.PARTIAL_LINK_TEXT, text).click()

    try:
        # News
        browser.get('https://mars.nasa.gov/news/')
        soup = page_soup()
        content_titles = soup.find_all('div', class_='content_title')
        # Index 1 is kept from the original; presumably the first match is
        # not an article headline - verify against the live page.
        ntitle = content_titles[1].text
        nbody = soup.find('div', class_='article_teaser_body').text

        # Feature Image page
        browser.get('https://www.jpl.nasa.gov/spaceimages/')
        click_link('FULL')
        click_link('more info')
        soup = page_soup()

        # Image Url
        lede_img = soup.find('figure', class_='lede').find('a')['href']
        feat_img = f'https://jpl.nasa.gov{lede_img}'

        # Image Details (skip the download-link and view-count paragraphs)
        details = soup.find('aside', class_='image_detail_module')
        det = [
            {'detail': p.text}
            for p in details.find_all('p')
            if "Full-Res" not in p.text and "Views" not in p.text
        ]

        # Hemisphere Images
        browser.get(
            'https://astrogeology.usgs.gov/search/results'
            '?q=hemisphere+enhanced&k1=target&v1=Mars'
        )
        links = []
        for product in page_soup().find_all('div', class_='item'):
            hem = product.find('h3').text.replace(' Hemisphere Enhanced', '')
            entry = {'item': hem}
            click_link(hem)
            for li in page_soup().find_all('li'):
                anchor = li.find('a')
                if anchor is not None and 'Sample' in li.text:
                    entry['url'] = anchor.get('href')
            # Return to the search results before opening the next product.
            browser.back()
            links.append(entry)
    finally:
        # Always release the Chrome process, even when a scrape step fails.
        browser.quit()

    # Weather from twitter
    auth = tweepy.OAuthHandler(os.getenv("api_key"), os.getenv("api_secret_key"))
    api = tweepy.API(auth)
    timeline = api.user_timeline(screen_name='MarsWxReport', tweet_mode="extended")
    # Keep only the text before the trailing link of the newest tweet.
    weather = timeline[0].full_text.split(' http', 1)[0]

    # Table Facts
    facts = pd.read_html('https://space-facts.com/mars/')[0]
    facts.columns = ['Data', 'Values']
    facts = facts.to_html(
        classes="table table-hover table-dark table-striped",
        header=False,
        justify='center',
        index=False,
    )

    # Dictionary for Mongo
    mars = {
        'ntitle': ntitle,
        'nbody': nbody,
        'feat_img': feat_img,
        'img_det': det,
        'weather': weather,
        'facts': facts,
        'h': links,
        'date': str(datetime.date.today()),
    }
    print(mars)
    return mars