Skip to content

Commit 12a5979

Browse files
committed
Improved web scraping of the login page, making it less error-prone if the page's layout changes.
1 parent 05b11e1 commit 12a5979

File tree

3 files changed

+100
-68
lines changed

3 files changed

+100
-68
lines changed

pymyq/api.py

Lines changed: 98 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
"""Define the MyQ API."""
22
import asyncio
33
import logging
4+
from bs4 import BeautifulSoup
45
from datetime import datetime, timedelta
5-
from html.parser import HTMLParser
66
from typing import Dict, Optional, Union, Tuple
77
from urllib.parse import urlsplit, parse_qs
88

@@ -35,35 +35,6 @@
3535
DEFAULT_TOKEN_REFRESH = 10 * 60 # 10 minutes
3636

3737

38-
class HTMLElementFinder(HTMLParser):
39-
def __init__(self, tag: str, return_attr: str, with_attr: (str, str) = None):
40-
self._FindTag = tag # type: str
41-
self._WithAttr = with_attr # type: Optional[(str, str)]
42-
self._ReturnAttr = return_attr # type: str
43-
self._Result = []
44-
HTMLParser.__init__(self)
45-
46-
@property
47-
def result(self):
48-
return self._Result
49-
50-
def handle_starttag(self, tag, attrs):
51-
if tag == self._FindTag:
52-
store_attr = False
53-
if self._WithAttr is None:
54-
store_attr = True
55-
else:
56-
for attr, value in attrs:
57-
if (attr, value) == self._WithAttr:
58-
store_attr = True
59-
break
60-
61-
if store_attr:
62-
for attr, value in attrs:
63-
if attr == self._ReturnAttr:
64-
self._Result.append(value)
65-
66-
6738
class API: # pylint: disable=too-many-instance-attributes
6839
"""Define a class for interacting with the MyQ iOS App API."""
6940

@@ -196,7 +167,9 @@ async def request(
196167
if self._authentication_task is not None:
197168
authentication_task = await self.authenticate(wait=False)
198169
if authentication_task.done():
199-
_LOGGER.debug("Scheduled token refresh completed, ensuring no exception.")
170+
_LOGGER.debug(
171+
"Scheduled token refresh completed, ensuring no exception."
172+
)
200173
self._authentication_task = None
201174
try:
202175
# Get the result so any exception is raised.
@@ -283,7 +256,7 @@ async def request(
283256
f"Error requesting data from {url}: {err.status} - {err.message}"
284257
)
285258
_LOGGER.debug(message)
286-
if getattr(err, 'status') and err.status == 401:
259+
if getattr(err, "status") and err.status == 401:
287260
# Received unauthorized, reset token and start task to get a new one.
288261
self._security_token = (None, None, self._security_token[2])
289262
await self.authenticate(wait=False)
@@ -292,9 +265,7 @@ async def request(
292265
raise RequestError(message)
293266

294267
except ClientError as err:
295-
message = (
296-
f"Error requesting data from {url}: {str(err)}"
297-
)
268+
message = f"Error requesting data from {url}: {str(err)}"
298269
_LOGGER.debug(message)
299270
raise RequestError(message)
300271

@@ -303,7 +274,7 @@ async def _oauth_authenticate(self) -> (str, int):
303274
async with ClientSession() as session:
304275
# retrieve authentication page
305276
_LOGGER.debug("Retrieving authentication page")
306-
resp, text = await self.request(
277+
resp, html = await self.request(
307278
method="get",
308279
returns="text",
309280
url=OAUTH_AUTHORIZE_URI,
@@ -322,19 +293,64 @@ async def _oauth_authenticate(self) -> (str, int):
322293
login_request=True,
323294
)
324295

296+
# Scanning returned web page for required fields.
297+
_LOGGER.debug("Scanning login page for fields to return")
298+
soup = BeautifulSoup(html, "html.parser")
299+
300+
# Go through all potential forms in the page returned. This is in case multiple forms are returned.
301+
forms = soup.find_all("form")
302+
data = {}
303+
for form in forms:
304+
have_email = False
305+
have_password = False
306+
have_submit = False
307+
# Go through all the input fields.
308+
for field in form.find_all("input"):
309+
if field.get("type"):
310+
# Hidden value; include it so it is sent back when the form is submitted
311+
if field.get("type").lower() == "hidden":
312+
data.update(
313+
{
314+
field.get("name", "NONAME"): field.get(
315+
"value", "NOVALUE"
316+
)
317+
}
318+
)
319+
# Email field
320+
elif field.get("type").lower() == "email":
321+
data.update({field.get("name", "Email"): self.username})
322+
have_email = True
323+
# Password field
324+
elif field.get("type").lower() == "password":
325+
data.update(
326+
{
327+
field.get(
328+
"name", "Password"
329+
): self.__credentials.get("password")
330+
}
331+
)
332+
have_password = True
333+
# To confirm this form also has a submit button
334+
elif field.get("type").lower() == "submit":
335+
have_submit = True
336+
337+
# Confirm we found email, password, and submit in the form to be submitted
338+
if have_email and have_password and have_submit:
339+
break
340+
341+
# If we're here then this is not the form to submit.
342+
data = {}
343+
344+
# If data is empty then we did not find the valid form and are unable to continue.
345+
if len(data) == 0:
346+
_LOGGER.debug("Form with required fields not found")
347+
raise RequestError(
348+
"Form containing fields for email, password and submit not found. "
349+
"Unable to continue login process."
350+
)
351+
325352
# Perform login to MyQ
326353
_LOGGER.debug("Performing login to MyQ")
327-
parser = HTMLElementFinder(
328-
tag="input",
329-
return_attr="value",
330-
with_attr=("name", "__RequestVerificationToken"),
331-
)
332-
333-
# Verification token is within the returned page as <input name="__RequestVerificationToken" value=<token>>
334-
# Retrieve token from the page.
335-
parser.feed(text)
336-
request_verification_token = parser.result[0]
337-
338354
resp, _ = await self.request(
339355
method="post",
340356
returns="response",
@@ -343,22 +359,15 @@ async def _oauth_authenticate(self) -> (str, int):
343359
headers={
344360
"Content-Type": "application/x-www-form-urlencoded",
345361
"Cookie": resp.cookies.output(attrs=[]),
346-
"User-Agent": "null",
347-
},
348-
data={
349-
"Email": self.username,
350-
"Password": self.__credentials.get("password"),
351-
"__RequestVerificationToken": request_verification_token,
352362
},
363+
data=data,
353364
allow_redirects=False,
354365
login_request=True,
355366
)
356367

357368
# We're supposed to receive back at least 2 cookies. If not then authentication failed.
358369
if len(resp.cookies) < 2:
359-
message = (
360-
"Invalid MyQ credentials provided. Please recheck login and password."
361-
)
370+
message = "Invalid MyQ credentials provided. Please recheck login and password."
362371
self._invalid_credentials = True
363372
_LOGGER.debug(message)
364373
raise InvalidCredentialsError(message)
@@ -512,11 +521,15 @@ async def _get_devices_for_account(self, account) -> None:
512521
for device in devices_resp.get("items"):
513522
serial_number = device.get("serial_number")
514523
if serial_number is None:
515-
_LOGGER.debug(f"No serial number for device with name {device.get('name')}.")
524+
_LOGGER.debug(
525+
f"No serial number for device with name {device.get('name')}."
526+
)
516527
continue
517528

518529
if serial_number in self.devices:
519-
_LOGGER.debug(f"Updating information for device with serial number {serial_number}")
530+
_LOGGER.debug(
531+
f"Updating information for device with serial number {serial_number}"
532+
)
520533
myqdevice = self.devices[serial_number]
521534

522535
# When performing commands we might update the state temporarily, need to ensure
@@ -525,40 +538,53 @@ async def _get_devices_for_account(self, account) -> None:
525538
last_update = myqdevice.device_json["state"].get("last_update")
526539
myqdevice.device_json = device
527540

528-
if myqdevice.device_json["state"].get("last_update") is not None and \
529-
myqdevice.device_json["state"].get("last_update") != last_update:
541+
if (
542+
myqdevice.device_json["state"].get("last_update") is not None
543+
and myqdevice.device_json["state"].get("last_update")
544+
!= last_update
545+
):
530546
# MyQ has updated device state, reset ours ensuring we have the one from MyQ.
531547
myqdevice.state = None
532-
_LOGGER.debug(f"State for device {myqdevice.name} was updated to {myqdevice.state}")
548+
_LOGGER.debug(
549+
f"State for device {myqdevice.name} was updated to {myqdevice.state}"
550+
)
533551

534552
myqdevice.state_update = state_update_timestmp
535553
else:
536554
if device.get("device_family") == DEVICE_FAMILY_GARAGEDOOR:
537-
_LOGGER.debug(f"Adding new garage door with serial number {serial_number}")
555+
_LOGGER.debug(
556+
f"Adding new garage door with serial number {serial_number}"
557+
)
538558
self.devices[serial_number] = MyQGaragedoor(
539559
api=self,
540560
account=account,
541561
device_json=device,
542562
state_update=state_update_timestmp,
543563
)
544564
elif device.get("device_family") == DEVICE_FAMLY_LAMP:
545-
_LOGGER.debug(f"Adding new lamp with serial number {serial_number}")
565+
_LOGGER.debug(
566+
f"Adding new lamp with serial number {serial_number}"
567+
)
546568
self.devices[serial_number] = MyQLamp(
547569
api=self,
548570
account=account,
549571
device_json=device,
550572
state_update=state_update_timestmp,
551573
)
552574
elif device.get("device_family") == DEVICE_FAMILY_GATEWAY:
553-
_LOGGER.debug(f"Adding new gateway with serial number {serial_number}")
575+
_LOGGER.debug(
576+
f"Adding new gateway with serial number {serial_number}"
577+
)
554578
self.devices[serial_number] = MyQDevice(
555579
api=self,
556580
account=account,
557581
device_json=device,
558582
state_update=state_update_timestmp,
559583
)
560584
else:
561-
_LOGGER.warning(f"Unknown device family {device.get('device_family')}")
585+
_LOGGER.warning(
586+
f"Unknown device family {device.get('device_family')}"
587+
)
562588
else:
563589
_LOGGER.debug(f"No devices found for account {self.accounts[account]}")
564590

@@ -597,10 +623,12 @@ async def update_device_info(self, for_account: str = None) -> None:
597623
# Request is for specific account, thus restrict retrieval to the 1 account.
598624
if self.accounts.get(for_account) is None:
599625
# Checking to ensure we know the account, but this should never happen.
600-
_LOGGER.debug(f"Unable to perform update request for account {for_account} as it is not known.")
626+
_LOGGER.debug(
627+
f"Unable to perform update request for account {for_account} as it is not known."
628+
)
601629
accounts = {}
602630
else:
603-
accounts = ({for_account: self.accounts.get(for_account)})
631+
accounts = {for_account: self.accounts.get(for_account)}
604632

605633
for account in accounts:
606634
await self._get_devices_for_account(account=account)
@@ -619,7 +647,9 @@ async def login(username: str, password: str, websession: ClientSession = None)
619647
try:
620648
await api.authenticate(wait=True)
621649
except InvalidCredentialsError as err:
622-
_LOGGER.error(f"Username and/or password are invalid. Update username/password.")
650+
_LOGGER.error(
651+
f"Username and/or password are invalid. Update username/password."
652+
)
623653
raise err
624654
except AuthenticationError as err:
625655
_LOGGER.error(f"Authentication failed: {str(err)}")

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
-i https://pypi.python.org/simple
22
aiohttp>=3.7
3+
beautifulsoup4>=4.9.3
34
pkce>=1.0.2

requirements_dev.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
-i https://pypi.python.org/simple
22
aiohttp>=3.7
3+
beautifulsoup4>=4.9.3
34
black==20.8b1
45
flake8>=3.8.4
56
pkce>=1.0.2

0 commit comments

Comments
 (0)