Skip to content

Commit ed792dd

Browse files
committed
major simplifications and safety improvements
1 parent debf692 commit ed792dd

File tree

3 files changed

+56
-42
lines changed

3 files changed

+56
-42
lines changed

usps/__init__.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
__version__ = "0.6.1"
1+
__version__ = "0.7.0"

usps/storage.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ def load(self) -> dict[str, str]:
3636

3737
return json.loads(self.security_file.read_text())
3838

39-
def save(self, _security: dict[str, typing.Any]) -> None:
39+
def save(self, _security: dict[str, str]) -> None:
4040
self.security_file.write_text(json.dumps(_security, indent = 4))
4141

4242
security = SecurityStorage()

usps/tracking.py

+54 -40
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,11 @@
1-
# type: ignore
21
# Copyright (c) 2024 iiPython
32

43
# Modules
54
from datetime import datetime
65
from dataclasses import dataclass
76

87
from requests import Session
9-
from bs4 import BeautifulSoup
8+
from bs4 import BeautifulSoup, Tag
109
from rich.status import Status
1110

1211
from seleniumwire import webdriver
@@ -21,6 +20,15 @@
2120
class NonExistentPackage(Exception):
2221
pass
2322

23+
class MissingElement(Exception):
24+
pass
25+
26+
class InvalidElementType(Exception):
27+
pass
28+
29+
class NoTextInElement(Exception):
30+
pass
31+
2432
# Typing
2533
@dataclass
2634
class Step:
@@ -35,7 +43,8 @@ class Package:
3543
state: str
3644
steps: list[Step]
3745

38-
# Mappings
46+
# Constants
47+
USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:131.0) Gecko/20100101 Firefox/131.0"
3948
USPS_STEP_DETAIL_MAPPING = {
4049
"usps picked up item": "Picked Up",
4150
"usps awaiting item": "Awaiting Item",
@@ -53,16 +62,25 @@ class Package:
5362
"departed usps facility": "Left Facility"
5463
}
5564

65+
# BS4 wrappers
66+
def get_text(element: Tag | None = None, alt: bool = False) -> str:
67+
if element is None:
68+
raise MissingElement
69+
70+
if alt is True:
71+
text = element.find(text = True, recursive = False)
72+
if text is None:
73+
raise NoTextInElement
74+
75+
return str(text)
76+
77+
return element.text
78+
5679
# Main class
5780
class USPSTracking:
5881
def __init__(self) -> None:
5982
self.session = Session()
60-
self.headers, self.cookies = {}, {}
61-
62-
# Fetch existing security data
63-
security_data = security.load()
64-
if security_data:
65-
self.headers, self.cookies = security_data["headers"], security_data["cookies"]
83+
self.cookies = security.load() or {}
6684

6785
@staticmethod
6886
def __map_step_details(details: str) -> str:
@@ -90,13 +108,7 @@ def __generate_security(self, url: str) -> str:
90108
WebDriverWait(instance, 5).until(
91109
expected_conditions.presence_of_element_located((By.CLASS_NAME, "tracking-number"))
92110
)
93-
for request in instance.requests:
94-
if request.url == url:
95-
self.headers = request.headers
96-
self.cookies = {c["name"]: c["value"] for c in instance.get_cookies()}
97-
security.save({"headers": dict(self.headers), "cookies": self.cookies})
98-
break
99-
111+
security.save({c["name"]: c["value"] for c in instance.get_cookies()})
100112
html = instance.page_source # This saves us a request
101113
instance.quit()
102114
return html
@@ -107,59 +119,61 @@ def track_package(self, tracking_number: str) -> Package:
107119
# Load data from page
108120
if not self.cookies:
109121

110-
# Handle generating cookies / headers
122+
# Handle generating cookies
111123
page = BeautifulSoup(self.__generate_security(url), "html.parser")
112124

113125
else:
114126
page = BeautifulSoup(
115-
self.session.get(url, cookies = self.cookies, headers = self.headers).text,
127+
self.session.get(url, cookies = self.cookies, headers = {"User-Agent": USER_AGENT}).text,
116128
"html.parser"
117129
)
118130
if "originalHeaders" in str(page):
119131
page = BeautifulSoup(self.__generate_security(url), "html.parser")
120132

133+
# Handle element searching
134+
def find_object(class_name: str, parent: Tag | None = None) -> Tag | None:
135+
element = (parent or page).find(attrs = {"class": class_name})
136+
if element is None:
137+
return element
138+
139+
if not isinstance(element, Tag):
140+
raise InvalidElementType(class_name)
141+
142+
return element
143+
121144
# Check header for possible issues
122-
if page.find(attrs = {"class": "red-banner"}):
145+
if find_object("red-banner"):
123146
raise NonExistentPackage
124147

125148
# Start fetching data
126-
has_delivery_date = page.find(attrs = {"class": "day"})
149+
has_delivery_date = find_object("day")
127150
month, year = "", ""
128151
if has_delivery_date:
129-
month, year = page.find(attrs = {"class": "month_year"}).text.split("\n")[0].strip().split(" ")
152+
month, year = get_text(find_object("month_year")).split("\n")[0].strip().split(" ")
130153

131154
# Handle fetching the current step
132-
external_shipment = page.find(attrs = {"class": "preshipment-status"})
133-
if not external_shipment:
134-
135-
# Catch services like Amazon, where the status is still not in the element
136-
# like it is with normal in-network packages.
137-
external_shipment = page.find(attrs = {"class": "shipping-partner-status"})
138-
139-
# If this is an external shipment, check OUTSIDE the element to find the status.
140-
if external_shipment:
141-
current_step = external_shipment.find(attrs = {"class": "tb-status"}).text
155+
if find_object("preshipment-status") or find_object("shipping-partner-status"):
156+
current_step = get_text(find_object("tb-status"))
142157

143158
else:
144-
current_step = page.find(attrs = {"class": "current-step"}).find(attrs = {"class": "tb-status"}).text
159+
current_step = get_text(find_object("tb-status", find_object("current-step")))
145160

146161
# Figure out delivery times
147-
times = page.find(attrs = {"class": "time"}).find(text = True, recursive = False).split(" and ") \
148-
if has_delivery_date else []
162+
times = get_text(find_object("time"), alt = True).split(" and ") if has_delivery_date else []
149163

150164
# Fetch steps
151165
steps = []
152166
for step in page.find_all(attrs = {"class": "tb-step"}):
153167
if "toggle-history-container" not in step["class"]:
154-
location = step.find(attrs = {"class": "tb-location"})
168+
location = find_object("tb-location", step)
155169
if location is not None:
156-
location = location.text.strip()
170+
location = get_text(location).strip()
157171

158172
steps.append(Step(
159-
self.__map_step_details(step.find(attrs = {"class": "tb-status-detail"}).text),
173+
self.__map_step_details(get_text(find_object("tb-status-detail", step))),
160174
location or "UNKNOWN LOCATION",
161175
datetime.strptime(
162-
self.__sanitize(step.find(attrs = {"class": "tb-date"}).text),
176+
self.__sanitize(get_text(find_object("tb-date", step))),
163177
"%B %d, %Y, %I:%M %p"
164178
)
165179
))
@@ -170,14 +184,14 @@ def track_package(self, tracking_number: str) -> Package:
170184
# Estimated delivery
171185
[
172186
datetime.strptime(
173-
f"{page.find(attrs = {'class': 'date'}).text.zfill(2)} {month} {year} {time}",
187+
f"{get_text(find_object('date')).zfill(2)} {month} {year} {time}",
174188
"%d %B %Y %I:%M%p"
175189
)
176190
for time in times
177191
] if has_delivery_date else None,
178192

179193
# Last status "banner"
180-
page.find(attrs = {"class": "banner-content"}).text.strip(),
194+
get_text(find_object("banner-content")).strip(),
181195

182196
# Current state based on current step
183197
current_step,

0 commit comments

Comments
 (0)