1
- # type: ignore
2
1
# Copyright (c) 2024 iiPython
3
2
4
3
# Modules
5
4
from datetime import datetime
6
5
from dataclasses import dataclass
7
6
8
7
from requests import Session
9
- from bs4 import BeautifulSoup
8
+ from bs4 import BeautifulSoup , Tag
10
9
from rich .status import Status
11
10
12
11
from seleniumwire import webdriver
21
20
class NonExistentPackage(Exception):
    """Raised by package tracking when the page shows a ``red-banner`` element."""
23
22
23
class MissingElement(Exception):
    """Raised when text is requested from an element that is ``None``."""
25
+
26
class InvalidElementType(Exception):
    """Raised when an element lookup yields something that is not a ``Tag``."""
28
+
29
class NoTextInElement(Exception):
    """Raised when an element has no direct text node to return."""
31
+
24
32
# Typing
25
33
@dataclass
26
34
class Step :
@@ -35,7 +43,8 @@ class Package:
35
43
state : str
36
44
steps : list [Step ]
37
45
38
- # Mappings
46
+ # Constants
47
+ USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:131.0) Gecko/20100101 Firefox/131.0"
39
48
USPS_STEP_DETAIL_MAPPING = {
40
49
"usps picked up item" : "Picked Up" ,
41
50
"usps awaiting item" : "Awaiting Item" ,
@@ -53,16 +62,25 @@ class Package:
53
62
"departed usps facility" : "Left Facility"
54
63
}
55
64
65
+ # BS4 wrappers
66
def get_text(element: Tag | None = None, alt: bool = False) -> str:
    """Return the text of a BeautifulSoup element, failing loudly when absent.

    Args:
        element: The tag to read from; ``None`` is treated as "not found".
        alt: When exactly ``True``, return only the first text node that is a
            direct child of ``element`` instead of the element's ``.text``.

    Returns:
        The element's text as a plain ``str``.

    Raises:
        MissingElement: If ``element`` is ``None``.
        NoTextInElement: If ``alt`` is ``True`` and no direct text node exists.
    """
    if element is None:
        raise MissingElement

    # Identity check kept on purpose: only a literal True selects the
    # direct-child lookup, matching the previous behaviour.
    if alt is not True:
        return element.text

    # `string=` is the current spelling of the deprecated `text=` filter;
    # recursive=False restricts the search to direct children.
    text = element.find(string=True, recursive=False)
    if text is None:
        raise NoTextInElement

    return str(text)
78
+
56
79
# Main class
57
80
class USPSTracking :
58
81
def __init__(self) -> None:
    """Create the HTTP session and load any previously saved cookies."""
    self.session = Session()

    # Fall back to an empty mapping when security.load() returns a falsy value.
    self.cookies = security.load() or {}
66
84
67
85
@staticmethod
68
86
def __map_step_details (details : str ) -> str :
@@ -90,13 +108,7 @@ def __generate_security(self, url: str) -> str:
90
108
WebDriverWait (instance , 5 ).until (
91
109
expected_conditions .presence_of_element_located ((By .CLASS_NAME , "tracking-number" ))
92
110
)
93
- for request in instance .requests :
94
- if request .url == url :
95
- self .headers = request .headers
96
- self .cookies = {c ["name" ]: c ["value" ] for c in instance .get_cookies ()}
97
- security .save ({"headers" : dict (self .headers ), "cookies" : self .cookies })
98
- break
99
-
111
+ security .save ({c ["name" ]: c ["value" ] for c in instance .get_cookies ()})
100
112
html = instance .page_source # This saves us a request
101
113
instance .quit ()
102
114
return html
@@ -107,59 +119,61 @@ def track_package(self, tracking_number: str) -> Package:
107
119
# Load data from page
108
120
if not self .cookies :
109
121
110
- # Handle generating cookies / headers
122
+ # Handle generating cookies
111
123
page = BeautifulSoup (self .__generate_security (url ), "html.parser" )
112
124
113
125
else :
114
126
page = BeautifulSoup (
115
- self .session .get (url , cookies = self .cookies , headers = self . headers ).text ,
127
+ self .session .get (url , cookies = self .cookies , headers = { "User-Agent" : USER_AGENT } ).text ,
116
128
"html.parser"
117
129
)
118
130
if "originalHeaders" in str (page ):
119
131
page = BeautifulSoup (self .__generate_security (url ), "html.parser" )
120
132
133
+ # Handle element searching
134
def find_object(class_name: str, parent: Tag | None = None) -> Tag | None:
    """Find the first element whose ``class`` attribute matches ``class_name``.

    The search runs inside ``parent`` when it is given (and truthy),
    otherwise against the enclosing ``page``.

    Returns:
        The matching ``Tag``, or ``None`` when nothing matches.

    Raises:
        InvalidElementType: If the match is not a ``Tag``.
    """
    scope = parent or page
    match = scope.find(attrs={"class": class_name})

    # A missing element is a valid result; anything else must be a Tag.
    if match is not None and not isinstance(match, Tag):
        raise InvalidElementType(class_name)

    return match
143
+
121
144
# Check header for possible issues
122
- if page . find ( attrs = { "class" : " red-banner"} ):
145
+ if find_object ( " red-banner" ):
123
146
raise NonExistentPackage
124
147
125
148
# Start fetching data
126
- has_delivery_date = page . find ( attrs = { "class" : " day"} )
149
+ has_delivery_date = find_object ( " day" )
127
150
month , year = "" , ""
128
151
if has_delivery_date :
129
- month , year = page . find ( attrs = { "class" : " month_year"}). text .split ("\n " )[0 ].strip ().split (" " )
152
+ month , year = get_text ( find_object ( " month_year")) .split ("\n " )[0 ].strip ().split (" " )
130
153
131
154
# Handle fetching the current step
132
- external_shipment = page .find (attrs = {"class" : "preshipment-status" })
133
- if not external_shipment :
134
-
135
- # Catch services like Amazon, where the status is still not in the element
136
- # like it is with normal in-network packages.
137
- external_shipment = page .find (attrs = {"class" : "shipping-partner-status" })
138
-
139
- # If this is an external shipment, check OUTSIDE the element to find the status.
140
- if external_shipment :
141
- current_step = external_shipment .find (attrs = {"class" : "tb-status" }).text
155
+ if find_object ("preshipment-status" ) or find_object ("shipping-partner-status" ):
156
+ current_step = get_text (find_object ("tb-status" ))
142
157
143
158
else :
144
- current_step = page . find ( attrs = { "class" : "current-step" }). find ( attrs = { "class" : "tb-status" }). text
159
+ current_step = get_text ( find_object ( "tb-status" , find_object ( "current-step" )))
145
160
146
161
# Figure out delivery times
147
- times = page .find (attrs = {"class" : "time" }).find (text = True , recursive = False ).split (" and " ) \
148
- if has_delivery_date else []
162
+ times = get_text (find_object ("time" ), alt = True ).split (" and " ) if has_delivery_date else []
149
163
150
164
# Fetch steps
151
165
steps = []
152
166
for step in page .find_all (attrs = {"class" : "tb-step" }):
153
167
if "toggle-history-container" not in step ["class" ]:
154
- location = step . find ( attrs = { "class" : " tb-location"} )
168
+ location = find_object ( " tb-location", step )
155
169
if location is not None :
156
- location = location . text .strip ()
170
+ location = get_text ( location ) .strip ()
157
171
158
172
steps .append (Step (
159
- self .__map_step_details (step . find ( attrs = { "class" : " tb-status-detail"}). text ),
173
+ self .__map_step_details (get_text ( find_object ( " tb-status-detail", step )) ),
160
174
location or "UNKNOWN LOCATION" ,
161
175
datetime .strptime (
162
- self .__sanitize (step . find ( attrs = { "class" : " tb-date"}). text ),
176
+ self .__sanitize (get_text ( find_object ( " tb-date", step )) ),
163
177
"%B %d, %Y, %I:%M %p"
164
178
)
165
179
))
@@ -170,14 +184,14 @@ def track_package(self, tracking_number: str) -> Package:
170
184
# Estimated delivery
171
185
[
172
186
datetime .strptime (
173
- f"{ page . find ( attrs = { 'class' : ' date'}). text .zfill (2 )} { month } { year } { time } " ,
187
+ f"{ get_text ( find_object ( ' date')) .zfill (2 )} { month } { year } { time } " ,
174
188
"%d %B %Y %I:%M%p"
175
189
)
176
190
for time in times
177
191
] if has_delivery_date else None ,
178
192
179
193
# Last status "banner"
180
- page . find ( attrs = { "class" : " banner-content"}). text .strip (),
194
+ get_text ( find_object ( " banner-content")) .strip (),
181
195
182
196
# Current state based on current step
183
197
current_step ,
0 commit comments