@@ -174,7 +174,7 @@ def parse_file(html_file):
174174 return HtmlFile (html_file )
175175
def html_name_from_markdown(filename):
    """Return *filename* with a trailing ``.md`` extension replaced by ``.html``.

    The extension match is case-insensitive (``.MD`` and ``.Md`` are also
    replaced). A filename that does not end in ``.md`` is returned unchanged.
    """
    # Raw string: "\." in a normal string literal is an invalid escape
    # sequence and triggers a warning on modern Python versions.
    return re.sub(r"\.md$", ".html", filename, flags=re.IGNORECASE)
179179
180180def create_html (markdown_file ):
@@ -193,23 +193,33 @@ def create_html(markdown_file):
193193 )
194194 return process
195195
196+ def issue_request (method , url , ** kwargs ):
197+ retries = 0
198+
199+ while retries < 3 :
200+ r = method (url , ** kwargs )
201+ if r .status_code == 429 :
202+ retry_after = int (r .headers .get ("Retry-After" , 60 ))
203+ time .sleep (retry_after )
204+ retries += 1
205+ else :
206+ return r
207+ raise Exception ("Max retries exceeded" )
208+
196209def access_url (url ):
197210 global http_headers
198211 status = ''
199212 is_broken = False
200213 try_with_trusted_ca_bundle = False
201214
202215 try :
203- r = requests .head ( url , allow_redirects = True , headers = http_headers )
216+ r = issue_request ( requests .head , url , allow_redirects = True , headers = http_headers )
204217 # Some sites may return 404 for head but not get, e.g.
205218 # https://tls.mbed.org/kb/development/thread-safety-and-multi-threading
206219 if r .status_code >= 400 :
207220 # Allow redirects is already enabled by default for GET.
208- r = requests .get (url , headers = http_headers )
209- # It's likely we will run into GitHub's rate-limiting if there are many links.
210- if r .status_code == 429 :
211- time .sleep (int (r .headers ['Retry-After' ]))
212- r = requests .head (url , allow_redirects = True )
221+ r = issue_request (requests .get , url , headers = http_headers )
222+
213223 if r .status_code >= 400 :
214224 is_broken = True
215225 status = r .status_code
@@ -223,16 +233,13 @@ def access_url(url):
223233
224234 if try_with_trusted_ca_bundle == True :
225235 try :
226- r = requests .head ( url , allow_redirects = True , headers = http_headers , verify = TRUSTED_CA_BUNDLE )
236+ r = issue_request ( requests .head , url , allow_redirects = True , headers = http_headers , verify = TRUSTED_CA_BUNDLE )
227237 # Some sites may return 404 for head but not get, e.g.
228238 # https://tls.mbed.org/kb/development/thread-safety-and-multi-threading
229239 if r .status_code >= 400 :
230240 # Allow redirects is already enabled by default for GET.
231- r = requests .get (url , headers = http_headers , verify = TRUSTED_CA_BUNDLE )
232- # It's likely we will run into GitHub's rate-limiting if there are many links.
233- if r .status_code == 429 :
234- time .sleep (int (r .headers ['Retry-After' ]))
235- r = requests .head (url , allow_redirects = True , verify = TRUSTED_CA_BUNDLE )
241+ r = issue_request (requests .get , url , headers = http_headers , verify = TRUSTED_CA_BUNDLE )
242+
236243 if r .status_code >= 400 :
237244 is_broken = True
238245 status = r .status_code
0 commit comments