Skip to content

Commit

Permalink
Extract url_fetcher into URLFetchHandler and add state testing
Browse files Browse the repository at this point in the history
  • Loading branch information
mormahr committed Feb 26, 2021
1 parent 5b4ffbe commit 25ec245
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 18 deletions.
45 changes: 45 additions & 0 deletions pdf_service/URLFetchHandler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import werkzeug
from sentry_sdk import add_breadcrumb

from .errors import ForbiddenURLFetchError, URLFetcherCalledAfterExitException


class URLFetchHandler:
    """
    Implements a url_fetcher for WeasyPrint that refuses every URL.

    Normally WeasyPrint will swallow any url fetch errors and demote them to warnings.
    This implementation keeps track of the errors it throws and raises an exception
    when the context is left if any occurred.

    It's important to note that the `url_fetcher` is stored by HTML and will then be
    used by the `.render` method, so the render call has to be inside the with, too.

    :raise: werkzeug.exceptions.Forbidden when the with block is left and at least
        one URL fetch was attempted inside it.
    :raise: URLFetcherCalledAfterExitException when the fetcher is called after the
        with block was left.

    :example:
        >>> from weasyprint import HTML
        >>>
        >>> with URLFetchHandler() as url_fetcher:
        ...     html = HTML(string=html_string, url_fetcher=url_fetcher)
        ...     doc = html.render()
    """

    def __init__(self):
        # Every ForbiddenURLFetchError raised by __call__, in call order.
        self.url_errors = []
        # Set to True by __exit__; later calls are treated as misuse.
        self.closed = False

    def __enter__(self):
        """Return the handler itself so it can be passed as ``url_fetcher``."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Mark the handler as closed and fail if any fetch was attempted.

        The Forbidden raised here replaces whatever exception is currently
        leaving the with block (including the fetch error itself when the
        caller did not swallow it).
        """
        self.closed = True
        if self.url_errors:
            raise werkzeug.exceptions.Forbidden()

    def __call__(self, url):
        """
        Refuse to fetch ``url``.

        The refusal is recorded as a Sentry breadcrumb and remembered in
        ``url_errors`` so that ``__exit__`` can report it even when the
        caller swallows the raised error.

        :raise: URLFetcherCalledAfterExitException if the context was already left.
        :raise: ForbiddenURLFetchError otherwise (always).
        """
        if self.closed:
            raise URLFetcherCalledAfterExitException()

        error = ForbiddenURLFetchError(url)
        # Wrap in a 1-tuple so a tuple-valued ``url`` cannot break the formatting.
        add_breadcrumb(message="Refused to fetch URL (%s)" % (url,))
        self.url_errors.append(error)
        raise error
25 changes: 7 additions & 18 deletions pdf_service/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from sentry_sdk import init, start_span, set_context, add_breadcrumb, set_tag
from sentry_sdk import init, start_span, set_context, set_tag
from flask import Flask, request, make_response
from weasyprint import HTML
from sentry_sdk.integrations.flask import FlaskIntegration
import os
import werkzeug

from .URLFetchHandler import URLFetchHandler
from .errors import ForbiddenURLFetchError


Expand All @@ -29,23 +29,12 @@ def generate_pdf():
with start_span(op='decode'):
data = request.get_data(as_text=True)

url_errors = []
with URLFetchHandler() as url_fetcher:
with start_span(op='parse'):
html = HTML(string=data, url_fetcher=url_fetcher)

def url_fetcher(url):
error = ForbiddenURLFetchError(url)
add_breadcrumb(message="Refused to fetch URL (%s)" % url)
url_errors.append(error)
raise error

with start_span(op='parse'):
# noinspection PyTypeChecker
html = HTML(string=data, url_fetcher=url_fetcher)

with start_span(op='render'):
doc = html.render()

if len(url_errors) != 0:
raise werkzeug.exceptions.Forbidden()
with start_span(op='render'):
doc = html.render()

with start_span(op='write-pdf'):
pdf = doc.write_pdf()
Expand Down
5 changes: 5 additions & 0 deletions pdf_service/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,8 @@ class ForbiddenURLFetchError(werkzeug.exceptions.HTTPException):

def __init__(self, url):
    """
    Create the error for a refused fetch of ``url``.

    :param url: the URL whose fetch was refused; its repr is embedded in ``message``.
    """
    # Run the base-class initializer so that the state it sets up
    # (e.g. the ``response`` attribute of werkzeug's HTTPException) exists.
    super().__init__()
    # 1-tuple keeps the formatting correct even if ``url`` is itself a tuple.
    self.message = "Attempted to fetch forbidden url (%r)" % (url,)


class URLFetcherCalledAfterExitException(Exception):
    """Raised when a URLFetchHandler is called after its with block was left."""

    def __init__(self):
        self.message = "Called URLFetchHandler after it was closed."
        # Pass the message on so that str(exc), tracebacks and error
        # reporting show it instead of an empty string.
        super().__init__(self.message)
25 changes: 25 additions & 0 deletions tests/test_url_fetch_handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import pytest
import werkzeug

from pdf_service import URLFetchHandler
from pdf_service.errors import URLFetcherCalledAfterExitException


def test_exits_without_throwing_if_fetcher_isnt_called():
    """Leaving the context without any fetch attempt must not raise."""
    untouched_fetcher = URLFetchHandler()
    with untouched_fetcher:
        pass


def test_throws_if_fetcher_throws():
    """A fetch attempt inside the context surfaces as Forbidden on exit."""
    expect_forbidden = pytest.raises(werkzeug.exceptions.Forbidden)
    with expect_forbidden, URLFetchHandler() as fetch:
        fetch("https://example.com/test.png")


def test_throws_when_called_after_exit():
    """Calling the fetcher once its context has been left is an error."""
    with URLFetchHandler() as closed_fetcher:
        pass

    expect_misuse = pytest.raises(URLFetcherCalledAfterExitException)
    with expect_misuse:
        closed_fetcher("https://example.com/test.png")

0 comments on commit 25ec245

Please sign in to comment.