Skip to content

Commit f2dce80

Browse files
authored
Merge pull request #37 from scrapinghub/organize-page-inputs
reorganize page_inputs.py as a submodule; move HttpClient to it
2 parents 025f5b1 + a4f1dcc commit f2dce80

File tree

7 files changed

+152
-141
lines changed

7 files changed

+152
-141
lines changed

tests/test_requests.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,13 @@
33
import pytest
44
from web_poet.exceptions import RequestBackendError
55
from web_poet.page_inputs import (
6+
HttpClient,
67
HttpRequest,
78
HttpResponse,
89
HttpRequestBody,
910
HttpRequestHeaders
1011
)
11-
from web_poet.requests import (
12-
HttpClient,
13-
request_backend_var,
14-
)
12+
from web_poet.requests import request_backend_var
1513

1614

1715
@pytest.fixture
@@ -47,7 +45,7 @@ async def test_perform_request_from_httpclient(async_mock):
4745
async def test_http_client_single_requests(async_mock):
4846
client = HttpClient(async_mock)
4947

50-
with mock.patch("web_poet.requests.HttpRequest") as mock_request:
48+
with mock.patch("web_poet.page_inputs.client.HttpRequest") as mock_request:
5149
response = await client.request("url")
5250
response.url == "url"
5351

web_poet/__init__.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
11
from .pages import WebPage, ItemPage, ItemWebPage, Injectable
2-
from .requests import (
3-
request_backend_var,
4-
HttpClient,
5-
)
2+
from .requests import request_backend_var
63
from .page_inputs import (
74
Meta,
5+
HttpClient,
86
HttpRequest,
97
HttpResponse,
108
HttpRequestHeaders,

web_poet/page_inputs/__init__.py

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
from .meta import Meta
2+
from .client import HttpClient
3+
from .http import (
4+
HttpRequest,
5+
HttpResponse,
6+
HttpRequestHeaders,
7+
HttpResponseHeaders,
8+
HttpRequestBody,
9+
HttpResponseBody,
10+
)

web_poet/page_inputs/client.py

+123
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
"""This module has full support for :mod:`asyncio` that enables developers to
2+
perform asynchronous additional requests inside of Page Objects.
3+
4+
Note that the implementation to fully execute any :class:`~.Request` is not
5+
handled in this module. With that, the framework using **web-poet** must supply
6+
the implementation.
7+
8+
You can read more about this in the :ref:`advanced-downloader-impl` documentation.
9+
"""
10+
11+
import asyncio
12+
from typing import Optional, Dict, List, Union, Callable
13+
14+
from web_poet.requests import request_backend_var, _perform_request
15+
from web_poet.page_inputs.http import (
16+
HttpRequest,
17+
HttpRequestHeaders,
18+
HttpRequestBody,
19+
HttpResponse,
20+
)
21+
22+
23+
_StrMapping = Dict[str, str]
24+
_Headers = Union[_StrMapping, HttpRequestHeaders]
25+
_Body = Union[bytes, HttpRequestBody]
26+
27+
28+
class HttpClient:
29+
"""A convenient client to easily execute requests.
30+
31+
By default, it uses the request implementation assigned in the
32+
``web_poet.request_backend_var`` which is a :mod:`contextvars` instance to
33+
download the actual requests. However, it can easily be overridden by
34+
providing an optional ``request_downloader`` callable.
35+
36+
Providing the request implementation by dependency injection would be a good
37+
alternative solution when you want to avoid setting up :mod:`contextvars`
38+
like ``web_poet.request_backend_var``.
39+
40+
In any case, this doesn't contain any implementation about how to execute
41+
any requests fed into it. When setting that up, make sure that the downloader
42+
implementation returns a :class:`~.HttpResponse` instance.
43+
"""
44+
45+
def __init__(self, request_downloader: Callable = None):
46+
self._request_downloader = request_downloader or _perform_request
47+
48+
async def request(
49+
self,
50+
url: str,
51+
*,
52+
method: str = "GET",
53+
headers: Optional[_Headers] = None,
54+
body: Optional[_Body] = None,
55+
) -> HttpResponse:
56+
"""This is a shortcut for creating a :class:`~.HttpRequest` instance and executing
57+
that request.
58+
59+
A :class:`~.HttpResponse` instance should then be returned.
60+
61+
.. warning::
62+
By convention, the request implementation supplied optionally to
63+
:class:`~.HttpClient` should return a :class:`~.HttpResponse` instance.
64+
However, the underlying implementation supplied might change that,
65+
depending on how the framework using **web-poet** implements it.
66+
"""
67+
headers = headers or {}
68+
body = body or b""
69+
req = HttpRequest(url=url, method=method, headers=headers, body=body)
70+
return await self.execute(req)
71+
72+
async def get(
73+
self, url: str, *, headers: Optional[_Headers] = None
74+
) -> HttpResponse:
75+
"""Similar to :meth:`~.HttpClient.request` but performing a ``GET``
76+
request.
77+
"""
78+
return await self.request(url=url, method="GET", headers=headers)
79+
80+
async def post(
81+
self,
82+
url: str,
83+
*,
84+
headers: Optional[_Headers] = None,
85+
body: Optional[_Body] = None,
86+
) -> HttpResponse:
87+
"""Similar to :meth:`~.HttpClient.request` but performing a ``POST``
88+
request.
89+
"""
90+
return await self.request(url=url, method="POST", headers=headers, body=body)
91+
92+
async def execute(self, request: HttpRequest) -> HttpResponse:
93+
"""Accepts a single instance of :class:`~.HttpRequest` and executes it
94+
using the request implementation configured in the :class:`~.HttpClient`
95+
instance.
96+
97+
This returns a single :class:`~.HttpResponse`.
98+
"""
99+
return await self._request_downloader(request)
100+
101+
async def batch_execute(
102+
self, *requests: HttpRequest, return_exceptions: bool = False
103+
) -> List[Union[HttpResponse, Exception]]:
104+
"""Similar to :meth:`~.HttpClient.execute` but accepts a collection of
105+
:class:`~.HttpRequest` instances that would be batch executed.
106+
107+
The order of the :class:`~.HttpResponse` instances would correspond to the order
108+
of :class:`~.HttpRequest` passed.
109+
110+
If any of the :class:`~.HttpRequest` raises an exception upon execution,
111+
the exception is raised.
112+
113+
To prevent this, the actual exception can be returned alongside any
114+
successful :class:`~.HttpResponse`. This enables salvaging any usable
115+
responses despite any possible failures. This can be done by setting
116+
the ``return_exceptions`` parameter to ``True``.
117+
"""
118+
119+
coroutines = [self._request_downloader(r) for r in requests]
120+
responses = await asyncio.gather(
121+
*coroutines, return_exceptions=return_exceptions
122+
)
123+
return responses

web_poet/page_inputs.py renamed to web_poet/page_inputs/http.py

+3-12
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414
from web_poet.utils import memoizemethod_noargs
1515

1616
T_headers = TypeVar("T_headers", bound="HttpResponseHeaders")
17-
AnyStrDict = Dict[AnyStr, Union[AnyStr, List[AnyStr], Tuple[AnyStr, ...]]]
17+
18+
_AnyStrDict = Dict[AnyStr, Union[AnyStr, List[AnyStr], Tuple[AnyStr, ...]]]
1819

1920

2021
class HttpRequestBody(bytes):
@@ -99,7 +100,7 @@ class HttpResponseHeaders(_HttpHeaders):
99100

100101
@classmethod
101102
def from_bytes_dict(
102-
cls: Type[T_headers], arg: AnyStrDict, encoding: str = "utf-8"
103+
cls: Type[T_headers], arg: _AnyStrDict, encoding: str = "utf-8"
103104
) -> T_headers:
104105
"""An alternative constructor for instantiation where the header-value
105106
pairs could be in raw bytes form.
@@ -270,13 +271,3 @@ def _auto_detect_fun(self, body: bytes) -> Optional[str]:
270271
except UnicodeError:
271272
continue
272273
return resolve_encoding(enc)
273-
274-
275-
class Meta(dict):
276-
"""Container class that could contain any arbitrary data to be passed into
277-
a Page Object.
278-
279-
Note that this is simply a subclass of Python's ``dict``.
280-
"""
281-
282-
pass

web_poet/page_inputs/meta.py

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
class Meta(dict):
2+
"""Container class that could contain any arbitrary data to be passed into
3+
a Page Object.
4+
5+
Note that this is simply a subclass of Python's ``dict``.
6+
"""
7+
8+
pass

web_poet/requests.py

+3-120
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,14 @@
1-
"""This module has a full support for :mod:`asyncio` that enables developers to
2-
perform asynchronous additional requests inside of Page Objects.
3-
4-
Note that the implementation to fully execute any :class:`~.Request` is not
5-
handled in this module. With that, the framework using **web-poet** must supply
6-
the implementation.
7-
8-
You can read more about this in the :ref:`advanced-downloader-impl` documentation.
9-
"""
10-
11-
import asyncio
121
import logging
132
from contextvars import ContextVar
14-
from typing import Optional, List, Callable, Union, Dict
15-
16-
import attrs
173

18-
from web_poet.page_inputs import (
19-
HttpResponse,
4+
from web_poet.exceptions import RequestBackendError
5+
from web_poet.page_inputs.http import (
206
HttpRequest,
21-
HttpRequestHeaders,
22-
HttpRequestBody,
7+
HttpResponse,
238
)
24-
from web_poet.exceptions import RequestBackendError
259

2610
logger = logging.getLogger(__name__)
2711

28-
_StrMapping = Dict[str, str]
29-
_Headers = Union[_StrMapping, HttpRequestHeaders]
30-
_Body = Union[bytes, HttpRequestBody]
31-
32-
3312
# Frameworks that want to support additional requests in ``web-poet`` should
3413
# set the appropriate implementation for requesting data.
3514
request_backend_var: ContextVar = ContextVar("request_backend")
@@ -60,99 +39,3 @@ async def _perform_request(request: HttpRequest) -> HttpResponse:
6039

6140
response_data: HttpResponse = await request_backend(request)
6241
return response_data
63-
64-
65-
class HttpClient:
66-
"""A convenient client to easily execute requests.
67-
68-
By default, it uses the request implementation assigned in the
69-
``web_poet.request_backend_var`` which is a :mod:`contextvars` instance to
70-
download the actual requests. However, it can easily be overridable by
71-
providing an optional ``request_downloader`` callable.
72-
73-
Providing the request implementation by dependency injection would be a good
74-
alternative solution when you want to avoid setting up :mod:`contextvars`
75-
like ``web_poet.request_backend_var``.
76-
77-
In any case, this doesn't contain any implementation about how to execute
78-
any requests fed into it. When setting that up, make sure that the downloader
79-
implementation returns a :class:`~.HttpResponse` instance.
80-
"""
81-
82-
def __init__(self, request_downloader: Callable = None):
83-
self._request_downloader = request_downloader or _perform_request
84-
85-
async def request(
86-
self,
87-
url: str,
88-
*,
89-
method: str = "GET",
90-
headers: Optional[_Headers] = None,
91-
body: Optional[_Body] = None,
92-
) -> HttpResponse:
93-
"""This is a shortcut for creating a :class:`~.HttpRequest` instance and executing
94-
that request.
95-
96-
A :class:`~.HttpResponse` instance should then be returned.
97-
98-
.. warning::
99-
By convention, the request implementation supplied optionally to
100-
:class:`~.HttpClient` should return a :class:`~.HttpResponse` instance.
101-
However, the underlying implementation supplied might change that,
102-
depending on how the framework using **web-poet** implements it.
103-
"""
104-
headers = headers or {}
105-
body = body or b""
106-
req = HttpRequest(url=url, method=method, headers=headers, body=body)
107-
return await self.execute(req)
108-
109-
async def get(self, url: str, *, headers: Optional[_Headers] = None) -> HttpResponse:
110-
"""Similar to :meth:`~.HttpClient.request` but peforming a ``GET``
111-
request.
112-
"""
113-
return await self.request(url=url, method="GET", headers=headers)
114-
115-
async def post(
116-
self,
117-
url: str,
118-
*,
119-
headers: Optional[_Headers] = None,
120-
body: Optional[_Body] = None,
121-
) -> HttpResponse:
122-
"""Similar to :meth:`~.HttpClient.request` but performing a ``POST``
123-
request.
124-
"""
125-
return await self.request(url=url, method="POST", headers=headers, body=body)
126-
127-
async def execute(self, request: HttpRequest) -> HttpResponse:
128-
"""Accepts a single instance of :class:`~.HttpRequest` and executes it
129-
using the request implementation configured in the :class:`~.HttpClient`
130-
instance.
131-
132-
This returns a single :class:`~.HttpResponse`.
133-
"""
134-
return await self._request_downloader(request)
135-
136-
async def batch_execute(
137-
self, *requests: HttpRequest, return_exceptions: bool = False
138-
) -> List[Union[HttpResponse, Exception]]:
139-
"""Similar to :meth:`~.HttpClient.execute` but accepts a collection of
140-
:class:`~.HttpRequest` instances that would be batch executed.
141-
142-
The order of the :class:`~.HttpResponses` would correspond to the order
143-
of :class:`~.HttpRequest` passed.
144-
145-
If any of the :class:`~.HttpRequest` raises an exception upon execution,
146-
the exception is raised.
147-
148-
To prevent this, the actual exception can be returned alongside any
149-
successful :class:`~.HttpResponse`. This enables salvaging any usable
150-
responses despite any possible failures. This can be done by setting
151-
``True`` to the ``return_exceptions`` parameter.
152-
"""
153-
154-
coroutines = [self._request_downloader(r) for r in requests]
155-
responses = await asyncio.gather(
156-
*coroutines, return_exceptions=return_exceptions
157-
)
158-
return responses

0 commit comments

Comments
 (0)