Skip to content

Commit

Permalink
feat/1495 rest_client: renames JSONResponsePaginator to JSONLinkPagin…
Browse files Browse the repository at this point in the history
…ator (#1558)
  • Loading branch information
willi-mueller authored Jul 18, 2024
1 parent 706500e commit 04a7b0e
Show file tree
Hide file tree
Showing 9 changed files with 59 additions and 47 deletions.
6 changes: 3 additions & 3 deletions dlt/sources/helpers/rest_client/detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from .paginators import (
BasePaginator,
HeaderLinkPaginator,
JSONResponsePaginator,
JSONLinkPaginator,
JSONResponseCursorPaginator,
SinglePagePaginator,
PageNumberPaginator,
Expand Down Expand Up @@ -156,7 +156,7 @@ def header_links_detector(response: Response) -> Tuple[HeaderLinkPaginator, floa
return None, None


def json_links_detector(response: Response) -> Tuple[JSONResponsePaginator, float]:
def json_links_detector(response: Response) -> Tuple[JSONLinkPaginator, float]:
dictionary = response.json()
next_path_parts, next_href = find_next_page_path(dictionary)

Expand All @@ -166,7 +166,7 @@ def json_links_detector(response: Response) -> Tuple[JSONResponsePaginator, floa
try:
urlparse(next_href)
if next_href.startswith("http") or next_href.startswith("/"):
return JSONResponsePaginator(next_url_path=".".join(next_path_parts)), 1.0
return JSONLinkPaginator(next_url_path=".".join(next_path_parts)), 1.0
except Exception:
pass

Expand Down
20 changes: 16 additions & 4 deletions dlt/sources/helpers/rest_client/paginators.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import warnings
from abc import ABC, abstractmethod
from typing import Optional, Dict, Any
from urllib.parse import urlparse, urljoin
Expand Down Expand Up @@ -422,7 +423,7 @@ class BaseNextUrlPaginator(BaseReferencePaginator):
Subclasses should implement the `update_state` method to extract the next
page URL and set the `_next_reference` attribute accordingly.
See `HeaderLinkPaginator` and `JSONResponsePaginator` for examples.
See `HeaderLinkPaginator` and `JSONLinkPaginator` for examples.
"""

def update_request(self, request: Request) -> None:
Expand Down Expand Up @@ -491,7 +492,7 @@ def __str__(self) -> str:
return super().__str__() + f": links_next_key: {self.links_next_key}"


class JSONResponsePaginator(BaseNextUrlPaginator):
class JSONLinkPaginator(BaseNextUrlPaginator):
"""Locates the next page URL within the JSON response body. The key
containing the URL can be specified using a JSON path.
Expand All @@ -511,12 +512,12 @@ class JSONResponsePaginator(BaseNextUrlPaginator):
The link to the next page (`https://api.example.com/items?page=2`) is
located in the 'next' key of the 'pagination' object. You can use
`JSONResponsePaginator` to paginate through the API endpoint:
`JSONLinkPaginator` to paginate through the API endpoint:
from dlt.sources.helpers.rest_client import RESTClient
client = RESTClient(
base_url="https://api.example.com",
paginator=JSONResponsePaginator(next_url_path="pagination.next")
paginator=JSONLinkPaginator(next_url_path="pagination.next")
)
@dlt.resource
Expand Down Expand Up @@ -547,6 +548,17 @@ def __str__(self) -> str:
return super().__str__() + f": next_url_path: {self.next_url_path}"


class JSONResponsePaginator(JSONLinkPaginator):
    """Deprecated alias of `JSONLinkPaginator`.

    Kept so that code written against the old class name keeps working.
    Instantiating it emits a `DeprecationWarning` and otherwise behaves
    like `JSONLinkPaginator`.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """Warn about the deprecation, then initialize the parent paginator.

        Args:
            *args: Positional arguments forwarded unchanged to
                `JSONLinkPaginator.__init__`.
            **kwargs: Keyword arguments (e.g. `next_url_path`) forwarded
                unchanged to `JSONLinkPaginator.__init__`.
        """
        warnings.warn(
            "JSONResponsePaginator is deprecated and will be removed in version 1.0.0. Use"
            " JSONLinkPaginator instead.",
            DeprecationWarning,
            # stacklevel=2 attributes the warning to the code that instantiates
            # the deprecated class rather than to this __init__.
            stacklevel=2,
        )
        # Forward everything: the shim must accept whatever the renamed class
        # accepts, otherwise existing callers passing `next_url_path` would
        # fail with a TypeError instead of just seeing the warning.
        super().__init__(*args, **kwargs)


class JSONResponseCursorPaginator(BaseReferencePaginator):
"""Uses a cursor parameter for pagination, with the cursor value found in
the JSON response body.
Expand Down
4 changes: 2 additions & 2 deletions docs/website/docs/dlt-ecosystem/verified-sources/rest_api.md
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ from dlt.sources.helpers.rest_client.paginators import JSONResponsePaginator

{
"path": "posts",
"paginator": JSONResponsePaginator(
"paginator": JSONLinkPaginator(
next_url_path="pagination.next"
),
}
Expand All @@ -400,7 +400,7 @@ These are the available paginators:

| `type` | Paginator class | Description |
| ------------ | -------------- | ----------- |
| `json_response` | [JSONResponsePaginator](../../general-usage/http/rest-client.md#jsonresponsepaginator) | The link to the next page is in the body (JSON) of the response.<br/>*Parameters:*<ul><li>`next_url_path` (str) - the JSONPath to the next page URL</li></ul> |
| `json_link` | [JSONLinkPaginator](../../general-usage/http/rest-client.md#jsonlinkpaginator) | The link to the next page is in the body (JSON) of the response.<br/>*Parameters:*<ul><li>`next_url_path` (str) - the JSONPath to the next page URL</li></ul> |
| `header_link` | [HeaderLinkPaginator](../../general-usage/http/rest-client.md#headerlinkpaginator) | The links to the next page are in the response headers.<br/>*Parameters:*<ul><li>`link_header` (str) - the name of the header containing the links. Default is "next".</li></ul> |
| `offset` | [OffsetPaginator](../../general-usage/http/rest-client.md#offsetpaginator) | The pagination is based on an offset parameter. With total items count either in the response body or explicitly provided.<br/>*Parameters:*<ul><li>`limit` (int) - the maximum number of items to retrieve in each request</li><li>`offset` (int) - the initial offset for the first request. Defaults to `0`</li><li>`offset_param` (str) - the name of the query parameter used to specify the offset. Defaults to "offset"</li><li>`limit_param` (str) - the name of the query parameter used to specify the limit. Defaults to "limit"</li><li>`total_path` (str) - a JSONPath expression for the total number of items. If not provided, pagination is controlled by `maximum_offset`</li><li>`maximum_offset` (int) - optional maximum offset value. Limits pagination even without total count</li></ul> |
| `page_number` | [PageNumberPaginator](../../general-usage/http/rest-client.md#pagenumberpaginator) | The pagination is based on a page number parameter. With total pages count either in the response body or explicitly provided.<br/>*Parameters:*<ul><li>`initial_page` (int) - the starting page number. Defaults to `0`</li><li>`page_param` (str) - the query parameter name for the page number. Defaults to "page"</li><li>`total_path` (str) - a JSONPath expression for the total number of pages. If not provided, pagination is controlled by `maximum_page`</li><li>`maximum_page` (int) - optional maximum page number. Stops pagination once this page is reached</li></ul> |
Expand Down
8 changes: 4 additions & 4 deletions docs/website/docs/general-usage/http/overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,12 @@ Note that we do not explicitly specify the pagination parameters in the example.
```py
import dlt
from dlt.sources.helpers.rest_client import RESTClient
from dlt.sources.helpers.rest_client.paginators import JSONResponsePaginator
from dlt.sources.helpers.rest_client.paginators import JSONLinkPaginator

github_client = RESTClient(
base_url="https://pokeapi.co/api/v2",
paginator=JSONResponsePaginator(next_url_path="next"), # (1)
data_selector="results", # (2)
paginator=JSONLinkPaginator(next_url_path="next"), # (1)
data_selector="results", # (2)
)

@dlt.resource
Expand All @@ -86,6 +86,6 @@ print(load_info)
```

In the example above:
1. We create a `RESTClient` instance with the base URL of the API: in this case, the [PokéAPI](https://pokeapi.co/). We also specify the paginator to use explicitly: `JSONResponsePaginator` with the `next_url_path` set to `"next"`. This tells the paginator to look for the next page URL in the `next` key of the JSON response.
1. We create a `RESTClient` instance with the base URL of the API: in this case, the [PokéAPI](https://pokeapi.co/). We also specify the paginator to use explicitly: `JSONLinkPaginator` with the `next_url_path` set to `"next"`. This tells the paginator to look for the next page URL in the `next` key of the JSON response.
2. In `data_selector` we specify the JSON path to extract the data from the response. This is used to extract the data from the response JSON.
3. By default the number of items per page is limited to 20. We override this by specifying the `limit` parameter in the API call.
22 changes: 11 additions & 11 deletions docs/website/docs/general-usage/http/rest-client.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
---
title: RESTClient
description: Learn how to use the RESTClient class to interact with RESTful APIs
keywords: [api, http, rest, request, extract, restclient, client, pagination, json, response, data_selector, session, auth, paginator, jsonresponsepaginator, headerlinkpaginator, offsetpaginator, jsonresponsecursorpaginator, queryparampaginator, bearer, token, authentication]
keywords: [api, http, rest, request, extract, restclient, client, pagination, json, response, data_selector, session, auth, paginator, jsonlinkpaginator, headerlinkpaginator, offsetpaginator, jsonresponsecursorpaginator, queryparampaginator, bearer, token, authentication]
---

The `RESTClient` class offers an interface for interacting with RESTful APIs, including features like:
Expand All @@ -16,13 +16,13 @@ This guide shows how to use the `RESTClient` class to read data from APIs, focus
```py
from dlt.sources.helpers.rest_client import RESTClient
from dlt.sources.helpers.rest_client.auth import BearerTokenAuth
from dlt.sources.helpers.rest_client.paginators import JSONResponsePaginator
from dlt.sources.helpers.rest_client.paginators import JSONLinkPaginator

client = RESTClient(
base_url="https://api.example.com",
headers={"User-Agent": "MyApp/1.0"},
auth=BearerTokenAuth(token="your_access_token_here"), # type: ignore
paginator=JSONResponsePaginator(next_url_path="pagination.next"),
paginator=JSONLinkPaginator(next_url_path="pagination.next"),
data_selector="data",
session=MyCustomSession()
)
Expand Down Expand Up @@ -111,17 +111,17 @@ Each `PageData` instance contains the data for a single page, along with context

Paginators are used to handle paginated responses. The `RESTClient` class comes with built-in paginators for common pagination mechanisms:

- [JSONResponsePaginator](#jsonresponsepaginator) - link to the next page is included in the JSON response.
- [JSONLinkPaginator](#jsonlinkpaginator) - link to the next page is included in the JSON response.
- [HeaderLinkPaginator](#headerlinkpaginator) - link to the next page is included in the response headers.
- [OffsetPaginator](#offsetpaginator) - pagination based on offset and limit query parameters.
- [PageNumberPaginator](#pagenumberpaginator) - pagination based on page numbers.
- [JSONResponseCursorPaginator](#jsonresponsecursorpaginator) - pagination based on a cursor in the JSON response.

If the API uses a non-standard pagination, you can [implement a custom paginator](#implementing-a-custom-paginator) by subclassing the `BasePaginator` class.

#### JSONResponsePaginator
#### JSONLinkPaginator

`JSONResponsePaginator` is designed for APIs where the next page URL is included in the response's JSON body. This paginator uses a JSONPath to locate the next page URL within the JSON response.
`JSONLinkPaginator` is designed for APIs where the next page URL is included in the response's JSON body. This paginator uses a JSONPath to locate the next page URL within the JSON response.

**Parameters:**

Expand All @@ -144,15 +144,15 @@ Suppose the API response for `https://api.example.com/posts` looks like this:
}
```

To paginate this response, you can use the `JSONResponsePaginator` with the `next_url_path` set to `"pagination.next"`:
To paginate this response, you can use the `JSONLinkPaginator` with the `next_url_path` set to `"pagination.next"`:

```py
from dlt.sources.helpers.rest_client import RESTClient
from dlt.sources.helpers.rest_client.paginators import JSONResponsePaginator
from dlt.sources.helpers.rest_client.paginators import JSONLinkPaginator

client = RESTClient(
base_url="https://api.example.com",
paginator=JSONResponsePaginator(next_url_path="pagination.next")
paginator=JSONLinkPaginator(next_url_path="pagination.next")
)

@dlt.resource
Expand Down Expand Up @@ -625,11 +625,11 @@ and [response](https://docs.python-requests.org/en/latest/api/#requests.Response

```py
from dlt.sources.helpers.rest_client import RESTClient
from dlt.sources.helpers.rest_client.paginators import JSONResponsePaginator
from dlt.sources.helpers.rest_client.paginators import JSONLinkPaginator

client = RESTClient(
base_url="https://api.example.com",
paginator=JSONResponsePaginator(next_url_path="pagination.next")
paginator=JSONLinkPaginator(next_url_path="pagination.next")
)

for page in client.paginate("/posts"):
Expand Down
16 changes: 8 additions & 8 deletions tests/sources/helpers/rest_client/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
)
from dlt.sources.helpers.rest_client.client import Hooks
from dlt.sources.helpers.rest_client.exceptions import IgnoreResponseException
from dlt.sources.helpers.rest_client.paginators import JSONResponsePaginator, BaseReferencePaginator
from dlt.sources.helpers.rest_client.paginators import JSONLinkPaginator, BaseReferencePaginator

from .conftest import DEFAULT_PAGE_SIZE, DEFAULT_TOTAL_PAGES, assert_pagination

Expand Down Expand Up @@ -82,7 +82,7 @@ def test_get_single_resource(self, rest_client):
def test_pagination(self, rest_client: RESTClient):
pages_iter = rest_client.paginate(
"/posts",
paginator=JSONResponsePaginator(next_url_path="next_page"),
paginator=JSONLinkPaginator(next_url_path="next_page"),
)

pages = list(pages_iter)
Expand All @@ -92,15 +92,15 @@ def test_pagination(self, rest_client: RESTClient):
def test_page_context(self, rest_client: RESTClient) -> None:
for page in rest_client.paginate(
"/posts",
paginator=JSONResponsePaginator(next_url_path="next_page"),
paginator=JSONLinkPaginator(next_url_path="next_page"),
):
# response that produced data
assert isinstance(page.response, Response)
# updated request
assert isinstance(page.request, Request)
# make request url should be same as next link in paginator
if page.paginator.has_next_page:
paginator = cast(JSONResponsePaginator, page.paginator)
paginator = cast(JSONLinkPaginator, page.paginator)
assert paginator._next_reference == page.request.url

def test_default_paginator(self, rest_client: RESTClient):
Expand All @@ -112,7 +112,7 @@ def test_default_paginator(self, rest_client: RESTClient):

def test_excplicit_paginator(self, rest_client: RESTClient):
pages_iter = rest_client.paginate(
"/posts", paginator=JSONResponsePaginator(next_url_path="next_page")
"/posts", paginator=JSONLinkPaginator(next_url_path="next_page")
)
pages = list(pages_iter)

Expand All @@ -121,7 +121,7 @@ def test_excplicit_paginator(self, rest_client: RESTClient):
def test_excplicit_paginator_relative_next_url(self, rest_client: RESTClient):
pages_iter = rest_client.paginate(
"/posts_relative_next_url",
paginator=JSONResponsePaginator(next_url_path="next_page"),
paginator=JSONLinkPaginator(next_url_path="next_page"),
)
pages = list(pages_iter)

Expand All @@ -138,7 +138,7 @@ def response_hook(response: Response, *args: Any, **kwargs: Any) -> None:

pages_iter = rest_client.paginate(
"/posts",
paginator=JSONResponsePaginator(next_url_path="next_page"),
paginator=JSONLinkPaginator(next_url_path="next_page"),
hooks=hooks,
)

Expand All @@ -148,7 +148,7 @@ def response_hook(response: Response, *args: Any, **kwargs: Any) -> None:

pages_iter = rest_client.paginate(
"/posts/1/some_details_404",
paginator=JSONResponsePaginator(),
paginator=JSONLinkPaginator(),
hooks=hooks,
)

Expand Down
12 changes: 6 additions & 6 deletions tests/sources/helpers/rest_client/test_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from dlt.sources.helpers.rest_client.paginators import (
OffsetPaginator,
PageNumberPaginator,
JSONResponsePaginator,
JSONLinkPaginator,
HeaderLinkPaginator,
SinglePagePaginator,
JSONResponseCursorPaginator,
Expand Down Expand Up @@ -106,7 +106,7 @@
"results": [{"id": 1, "name": "Account 1"}, {"id": 2, "name": "Account 2"}],
},
"expected": {
"type": JSONResponsePaginator,
"type": JSONLinkPaginator,
"records_path": "results",
"next_path": ("next",),
},
Expand All @@ -123,7 +123,7 @@
"page": {"size": 2, "totalElements": 100, "totalPages": 50, "number": 1},
},
"expected": {
"type": JSONResponsePaginator,
"type": JSONLinkPaginator,
"records_path": "_embedded.items",
"next_path": ("_links", "next", "href"),
},
Expand All @@ -145,7 +145,7 @@
},
},
"expected": {
"type": JSONResponsePaginator,
"type": JSONLinkPaginator,
"records_path": "items",
"next_path": ("links", "nextPage"),
},
Expand Down Expand Up @@ -197,7 +197,7 @@
},
},
"expected": {
"type": JSONResponsePaginator,
"type": JSONLinkPaginator,
"records_path": "data",
"next_path": ("links", "next"),
},
Expand Down Expand Up @@ -395,7 +395,7 @@ def test_find_paginator(test_case) -> None:
assert type(paginator) is expected_paginator
if isinstance(paginator, PageNumberPaginator):
assert str(paginator.total_path) == ".".join(test_case["expected"]["total_path"])
if isinstance(paginator, JSONResponsePaginator):
if isinstance(paginator, JSONLinkPaginator):
assert str(paginator.next_url_path) == ".".join(test_case["expected"]["next_path"])
if isinstance(paginator, JSONResponseCursorPaginator):
assert str(paginator.cursor_path) == ".".join(test_case["expected"]["next_path"])
Expand Down
14 changes: 7 additions & 7 deletions tests/sources/helpers/rest_client/test_paginators.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
OffsetPaginator,
PageNumberPaginator,
HeaderLinkPaginator,
JSONResponsePaginator,
JSONLinkPaginator,
JSONResponseCursorPaginator,
)

Expand Down Expand Up @@ -46,7 +46,7 @@ def test_client_pagination(self, rest_client):


@pytest.mark.usefixtures("mock_api_server")
class TestJSONResponsePaginator:
class TestJSONLinkPaginator:
@pytest.mark.parametrize(
"test_case",
[
Expand Down Expand Up @@ -98,9 +98,9 @@ def test_update_state(self, test_case):
next_url_path = test_case["next_url_path"]

if next_url_path is None:
paginator = JSONResponsePaginator()
paginator = JSONLinkPaginator()
else:
paginator = JSONResponsePaginator(next_url_path=next_url_path)
paginator = JSONLinkPaginator(next_url_path=next_url_path)
response = Mock(Response, json=lambda: test_case["response_json"])
paginator.update_state(response)
assert paginator._next_reference == test_case["expected"]["next_reference"]
Expand Down Expand Up @@ -167,14 +167,14 @@ def test_update_state(self, test_case):
],
)
def test_update_request(self, test_case):
paginator = JSONResponsePaginator()
paginator = JSONLinkPaginator()
paginator._next_reference = test_case["next_reference"]
request = Mock(Request, url=test_case["request_url"])
paginator.update_request(request)
assert request.url == test_case["expected"]

def test_no_duplicate_params_on_update_request(self):
paginator = JSONResponsePaginator()
paginator = JSONLinkPaginator()

request = Request(
method="GET",
Expand All @@ -200,7 +200,7 @@ def test_no_duplicate_params_on_update_request(self):
def test_client_pagination(self, rest_client):
pages_iter = rest_client.paginate(
"/posts",
paginator=JSONResponsePaginator(
paginator=JSONLinkPaginator(
next_url_path="next_page",
),
)
Expand Down
Loading

0 comments on commit 04a7b0e

Please sign in to comment.