Skip to content

linkcheck: retain default do-not-follow for redirects #13467

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ Features added
* #13332: Add :confval:`doctest_fail_fast` option to exit after the first failed
test.
Patch by Till Hoffmann.
* #13439: linkcheck: Permit warning on every redirect with
* #13439, #13462: linkcheck: Permit warning on every redirect with
``linkcheck_allowed_redirects = {}``.
Patch by Adam Turner.
Patch by Adam Turner and James Addison.

Bugs fixed
----------
Expand Down
5 changes: 5 additions & 0 deletions doc/usage/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3642,6 +3642,7 @@ and which failures and redirects it ignores.

.. confval:: linkcheck_allowed_redirects
:type: :code-py:`dict[str, str]`
:default: :code-py:`{}` (do not follow)

A dictionary that maps a pattern of the source URI
to a pattern of the canonical URI.
Expand All @@ -3655,6 +3656,10 @@ and which failures and redirects it ignores.
It can be useful to detect unexpected redirects when using
:option:`the fail-on-warnings mode <sphinx-build --fail-on-warning>`.

To deny all redirects, configure an empty dictionary (the default).

To follow all redirections, configure a value of :code-py:`None`.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given recent bugreport #13462 about unconfigured linkcheck_allow_redirects reported as unexpectedly None during sphinx-build -b html ... builds: we should check that running sphinx-build -b linkcheck ... for an affected project yields the expected {} default.


Example:

.. code-block:: python
Expand Down
51 changes: 21 additions & 30 deletions sphinx/builders/linkcheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@

from sphinx._cli.util.colour import darkgray, darkgreen, purple, red, turquoise
from sphinx.builders.dummy import DummyBuilder
from sphinx.errors import ConfigError
from sphinx.locale import __
from sphinx.transforms.post_transforms import SphinxPostTransform
from sphinx.util import logging, requests
Expand Down Expand Up @@ -387,7 +386,7 @@ def __init__(
)
self.check_anchors: bool = config.linkcheck_anchors
self.allowed_redirects: dict[re.Pattern[str], re.Pattern[str]]
self.allowed_redirects = config.linkcheck_allowed_redirects or {}
self.allowed_redirects = config.linkcheck_allowed_redirects
self.retries: int = config.linkcheck_retries
self.rate_limit_timeout = config.linkcheck_rate_limit_timeout
self._allow_unauthorized = config.linkcheck_allow_unauthorized
Expand Down Expand Up @@ -722,10 +721,13 @@ def handle_starttag(self, tag: Any, attrs: Any) -> None:
def _allowed_redirect(
url: str, new_url: str, allowed_redirects: dict[re.Pattern[str], re.Pattern[str]]
) -> bool:
return any(
from_url.match(url) and to_url.match(new_url)
for from_url, to_url in allowed_redirects.items()
)
if allowed_redirects is None: # no restrictions configured
return True
else:
return any(
from_url.match(url) and to_url.match(new_url)
for from_url, to_url in allowed_redirects.items()
)


class RateLimit(NamedTuple):
Expand All @@ -750,29 +752,18 @@ def rewrite_github_anchor(app: Sphinx, uri: str) -> str | None:

def compile_linkcheck_allowed_redirects(app: Sphinx, config: Config) -> None:
"""Compile patterns to the regexp objects."""
if config.linkcheck_allowed_redirects is _sentinel_lar:
config.linkcheck_allowed_redirects = None
return
if not isinstance(config.linkcheck_allowed_redirects, dict):
msg = __(
f'Invalid value `{config.linkcheck_allowed_redirects!r}` in '
'linkcheck_allowed_redirects. Expected a dictionary.'
)
raise ConfigError(msg)
allowed_redirects = {}
for url, pattern in config.linkcheck_allowed_redirects.items():
try:
allowed_redirects[re.compile(url)] = re.compile(pattern)
except re.error as exc:
logger.warning(
__('Failed to compile regex in linkcheck_allowed_redirects: %r %s'),
exc.pattern,
exc.msg,
)
config.linkcheck_allowed_redirects = allowed_redirects


_sentinel_lar = object()
if config.linkcheck_allowed_redirects is not None:
allowed_redirects = {}
for url, pattern in config.linkcheck_allowed_redirects.items():
try:
allowed_redirects[re.compile(url)] = re.compile(pattern)
except re.error as exc:
logger.warning(
__('Failed to compile regex in linkcheck_allowed_redirects: %r %s'),
exc.pattern,
exc.msg,
)
config.linkcheck_allowed_redirects = allowed_redirects


def setup(app: Sphinx) -> ExtensionMetadata:
Expand All @@ -784,7 +775,7 @@ def setup(app: Sphinx) -> ExtensionMetadata:
'linkcheck_exclude_documents', [], '', types=frozenset({list, tuple})
)
app.add_config_value(
'linkcheck_allowed_redirects', _sentinel_lar, '', types=frozenset({dict})
'linkcheck_allowed_redirects', None, '', types=frozenset({dict, type(None)})
)
app.add_config_value('linkcheck_auth', [], '', types=frozenset({list, tuple}))
app.add_config_value('linkcheck_request_headers', {}, '', types=frozenset({dict}))
Expand Down
56 changes: 32 additions & 24 deletions tests/test_builders/test_build_linkcheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import wsgiref.handlers
from base64 import b64encode
from http.server import BaseHTTPRequestHandler
from io import StringIO
from queue import Queue
from typing import TYPE_CHECKING
from unittest import mock
Expand All @@ -28,7 +27,6 @@
RateLimit,
compile_linkcheck_allowed_redirects,
)
from sphinx.errors import ConfigError
from sphinx.testing.util import SphinxTestApp
from sphinx.util import requests
from sphinx.util._pathlib import _StrPath
Expand Down Expand Up @@ -680,7 +678,7 @@ def check_headers(self):
assert content['status'] == 'working'


def make_redirect_handler(*, support_head: bool) -> type[BaseHTTPRequestHandler]:
def make_redirect_handler(*, support_head: bool = True) -> type[BaseHTTPRequestHandler]:
class RedirectOnceHandler(BaseHTTPRequestHandler):
protocol_version = 'HTTP/1.1'

Expand Down Expand Up @@ -712,16 +710,15 @@ def log_date_time_string(self):
'linkcheck',
testroot='linkcheck-localserver',
freshenv=True,
confoverrides={'linkcheck_allowed_redirects': None},
)
def test_follows_redirects_on_HEAD(app, capsys):
with serve_application(app, make_redirect_handler(support_head=True)) as address:
compile_linkcheck_allowed_redirects(app, app.config)
app.build()
_stdout, stderr = capsys.readouterr()
content = (app.outdir / 'output.txt').read_text(encoding='utf8')
assert content == (
'index.rst:1: [redirected with Found] '
f'http://{address}/ to http://{address}/?redirected=1\n'
)
assert content == ''
assert stderr == textwrap.dedent(
"""\
127.0.0.1 - - [] "HEAD / HTTP/1.1" 302 -
Expand All @@ -735,16 +732,15 @@ def test_follows_redirects_on_HEAD(app, capsys):
'linkcheck',
testroot='linkcheck-localserver',
freshenv=True,
confoverrides={'linkcheck_allowed_redirects': None},
)
def test_follows_redirects_on_GET(app, capsys):
with serve_application(app, make_redirect_handler(support_head=False)) as address:
compile_linkcheck_allowed_redirects(app, app.config)
app.build()
_stdout, stderr = capsys.readouterr()
content = (app.outdir / 'output.txt').read_text(encoding='utf8')
assert content == (
'index.rst:1: [redirected with Found] '
f'http://{address}/ to http://{address}/?redirected=1\n'
)
assert content == ''
assert stderr == textwrap.dedent(
"""\
127.0.0.1 - - [] "HEAD / HTTP/1.1" 405 -
Expand All @@ -755,25 +751,37 @@ def test_follows_redirects_on_GET(app, capsys):
assert app.warning.getvalue() == ''


@pytest.mark.sphinx(
'linkcheck',
testroot='linkcheck-localserver',
freshenv=True,
confoverrides={'linkcheck_allowed_redirects': {}}, # do not follow any redirects
)
def test_warns_redirects_on_GET(app, capsys):
with serve_application(app, make_redirect_handler()) as address:
compile_linkcheck_allowed_redirects(app, app.config)
app.build()
_stdout, stderr = capsys.readouterr()
content = (app.outdir / 'output.txt').read_text(encoding='utf8')
assert content == (
'index.rst:1: [redirected with Found] '
f'http://{address}/ to http://{address}/?redirected=1\n'
)
assert stderr == textwrap.dedent(
"""\
127.0.0.1 - - [] "HEAD / HTTP/1.1" 302 -
127.0.0.1 - - [] "HEAD /?redirected=1 HTTP/1.1" 204 -
""",
)
assert len(app.warning.getvalue().splitlines()) == 1


def test_linkcheck_allowed_redirects_config(
make_app: Callable[..., SphinxTestApp], tmp_path: Path
) -> None:
tmp_path.joinpath('conf.py').touch()
tmp_path.joinpath('index.rst').touch()

# ``linkcheck_allowed_redirects = None`` is rejected
warning_stream = StringIO()
with pytest.raises(ConfigError):
make_app(
'linkcheck',
srcdir=tmp_path,
confoverrides={'linkcheck_allowed_redirects': None},
warning=warning_stream,
)
assert strip_escape_sequences(warning_stream.getvalue()).splitlines() == [
"WARNING: The config value `linkcheck_allowed_redirects' has type `NoneType'; expected `dict'."
]

# ``linkcheck_allowed_redirects = {}`` is permitted
app = make_app(
'linkcheck',
Expand Down